OCR Models#

Data classes for OCR results in different coordinate systems.

BoundingBox#

BoundingBox dataclass #

BoundingBox(left: float, top: float, right: float, bottom: float, width: float, height: float)

Normalized bounding box with coordinates in 0-1 range.

Coordinates are relative to the image dimensions: (0, 0) is the top-left corner and (1, 1) is the bottom-right corner.

Attributes:

Name Type Description
left float

Distance from left edge (0-1).

top float

Distance from top edge (0-1).

right float

Distance from the image's left edge to the box's right side (0-1).

bottom float

Distance from the image's top edge to the box's bottom side (0-1).

width float

Box width (0-1).

height float

Box height (0-1).

to_dict #

to_dict() -> dict

Convert to dictionary.

Source code in src/panoocr/ocr/models.py
def to_dict(self) -> dict:
    """Serialize the bounding box into a plain dictionary.

    Returns:
        A dict with the keys ``left``, ``top``, ``right``, ``bottom``,
        ``width`` and ``height`` (in that order), each mapped to the
        attribute of the same name.
    """
    field_names = ("left", "top", "right", "bottom", "width", "height")
    return {name: getattr(self, name) for name in field_names}

from_dict classmethod #

from_dict(data: dict) -> 'BoundingBox'

Create from dictionary.

Source code in src/panoocr/ocr/models.py
@classmethod
def from_dict(cls, data: dict) -> "BoundingBox":
    """Build an instance from a dictionary.

    Args:
        data: Mapping that provides the keys ``left``, ``top``, ``right``,
            ``bottom``, ``width`` and ``height``. Any other keys are ignored.

    Returns:
        A new instance constructed from those six values.

    Raises:
        KeyError: If one of the six keys is missing from ``data``.
    """
    field_names = ("left", "top", "right", "bottom", "width", "height")
    kwargs = {name: data[name] for name in field_names}
    return cls(**kwargs)

FlatOCRResult#

OCR result from a flat (perspective) image with normalized bounding box coordinates.

FlatOCRResult dataclass #

FlatOCRResult(text: str, confidence: float, bounding_box: BoundingBox, engine: Optional[str] = None)

OCR result from a flat (perspective) image.

Attributes:

Name Type Description
text str

Recognized text content.

confidence float

Recognition confidence (0-1).

bounding_box BoundingBox

Normalized bounding box in image coordinates.

engine Optional[str]

Name of the OCR engine used.

text instance-attribute #

text: str

confidence instance-attribute #

confidence: float

bounding_box instance-attribute #

bounding_box: BoundingBox

engine class-attribute instance-attribute #

engine: Optional[str] = None

to_dict #

to_dict() -> dict

Convert to dictionary.

Source code in src/panoocr/ocr/models.py
def to_dict(self) -> dict:
    """Serialize the OCR result into a plain dictionary.

    Returns:
        A dict with the keys ``text``, ``confidence``, ``bounding_box`` and
        ``engine``. The ``bounding_box`` entry holds whatever the bounding
        box's own ``to_dict()`` returns.
    """
    payload = {"text": self.text, "confidence": self.confidence}
    # Delegate to the nested box so the result contains only plain data.
    payload["bounding_box"] = self.bounding_box.to_dict()
    payload["engine"] = self.engine
    return payload

from_dict classmethod #

from_dict(data: dict) -> 'FlatOCRResult'

Create from dictionary.

Source code in src/panoocr/ocr/models.py
@classmethod
def from_dict(cls, data: dict) -> "FlatOCRResult":
    """Build an instance from a dictionary.

    Args:
        data: Mapping with the required keys ``text``, ``confidence`` and
            ``bounding_box`` (itself a mapping accepted by
            ``BoundingBox.from_dict``), plus an optional ``engine`` key.

    Returns:
        A new instance; ``engine`` is ``None`` when the key is absent.

    Raises:
        KeyError: If a required key is missing from ``data``.
    """
    text = data["text"]
    confidence = data["confidence"]
    box = BoundingBox.from_dict(data["bounding_box"])
    # "engine" is optional, so use .get() rather than indexing.
    engine = data.get("engine")
    return cls(text=text, confidence=confidence, bounding_box=box, engine=engine)

to_sphere #

to_sphere(horizontal_fov: float, vertical_fov: float, yaw_offset: float, pitch_offset: float) -> 'SphereOCRResult'

Convert to spherical OCR result using camera parameters.

All parameters are in degrees.

Parameters:

Name Type Description Default
horizontal_fov float

Horizontal field of view of the camera.

required
vertical_fov float

Vertical field of view of the camera.

required
yaw_offset float

Horizontal (yaw) offset of the camera, added to the resulting yaw.

required
pitch_offset float

Vertical (pitch) offset of the camera, added to the resulting pitch.

required

Returns:

Type Description
'SphereOCRResult'

SphereOCRResult with spherical coordinates.

Source code in src/panoocr/ocr/models.py
def to_sphere(
    self,
    horizontal_fov: float,
    vertical_fov: float,
    yaw_offset: float,
    pitch_offset: float,
) -> "SphereOCRResult":
    """Project this flat OCR result into spherical coordinates.

    All parameters are in degrees.

    Args:
        horizontal_fov: Horizontal field of view of the camera.
        vertical_fov: Vertical field of view of the camera.
        yaw_offset: Horizontal offset of the camera; added to the yaw of
            the box center.
        pitch_offset: Vertical offset of the camera; added to the pitch of
            the box center.

    Returns:
        SphereOCRResult carrying the same text, confidence and engine, with
        yaw/pitch taken from the box center and width/height taken from the
        difference between the projected top-left and bottom-right corners.

    Raises:
        ValueError: If either field of view is zero or negative.
    """
    if any(fov <= 0 for fov in (horizontal_fov, vertical_fov)):
        raise ValueError("FOV must be positive")

    box = self.bounding_box

    def project(u: float, v: float):
        # Map a normalized image point to (yaw, pitch) for the given FOV.
        return self._uv_to_yaw_pitch(horizontal_fov, vertical_fov, u, v)

    # Center of the box gives the result's direction.
    mid_yaw, mid_pitch = project(
        (box.left + box.right) * 0.5,
        (box.top + box.bottom) * 0.5,
    )

    # Opposite corners give the angular extent.
    yaw_at_left, pitch_at_top = project(box.left, box.top)
    yaw_at_right, pitch_at_bottom = project(box.right, box.bottom)

    return SphereOCRResult(
        text=self.text,
        confidence=self.confidence,
        yaw=mid_yaw + yaw_offset,
        pitch=mid_pitch + pitch_offset,
        width=yaw_at_right - yaw_at_left,
        # Top pitch minus bottom pitch, matching the corner order above.
        height=pitch_at_top - pitch_at_bottom,
        engine=self.engine,
    )

SphereOCRResult#

OCR result in spherical (panorama) coordinates.

SphereOCRResult dataclass #

SphereOCRResult(text: str, confidence: float, yaw: float, pitch: float, width: float, height: float, engine: Optional[str] = None)

OCR result in spherical (panorama) coordinates.

Attributes:

Name Type Description
text str

Recognized text content.

confidence float

Recognition confidence (0-1).

yaw float

Horizontal angle in degrees (-180 to 180).

pitch float

Vertical angle in degrees (-90 to 90).

width float

Angular width in degrees.

height float

Angular height in degrees.

engine Optional[str]

Name of the OCR engine used.

text instance-attribute #

text: str

confidence instance-attribute #

confidence: float

yaw instance-attribute #

yaw: float

pitch instance-attribute #

pitch: float

width instance-attribute #

width: float

height instance-attribute #

height: float

engine class-attribute instance-attribute #

engine: Optional[str] = None

to_dict #

to_dict() -> dict

Convert to dictionary.

Source code in src/panoocr/ocr/models.py
def to_dict(self) -> dict:
    """Serialize the spherical OCR result into a plain dictionary.

    Returns:
        A dict with the keys ``text``, ``confidence``, ``yaw``, ``pitch``,
        ``width``, ``height`` and ``engine`` (in that order), each mapped to
        the attribute of the same name.
    """
    field_names = (
        "text",
        "confidence",
        "yaw",
        "pitch",
        "width",
        "height",
        "engine",
    )
    return {name: getattr(self, name) for name in field_names}

from_dict classmethod #

from_dict(data: dict) -> 'SphereOCRResult'

Create from dictionary.

Source code in src/panoocr/ocr/models.py
@classmethod
def from_dict(cls, data: dict) -> "SphereOCRResult":
    """Build an instance from a dictionary.

    Args:
        data: Mapping with the required keys ``text``, ``confidence``,
            ``yaw``, ``pitch``, ``width`` and ``height``, plus an optional
            ``engine`` key.

    Returns:
        A new instance; ``engine`` is ``None`` when the key is absent.

    Raises:
        KeyError: If a required key is missing from ``data``.
    """
    required = ("text", "confidence", "yaw", "pitch", "width", "height")
    kwargs = {key: data[key] for key in required}
    # "engine" is optional, so use .get() rather than indexing.
    kwargs["engine"] = data.get("engine")
    return cls(**kwargs)