OCR Models#

Data classes for OCR results in different coordinate systems.

BoundingBox#

BoundingBox dataclass #

BoundingBox(left: float, top: float, right: float, bottom: float, width: float, height: float)

Normalized bounding box with coordinates in 0-1 range.

Coordinates are relative to the image dimensions: (0, 0) is the top-left corner and (1, 1) is the bottom-right corner.

Attributes:

Name Type Description
left float

Distance from left edge (0-1).

top float

Distance from top edge (0-1).

right float

Distance from left edge to right side (0-1).

bottom float

Distance from top edge to bottom side (0-1).

width float

Box width (0-1).

height float

Box height (0-1).

to_dict #

to_dict() -> dict

Convert to dictionary.

Source code in src/panoocr/ocr/models.py
def to_dict(self) -> dict:
    """Serialize the bounding box into a plain dictionary.

    Returns:
        A dict with the keys ``left``, ``top``, ``right``, ``bottom``,
        ``width`` and ``height`` (in that order), each mapped to the
        attribute of the same name.
    """
    field_names = ("left", "top", "right", "bottom", "width", "height")
    return {name: getattr(self, name) for name in field_names}

from_dict classmethod #

from_dict(data: dict) -> 'BoundingBox'

Create from dictionary.

Source code in src/panoocr/ocr/models.py
@classmethod
def from_dict(cls, data: dict) -> "BoundingBox":
    """Build an instance from a dictionary of box fields.

    Args:
        data: Mapping that provides the keys ``left``, ``top``, ``right``,
            ``bottom``, ``width`` and ``height``.

    Returns:
        A new instance constructed from those six values.

    Raises:
        KeyError: If any of the six keys is missing from ``data``.
    """
    field_names = ("left", "top", "right", "bottom", "width", "height")
    kwargs = {name: data[name] for name in field_names}
    return cls(**kwargs)

FlatOCRResult#

OCR result from a flat (perspective) image with normalized bounding box coordinates.

FlatOCRResult dataclass #

FlatOCRResult(text: str, confidence: float, bounding_box: BoundingBox, engine: Optional[str] = None)

OCR result from a flat (perspective) image.

Attributes:

Name Type Description
text str

Recognized text content.

confidence float

Recognition confidence (0-1).

bounding_box BoundingBox

Normalized bounding box in image coordinates.

engine Optional[str]

Name of the OCR engine used.

text instance-attribute #

text: str

confidence instance-attribute #

confidence: float

bounding_box instance-attribute #

bounding_box: BoundingBox

engine class-attribute instance-attribute #

engine: Optional[str] = None

to_dict #

to_dict() -> dict

Convert to dictionary.

Source code in src/panoocr/ocr/models.py
def to_dict(self) -> dict:
    """Serialize the flat OCR result into a plain dictionary.

    The nested bounding box is serialized through its own ``to_dict()``.

    Returns:
        A dict with the keys ``text``, ``confidence``, ``bounding_box``
        and ``engine``.
    """
    serialized = {"text": self.text, "confidence": self.confidence}
    serialized["bounding_box"] = self.bounding_box.to_dict()
    serialized["engine"] = self.engine
    return serialized

from_dict classmethod #

from_dict(data: dict) -> 'FlatOCRResult'

Create from dictionary.

Source code in src/panoocr/ocr/models.py
@classmethod
def from_dict(cls, data: dict) -> "FlatOCRResult":
    """Build an instance from a dictionary.

    Args:
        data: Mapping with the keys ``text``, ``confidence`` and
            ``bounding_box`` (itself a dict accepted by
            ``BoundingBox.from_dict``). ``engine`` is optional and
            falls back to ``None`` when absent.

    Returns:
        A new instance populated from ``data``.

    Raises:
        KeyError: If ``text``, ``confidence`` or ``bounding_box`` is missing.
    """
    text = data["text"]
    confidence = data["confidence"]
    box = BoundingBox.from_dict(data["bounding_box"])
    engine = data.get("engine")
    return cls(text=text, confidence=confidence, bounding_box=box, engine=engine)

to_sphere #

to_sphere(horizontal_fov: float, vertical_fov: float, yaw_offset: float, pitch_offset: float) -> 'SphereOCRResult'

Convert to spherical OCR result using camera parameters.

Uses proper 3D rotation via perspective_to_sphere() to correctly transform bounding box coordinates from perspective image space to world spherical coordinates. This accounts for the coupling between yaw and pitch that occurs when the camera has a non-zero pitch offset.

All parameters are in degrees.

Parameters:

Name Type Description Default
horizontal_fov float

Horizontal field of view of the camera.

required
vertical_fov float

Vertical field of view of the camera.

required
yaw_offset float

Horizontal (yaw) offset of the camera, in degrees.

required
pitch_offset float

Vertical (pitch) offset of the camera, in degrees.

required

Returns:

Type Description
'SphereOCRResult'

SphereOCRResult with spherical coordinates.

Source code in src/panoocr/ocr/models.py
def to_sphere(
    self,
    horizontal_fov: float,
    vertical_fov: float,
    yaw_offset: float,
    pitch_offset: float,
) -> "SphereOCRResult":
    """Project this flat result onto the sphere using camera parameters.

    The box center and its four corners are each passed through
    perspective_to_sphere(). The center gives the result's yaw/pitch;
    the spread of the corner yaws and pitches gives its angular width
    and height.

    All parameters are in degrees.

    Args:
        horizontal_fov: Horizontal field of view of the camera.
        vertical_fov: Vertical field of view of the camera.
        yaw_offset: Horizontal offset of the camera.
        pitch_offset: Vertical offset of the camera.

    Returns:
        SphereOCRResult carrying the same text, confidence and engine,
        with spherical coordinates.

    Raises:
        ValueError: If either field of view is zero or negative.
    """
    if horizontal_fov <= 0 or vertical_fov <= 0:
        raise ValueError("FOV must be positive")

    box = self.bounding_box

    def project(x, y):
        # Every point shares the same camera parameters.
        return perspective_to_sphere(
            x, y,
            horizontal_fov, vertical_fov,
            yaw_offset, pitch_offset,
        )

    # Center of the box gives the result's position on the sphere.
    center_yaw, center_pitch = project(
        (box.left + box.right) * 0.5,
        (box.top + box.bottom) * 0.5,
    )

    # Corners in the order: top-left, top-right, bottom-left, bottom-right.
    corner_yaws = []
    corner_pitches = []
    for edge_y in (box.top, box.bottom):
        for edge_x in (box.left, box.right):
            yaw, pitch = project(edge_x, edge_y)
            corner_yaws.append(yaw)
            corner_pitches.append(pitch)

    # A yaw spread above 180 is treated as a box straddling the +/-180
    # seam: negative yaws are shifted up by 360 before measuring.
    width = max(corner_yaws) - min(corner_yaws)
    if width > 180:
        unwrapped = [y + 360 if y < 0 else y for y in corner_yaws]
        width = max(unwrapped) - min(unwrapped)

    height = max(corner_pitches) - min(corner_pitches)

    return SphereOCRResult(
        text=self.text,
        confidence=self.confidence,
        yaw=center_yaw,
        pitch=center_pitch,
        width=width,
        height=height,
        engine=self.engine,
    )

SphereOCRResult#

OCR result in spherical (panorama) coordinates.

SphereOCRResult dataclass #

SphereOCRResult(text: str, confidence: float, yaw: float, pitch: float, width: float, height: float, engine: Optional[str] = None)

OCR result in spherical (panorama) coordinates.

Attributes:

Name Type Description
text str

Recognized text content.

confidence float

Recognition confidence (0-1).

yaw float

Horizontal angle in degrees (-180 to 180).

pitch float

Vertical angle in degrees (-90 to 90).

width float

Angular width in degrees.

height float

Angular height in degrees.

engine Optional[str]

Name of the OCR engine used.

text instance-attribute #

text: str

confidence instance-attribute #

confidence: float

yaw instance-attribute #

yaw: float

pitch instance-attribute #

pitch: float

width instance-attribute #

width: float

height instance-attribute #

height: float

engine class-attribute instance-attribute #

engine: Optional[str] = None

to_dict #

to_dict() -> dict

Convert to dictionary.

Source code in src/panoocr/ocr/models.py
def to_dict(self) -> dict:
    """Serialize the spherical OCR result into a plain dictionary.

    Returns:
        A dict with the keys ``text``, ``confidence``, ``yaw``, ``pitch``,
        ``width``, ``height`` and ``engine`` (in that order), each mapped
        to the attribute of the same name.
    """
    field_names = (
        "text",
        "confidence",
        "yaw",
        "pitch",
        "width",
        "height",
        "engine",
    )
    return {name: getattr(self, name) for name in field_names}

from_dict classmethod #

from_dict(data: dict) -> 'SphereOCRResult'

Create from dictionary.

Source code in src/panoocr/ocr/models.py
@classmethod
def from_dict(cls, data: dict) -> "SphereOCRResult":
    """Build an instance from a dictionary.

    Args:
        data: Mapping with the keys ``text``, ``confidence``, ``yaw``,
            ``pitch``, ``width`` and ``height``. ``engine`` is optional
            and falls back to ``None`` when absent.

    Returns:
        A new instance populated from ``data``.

    Raises:
        KeyError: If any of the six required keys is missing.
    """
    required = ("text", "confidence", "yaw", "pitch", "width", "height")
    kwargs = {name: data[name] for name in required}
    kwargs["engine"] = data.get("engine")
    return cls(**kwargs)