mlx3d.cameras¶

`mlx3d.cameras` ¶

`Camera` `dataclass` ¶

A single pinhole camera.

Attributes:

Name	Type	Description
`R`	`array`	(3, 3) world-to-camera rotation.
`t`	`array`	(3,) world-to-camera translation.
`fx`, `fy`	`float`	focal lengths in pixels.
`cx`, `cy`	`float`	principal point in pixels.
`width`, `height`	`int`	image size in pixels.
`znear`, `zfar`	`float`	clipping range used by renderers.
`orthographic`	`bool`	if `True`, use an orthographic projection (parallel rays, no perspective divide). `fx`/`fy` then act as pixels-per-world-unit instead of focal lengths.
`distortion`	`tuple[float, ...] \| None`	optional lens distortion coefficients. Brown-Conrady `(k1, k2, p1, p2[, k3])` by default, or OpenCV fisheye `(k1, k2, k3, k4)` when `fisheye=True`. Applied in `project_points` and inverted in `generate_rays` / `unproject_points`.
`fisheye`	`bool`	select the equidistant fisheye distortion model.

Source code in src/mlx3d/cameras/cameras.py

@dataclass
class Camera:
    """A single camera: world-to-camera pose plus pixel-space intrinsics.

    Perspective (pinhole) by default; ``orthographic`` and ``distortion``
    select the other supported projection models.

    Attributes:
        R: (3, 3) world-to-camera rotation.
        t: (3,) world-to-camera translation.
        fx, fy: focal lengths in pixels.
        cx, cy: principal point in pixels.
        width, height: image size in pixels.
        znear, zfar: clipping range used by renderers.
        orthographic: if ``True``, use an orthographic projection (parallel
            rays, no perspective divide). ``fx``/``fy`` then act as
            pixels-per-world-unit instead of focal lengths.
        distortion: optional lens distortion coefficients. Brown-Conrady
            ``(k1, k2, p1, p2[, k3])`` by default, or OpenCV fisheye
            ``(k1, k2, k3, k4)`` when ``fisheye=True``. Applied in
            :meth:`project_points` and inverted in :meth:`generate_rays` /
            :meth:`unproject_points`. The orthographic code paths return
            before any distortion is applied.
        fisheye: select the equidistant fisheye distortion model.
    """

    R: mx.array
    t: mx.array
    fx: float
    fy: float
    cx: float
    cy: float
    width: int
    height: int
    znear: float = 0.01
    zfar: float = 100.0
    orthographic: bool = False
    distortion: tuple[float, ...] | None = None
    fisheye: bool = False

    @classmethod
    def orthographic_camera(
        cls,
        scale: float,
        width: int,
        height: int,
        R: mx.array | None = None,
        t: mx.array | None = None,
        **kwargs,
    ) -> "Camera":
        """Build an orthographic camera from a world-space half-height.

        ``scale`` is the half-height of the view volume in world units: the
        range ``[-scale, scale]`` (vertical, camera space) fills the image
        height, and the horizontal extent follows from the aspect ratio.

        Args:
            scale: half-height of the visible region in world units.
            width, height: image size in pixels.
            R: (3, 3) world-to-camera rotation; identity when omitted.
            t: (3,) world-to-camera translation; zeros when omitted.
            **kwargs: forwarded unchanged to the :class:`Camera` constructor.
        """
        # Half the image height must cover ``scale`` world units.
        pixels_per_unit = (height / 2.0) / float(scale)
        rotation = mx.eye(3) if R is None else R
        translation = mx.zeros((3,)) if t is None else t
        return cls(
            R=rotation,
            t=translation,
            fx=pixels_per_unit,
            fy=pixels_per_unit,
            cx=width / 2.0,
            cy=height / 2.0,
            width=width,
            height=height,
            orthographic=True,
            **kwargs,
        )

    @classmethod
    def from_fov(
        cls,
        fov: float,
        width: int,
        height: int,
        R: mx.array | None = None,
        t: mx.array | None = None,
        degrees: bool = True,
        **kwargs,
    ) -> "Camera":
        """Build a camera from its vertical field of view.

        The focal length is derived from ``fov`` and ``height`` and used for
        both axes, so the horizontal FoV follows from the aspect ratio. The
        principal point is placed at the image center.

        Args:
            fov: vertical field of view, in degrees unless ``degrees=False``.
            width, height: image size in pixels.
            R: (3, 3) world-to-camera rotation; identity when omitted.
            t: (3,) world-to-camera translation; zeros when omitted.
            degrees: interpret ``fov`` as degrees (default) or radians.
            **kwargs: forwarded unchanged to the :class:`Camera` constructor.
        """
        fov_rad = math.radians(fov) if degrees else fov
        focal = fov_to_focal(fov_rad, height)
        rotation = mx.eye(3) if R is None else R
        translation = mx.zeros((3,)) if t is None else t
        return cls(
            R=rotation,
            t=translation,
            fx=focal,
            fy=focal,
            cx=width / 2.0,
            cy=height / 2.0,
            width=width,
            height=height,
            **kwargs,
        )

    @classmethod
    def look_at(
        cls,
        eye,
        at=(0.0, 0.0, 0.0),
        up=(0.0, 1.0, 0.0),
        fov: float = 60.0,
        width: int = 512,
        height: int = 512,
        degrees: bool = True,
        **kwargs,
    ) -> "Camera":
        """Build a camera positioned at ``eye`` and aimed at ``at``.

        The pose comes from the module-level ``look_at`` helper; intrinsics
        come from :meth:`from_fov`, which also receives ``**kwargs``.
        """
        # Inside a method body the bare name resolves to the module-level
        # function, not to this classmethod.
        rotation, translation = look_at(mx.array(eye), mx.array(at), mx.array(up))
        return cls.from_fov(
            fov, width, height, R=rotation, t=translation, degrees=degrees, **kwargs
        )

    @property
    def K(self) -> mx.array:
        """(3, 3) intrinsic matrix."""
        rows = [
            [self.fx, 0.0, self.cx],
            [0.0, self.fy, self.cy],
            [0.0, 0.0, 1.0],
        ]
        return mx.array(rows)

    @property
    def fov_x(self) -> float:
        """Field of view in radians computed from ``fx`` and ``width``."""
        return focal_to_fov(self.fx, self.width)

    @property
    def fov_y(self) -> float:
        """Field of view in radians computed from ``fy`` and ``height``."""
        return focal_to_fov(self.fy, self.height)

    @property
    def camera_center(self) -> mx.array:
        """(3,) camera position in world coordinates."""
        rot_t = self.R.T
        return -(rot_t @ self.t)

    @property
    def world_to_camera_matrix(self) -> mx.array:
        """(4, 4) homogeneous world-to-camera matrix."""
        upper = mx.concatenate([self.R, self.t[:, None]], axis=1)  # [R | t]
        last_row = mx.array([[0.0, 0.0, 0.0, 1.0]])
        return mx.concatenate([upper, last_row], axis=0)

    def world_to_camera(self, points: mx.array) -> mx.array:
        """Map world points ``(..., 3)`` into the camera frame."""
        # Row-vector form of ``R @ p + t``.
        rotated = points @ self.R.T
        return rotated + self.t

    def camera_to_world(self, points: mx.array) -> mx.array:
        """Map camera-frame points ``(..., 3)`` back to world coordinates."""
        # Row-vector form of ``R^T @ (p - t)``.
        centered = points - self.t
        return centered @ self.R

    def project_points(self, points: mx.array, eps: float = 1e-8) -> tuple[mx.array, mx.array]:
        """Project world points ``(..., 3)`` to pixel coordinates.

        Args:
            points: world-space points, last axis of size 3.
            eps: depths with ``|z| < eps`` are replaced by ``eps`` before the
                perspective divide (unused for orthographic cameras).

        Returns:
            ``(xy, depth)`` where ``xy`` is ``(..., 2)`` pixel coordinates and
            ``depth`` is ``(...,)`` z-depth in the camera frame. Points behind
            the camera have negative depth; callers should mask on it.
        """
        cam_pts = self.world_to_camera(points)
        depth = cam_pts[..., 2]
        if self.orthographic:
            # No perspective divide and no lens distortion on this path.
            px = self.fx * cam_pts[..., 0] + self.cx
            py = self.fy * cam_pts[..., 1] + self.cy
            return mx.stack([px, py], axis=-1), depth
        near_zero = mx.abs(depth) < eps
        safe_depth = mx.where(near_zero, mx.full(depth.shape, eps), depth)
        recip = 1.0 / safe_depth
        xn = cam_pts[..., 0] * recip
        yn = cam_pts[..., 1] * recip
        xn, yn = self._distort(xn, yn)
        px = self.fx * xn + self.cx
        py = self.fy * yn + self.cy
        return mx.stack([px, py], axis=-1), depth

    def _distort(self, x: mx.array, y: mx.array) -> tuple[mx.array, mx.array]:
        """Apply lens distortion to normalized image coords (identity if none)."""
        coeffs = self.distortion
        if coeffs is None:
            return x, y
        apply = _fisheye_distort if self.fisheye else _brown_distort
        return apply(x, y, coeffs)

    def _undistort(self, x: mx.array, y: mx.array) -> tuple[mx.array, mx.array]:
        """Invert lens distortion on normalized image coords (identity if none)."""
        coeffs = self.distortion
        if coeffs is None:
            return x, y
        invert = _fisheye_undistort if self.fisheye else _brown_undistort
        return invert(x, y, coeffs)

    def unproject_points(self, xy: mx.array, depth: mx.array) -> mx.array:
        """Lift pixel coordinates ``(..., 2)`` with z-depths ``(...,)`` back to world points."""
        # Pixel -> normalized image coordinates (shared by both models).
        xn = (xy[..., 0] - self.cx) / self.fx
        yn = (xy[..., 1] - self.cy) / self.fy
        if self.orthographic:
            cam_pts = mx.stack([xn, yn, depth], axis=-1)
        else:
            xn, yn = self._undistort(xn, yn)
            cam_pts = mx.stack([xn * depth, yn * depth, depth], axis=-1)
        return self.camera_to_world(cam_pts)

    def generate_rays(self) -> tuple[mx.array, mx.array]:
        """Generate one ray per pixel (at pixel centers).

        Returns:
            ``(origins, directions)``, both ``(height, width, 3)`` in world
            coordinates. Directions are normalized.
        """
        rows, cols = self.height, self.width
        col_centers = mx.arange(cols, dtype=mx.float32) + 0.5
        row_centers = mx.arange(rows, dtype=mx.float32) + 0.5
        grid_u = mx.broadcast_to(col_centers[None, :], (rows, cols))
        grid_v = mx.broadcast_to(row_centers[:, None], (rows, cols))
        xn = (grid_u - self.cx) / self.fx
        yn = (grid_v - self.cy) / self.fy
        if self.orthographic:
            # Parallel rays: every pixel shares the forward direction and gets
            # its own origin on the camera-space plane z = 0.
            plane_pts = mx.stack([xn, yn, mx.zeros_like(grid_u)], axis=-1)
            origins = self.camera_to_world(plane_pts)
            forward = mx.array([0.0, 0.0, 1.0]) @ self.R
            forward = forward / mx.linalg.norm(forward)
            return origins, mx.broadcast_to(forward, origins.shape)
        # Pixel coordinates include lens distortion; ray directions must not.
        xn, yn = self._undistort(xn, yn)
        cam_dirs = mx.stack([xn, yn, mx.ones_like(grid_u)], axis=-1)
        world_dirs = cam_dirs @ self.R  # row-vector form of R^T @ d
        world_dirs = world_dirs / mx.linalg.norm(world_dirs, axis=-1, keepdims=True)
        origins = mx.broadcast_to(self.camera_center, world_dirs.shape)
        return origins, world_dirs

`K` `property` ¶

(3, 3) intrinsic matrix.

`camera_center` `property` ¶

(3,) camera position in world coordinates.

`world_to_camera_matrix` `property` ¶

(4, 4) homogeneous world-to-camera matrix.

`orthographic_camera(scale, width, height, R=None, t=None, **kwargs)` `classmethod` ¶

Create an orthographic camera.

`scale` is the world-units half-height of the view volume: the visible region spans `[-scale, scale]` vertically in camera space, mapped to the image height (the width follows from the aspect ratio).

Source code in src/mlx3d/cameras/cameras.py

@classmethod
def orthographic_camera(
    cls,
    scale: float,
    width: int,
    height: int,
    R: mx.array | None = None,
    t: mx.array | None = None,
    **kwargs,
) -> "Camera":
    """Build an orthographic camera from a world-space half-height.

    ``scale`` is the half-height of the view volume in world units: the
    range ``[-scale, scale]`` (vertical, camera space) fills the image
    height, and the horizontal extent follows from the aspect ratio.

    Args:
        scale: half-height of the visible region in world units.
        width, height: image size in pixels.
        R: (3, 3) world-to-camera rotation; identity when omitted.
        t: (3,) world-to-camera translation; zeros when omitted.
        **kwargs: forwarded unchanged to the :class:`Camera` constructor.
    """
    # Half the image height must cover ``scale`` world units.
    pixels_per_unit = (height / 2.0) / float(scale)
    rotation = mx.eye(3) if R is None else R
    translation = mx.zeros((3,)) if t is None else t
    return cls(
        R=rotation,
        t=translation,
        fx=pixels_per_unit,
        fy=pixels_per_unit,
        cx=width / 2.0,
        cy=height / 2.0,
        width=width,
        height=height,
        orthographic=True,
        **kwargs,
    )

`from_fov(fov, width, height, R=None, t=None, degrees=True, **kwargs)` `classmethod` ¶

Create a camera from a vertical field of view (the horizontal FoV follows from the aspect ratio).

Source code in src/mlx3d/cameras/cameras.py

@classmethod
def from_fov(
    cls,
    fov: float,
    width: int,
    height: int,
    R: mx.array | None = None,
    t: mx.array | None = None,
    degrees: bool = True,
    **kwargs,
) -> "Camera":
    """Build a camera from its vertical field of view.

    The focal length is derived from ``fov`` and ``height`` and used for
    both axes, so the horizontal FoV follows from the aspect ratio. The
    principal point is placed at the image center.

    Args:
        fov: vertical field of view, in degrees unless ``degrees=False``.
        width, height: image size in pixels.
        R: (3, 3) world-to-camera rotation; identity when omitted.
        t: (3,) world-to-camera translation; zeros when omitted.
        degrees: interpret ``fov`` as degrees (default) or radians.
        **kwargs: forwarded unchanged to the :class:`Camera` constructor.
    """
    fov_rad = math.radians(fov) if degrees else fov
    focal = fov_to_focal(fov_rad, height)
    rotation = mx.eye(3) if R is None else R
    translation = mx.zeros((3,)) if t is None else t
    return cls(
        R=rotation,
        t=translation,
        fx=focal,
        fy=focal,
        cx=width / 2.0,
        cy=height / 2.0,
        width=width,
        height=height,
        **kwargs,
    )

`look_at(eye, at=(0.0, 0.0, 0.0), up=(0.0, 1.0, 0.0), fov=60.0, width=512, height=512, degrees=True, **kwargs)` `classmethod` ¶

Create a camera at `eye` looking at `at`.

Source code in src/mlx3d/cameras/cameras.py

@classmethod
def look_at(
    cls,
    eye,
    at=(0.0, 0.0, 0.0),
    up=(0.0, 1.0, 0.0),
    fov: float = 60.0,
    width: int = 512,
    height: int = 512,
    degrees: bool = True,
    **kwargs,
) -> "Camera":
    """Build a camera positioned at ``eye`` and aimed at ``at``.

    The pose comes from the module-level ``look_at`` helper; intrinsics
    come from :meth:`from_fov`, which also receives ``**kwargs``.
    """
    # Inside a method body the bare name resolves to the module-level
    # function, not to this classmethod.
    rotation, translation = look_at(mx.array(eye), mx.array(at), mx.array(up))
    return cls.from_fov(
        fov, width, height, R=rotation, t=translation, degrees=degrees, **kwargs
    )

`world_to_camera(points)` ¶

Transform world points (..., 3) into the camera frame.

Source code in src/mlx3d/cameras/cameras.py

def world_to_camera(self, points: mx.array) -> mx.array:
    """Map world points ``(..., 3)`` into the camera frame."""
    # Row-vector form of ``R @ p + t``.
    rotated = points @ self.R.T
    return rotated + self.t

`camera_to_world(points)` ¶

Transform camera-frame points (..., 3) back to world coordinates.

Source code in src/mlx3d/cameras/cameras.py

def camera_to_world(self, points: mx.array) -> mx.array:
    """Map camera-frame points ``(..., 3)`` back to world coordinates."""
    # Row-vector form of ``R^T @ (p - t)``.
    centered = points - self.t
    return centered @ self.R

`project_points(points, eps=1e-08)` ¶

Project world points (..., 3) to pixel coordinates.

Returns:

Type	Description
`tuple[array, array]`	`(xy, depth)` where `xy` is `(..., 2)` pixel coordinates and `depth` is `(...,)` z-depth in the camera frame. Points behind the camera have negative depth; callers should mask on it.

Source code in src/mlx3d/cameras/cameras.py

def project_points(self, points: mx.array, eps: float = 1e-8) -> tuple[mx.array, mx.array]:
    """Project world points ``(..., 3)`` to pixel coordinates.

    Args:
        points: world-space points, last axis of size 3.
        eps: depths with ``|z| < eps`` are replaced by ``eps`` before the
            perspective divide (unused for orthographic cameras).

    Returns:
        ``(xy, depth)`` where ``xy`` is ``(..., 2)`` pixel coordinates and
        ``depth`` is ``(...,)`` z-depth in the camera frame. Points behind
        the camera have negative depth; callers should mask on it.
    """
    cam_pts = self.world_to_camera(points)
    depth = cam_pts[..., 2]
    if self.orthographic:
        # No perspective divide and no lens distortion on this path.
        px = self.fx * cam_pts[..., 0] + self.cx
        py = self.fy * cam_pts[..., 1] + self.cy
        return mx.stack([px, py], axis=-1), depth
    near_zero = mx.abs(depth) < eps
    safe_depth = mx.where(near_zero, mx.full(depth.shape, eps), depth)
    recip = 1.0 / safe_depth
    xn = cam_pts[..., 0] * recip
    yn = cam_pts[..., 1] * recip
    xn, yn = self._distort(xn, yn)
    px = self.fx * xn + self.cx
    py = self.fy * yn + self.cy
    return mx.stack([px, py], axis=-1), depth

`unproject_points(xy, depth)` ¶

Lift pixel coordinates (..., 2) with z-depths (...,) back to world points.

Source code in src/mlx3d/cameras/cameras.py

def unproject_points(self, xy: mx.array, depth: mx.array) -> mx.array:
    """Lift pixel coordinates ``(..., 2)`` with z-depths ``(...,)`` back to world points."""
    # Pixel -> normalized image coordinates (shared by both models).
    xn = (xy[..., 0] - self.cx) / self.fx
    yn = (xy[..., 1] - self.cy) / self.fy
    if self.orthographic:
        cam_pts = mx.stack([xn, yn, depth], axis=-1)
    else:
        xn, yn = self._undistort(xn, yn)
        cam_pts = mx.stack([xn * depth, yn * depth, depth], axis=-1)
    return self.camera_to_world(cam_pts)

`generate_rays()` ¶

Generate one ray per pixel (at pixel centers).

Returns:

Type	Description
`tuple[array, array]`	`(origins, directions)`, both `(height, width, 3)` in world coordinates. Directions are normalized.

Source code in src/mlx3d/cameras/cameras.py

def generate_rays(self) -> tuple[mx.array, mx.array]:
    """Generate one ray per pixel (at pixel centers).

    Returns:
        ``(origins, directions)``, both ``(height, width, 3)`` in world
        coordinates. Directions are normalized.
    """
    rows, cols = self.height, self.width
    col_centers = mx.arange(cols, dtype=mx.float32) + 0.5
    row_centers = mx.arange(rows, dtype=mx.float32) + 0.5
    grid_u = mx.broadcast_to(col_centers[None, :], (rows, cols))
    grid_v = mx.broadcast_to(row_centers[:, None], (rows, cols))
    xn = (grid_u - self.cx) / self.fx
    yn = (grid_v - self.cy) / self.fy
    if self.orthographic:
        # Parallel rays: every pixel shares the forward direction and gets
        # its own origin on the camera-space plane z = 0.
        plane_pts = mx.stack([xn, yn, mx.zeros_like(grid_u)], axis=-1)
        origins = self.camera_to_world(plane_pts)
        forward = mx.array([0.0, 0.0, 1.0]) @ self.R
        forward = forward / mx.linalg.norm(forward)
        return origins, mx.broadcast_to(forward, origins.shape)
    # Pixel coordinates include lens distortion; ray directions must not.
    xn, yn = self._undistort(xn, yn)
    cam_dirs = mx.stack([xn, yn, mx.ones_like(grid_u)], axis=-1)
    world_dirs = cam_dirs @ self.R  # row-vector form of R^T @ d
    world_dirs = world_dirs / mx.linalg.norm(world_dirs, axis=-1, keepdims=True)
    origins = mx.broadcast_to(self.camera_center, world_dirs.shape)
    return origins, world_dirs

`CameraBatch` `dataclass` ¶

A batch of N pinhole cameras with vectorized projection and rays.

Stores stacked extrinsics/intrinsics (`R` `(N, 3, 3)`, `t` `(N, 3)`, `fx`/`fy`/`cx`/`cy` `(N,)`) sharing one image size. Indexing returns a single `Camera`, so it interoperates with the per-camera renderers; the batched methods avoid Python loops for multi-view projection and ray generation (e.g. projecting one point set into every view at once).

Source code in src/mlx3d/cameras/cameras.py

@dataclass
class CameraBatch:
    """A batch of ``N`` pinhole cameras with vectorized projection and rays.

    Stores stacked extrinsics/intrinsics (``R`` ``(N, 3, 3)``, ``t`` ``(N, 3)``,
    ``fx``/``fy``/``cx``/``cy`` ``(N,)``) sharing one image size. Indexing returns
    a single :class:`Camera`, so it interoperates with the per-camera renderers;
    the batched methods avoid Python loops for multi-view projection and ray
    generation (e.g. projecting one point set into every view at once).

    The batch has no fields for ``orthographic``, ``distortion`` or ``fisheye``
    and holds a single ``znear``/``zfar`` pair: every view is treated as an
    undistorted perspective camera with a shared clipping range.
    """

    R: mx.array  # (N, 3, 3)
    t: mx.array  # (N, 3)
    fx: mx.array  # (N,)
    fy: mx.array  # (N,)
    cx: mx.array  # (N,)
    cy: mx.array  # (N,)
    width: int
    height: int
    znear: float = 0.01
    zfar: float = 100.0

    @classmethod
    def from_cameras(cls, cameras: list[Camera]) -> "CameraBatch":
        """Stack a list of single :class:`Camera` objects into a batch.

        Only pose, ``fx``/``fy``/``cx``/``cy`` and the image size are stacked.
        ``znear``/``zfar`` are taken from the first camera. Settings the batch
        cannot represent are dropped, and a warning is emitted when that
        actually loses information.

        Raises:
            ValueError: if ``cameras`` is empty or the image sizes differ.
        """
        import warnings

        if not cameras:
            raise ValueError("from_cameras needs at least one camera.")
        first = cameras[0]
        w, h = first.width, first.height
        if any((c.width, c.height) != (w, h) for c in cameras):
            raise ValueError("CameraBatch requires all cameras to share an image size.")

        # ``fisheye`` alone is harmless: it only matters when ``distortion`` is set.
        lossy = [
            i for i, c in enumerate(cameras) if c.orthographic or c.distortion is not None
        ]
        if lossy:
            warnings.warn(
                "CameraBatch only models undistorted perspective cameras; the "
                f"orthographic/distortion settings of cameras {lossy} are dropped.",
                stacklevel=2,
            )
        if any((c.znear, c.zfar) != (first.znear, first.zfar) for c in cameras):
            warnings.warn(
                "Cameras have differing znear/zfar; CameraBatch uses the first "
                "camera's clipping range for every view.",
                stacklevel=2,
            )

        return cls(
            R=mx.stack([mx.array(c.R) for c in cameras]),
            t=mx.stack([mx.array(c.t) for c in cameras]),
            fx=mx.array([float(c.fx) for c in cameras]),
            fy=mx.array([float(c.fy) for c in cameras]),
            cx=mx.array([float(c.cx) for c in cameras]),
            cy=mx.array([float(c.cy) for c in cameras]),
            width=w,
            height=h,
            znear=float(first.znear),
            zfar=float(first.zfar),
        )

    def __len__(self) -> int:
        """Number of cameras ``N`` (leading dimension of ``R``)."""
        return int(self.R.shape[0])

    def __getitem__(self, i: int) -> Camera:
        """Return view ``i`` as a standalone :class:`Camera`.

        The result uses the :class:`Camera` defaults for ``orthographic``,
        ``distortion`` and ``fisheye``, since the batch does not store them.
        """
        return Camera(
            R=self.R[i],
            t=self.t[i],
            fx=float(self.fx[i]),
            fy=float(self.fy[i]),
            cx=float(self.cx[i]),
            cy=float(self.cy[i]),
            width=self.width,
            height=self.height,
            znear=self.znear,
            zfar=self.zfar,
        )

    @property
    def camera_centers(self) -> mx.array:
        """``(N, 3)`` camera positions in world coordinates."""
        rot_t = mx.swapaxes(self.R, -1, -2)
        return -(rot_t @ self.t[..., None])[..., 0]

    def world_to_camera(self, points: mx.array) -> mx.array:
        """Transform world points ``(P, 3)`` into each camera frame -> ``(N, P, 3)``."""
        rot_t = mx.swapaxes(self.R, -1, -2)  # per-camera R^T
        return points[None] @ rot_t + self.t[:, None, :]

    def project_points(self, points: mx.array, eps: float = 1e-8) -> tuple[mx.array, mx.array]:
        """Project world points ``(P, 3)`` into all ``N`` views.

        Depths with ``|z| < eps`` are replaced by ``eps`` before the
        perspective divide.

        Returns ``(xy, depth)`` of shapes ``(N, P, 2)`` and ``(N, P)``.
        """
        cam_pts = self.world_to_camera(points)  # (N, P, 3)
        depth = cam_pts[..., 2]
        safe_depth = mx.where(mx.abs(depth) < eps, mx.full(depth.shape, eps), depth)
        recip = 1.0 / safe_depth
        px = self.fx[:, None] * cam_pts[..., 0] * recip + self.cx[:, None]
        py = self.fy[:, None] * cam_pts[..., 1] * recip + self.cy[:, None]
        return mx.stack([px, py], axis=-1), depth

    def generate_rays(self) -> tuple[mx.array, mx.array]:
        """Per-pixel rays for every camera.

        Returns ``(origins, directions)`` both ``(N, height, width, 3)`` in world
        coordinates, with normalized directions.
        """
        count, rows, cols = len(self), self.height, self.width
        col_centers = mx.arange(cols, dtype=mx.float32) + 0.5
        row_centers = mx.arange(rows, dtype=mx.float32) + 0.5
        grid_u = mx.broadcast_to(col_centers[None, :], (rows, cols))
        grid_v = mx.broadcast_to(row_centers[:, None], (rows, cols))
        # Per-camera intrinsics give camera-space directions, then rotate to world.
        xn = (grid_u[None] - self.cx[:, None, None]) / self.fx[:, None, None]  # (N, H, W)
        yn = (grid_v[None] - self.cy[:, None, None]) / self.fy[:, None, None]
        cam_dirs = mx.stack([xn, yn, mx.ones_like(xn)], axis=-1)  # (N, H, W, 3)
        world_dirs = cam_dirs.reshape(count, rows * cols, 3) @ self.R  # (N, HW, 3)
        world_dirs = world_dirs / mx.linalg.norm(world_dirs, axis=-1, keepdims=True)
        world_dirs = world_dirs.reshape(count, rows, cols, 3)
        origins = mx.broadcast_to(self.camera_centers[:, None, None, :], world_dirs.shape)
        return origins, world_dirs

`camera_centers` `property` ¶

(N, 3) camera positions in world coordinates.

`from_cameras(cameras)` `classmethod` ¶

Stack a list of single `Camera` objects into a batch.

Source code in src/mlx3d/cameras/cameras.py

@classmethod
def from_cameras(cls, cameras: list[Camera]) -> "CameraBatch":
    """Stack a list of single :class:`Camera` objects into a batch.

    Only pose, ``fx``/``fy``/``cx``/``cy`` and the image size are stacked.
    ``znear``/``zfar`` are taken from the first camera. Settings the batch
    cannot represent are dropped, and a warning is emitted when that
    actually loses information.

    Raises:
        ValueError: if ``cameras`` is empty or the image sizes differ.
    """
    import warnings

    if not cameras:
        raise ValueError("from_cameras needs at least one camera.")
    first = cameras[0]
    w, h = first.width, first.height
    if any((c.width, c.height) != (w, h) for c in cameras):
        raise ValueError("CameraBatch requires all cameras to share an image size.")

    # ``fisheye`` alone is harmless: it only matters when ``distortion`` is set.
    lossy = [
        i for i, c in enumerate(cameras) if c.orthographic or c.distortion is not None
    ]
    if lossy:
        warnings.warn(
            "CameraBatch only models undistorted perspective cameras; the "
            f"orthographic/distortion settings of cameras {lossy} are dropped.",
            stacklevel=2,
        )
    if any((c.znear, c.zfar) != (first.znear, first.zfar) for c in cameras):
        warnings.warn(
            "Cameras have differing znear/zfar; CameraBatch uses the first "
            "camera's clipping range for every view.",
            stacklevel=2,
        )

    return cls(
        R=mx.stack([mx.array(c.R) for c in cameras]),
        t=mx.stack([mx.array(c.t) for c in cameras]),
        fx=mx.array([float(c.fx) for c in cameras]),
        fy=mx.array([float(c.fy) for c in cameras]),
        cx=mx.array([float(c.cx) for c in cameras]),
        cy=mx.array([float(c.cy) for c in cameras]),
        width=w,
        height=h,
        znear=float(first.znear),
        zfar=float(first.zfar),
    )

`world_to_camera(points)` ¶

Transform world points (P, 3) into each camera frame -> (N, P, 3).

Source code in src/mlx3d/cameras/cameras.py

def world_to_camera(self, points: mx.array) -> mx.array:
    """Transform world points ``(P, 3)`` into each camera frame -> ``(N, P, 3)``."""
    rot_t = mx.swapaxes(self.R, -1, -2)  # per-camera R^T
    return points[None] @ rot_t + self.t[:, None, :]

`project_points(points, eps=1e-08)` ¶

Project world points (P, 3) into all N views.

Returns (xy, depth) of shapes (N, P, 2) and (N, P).

Source code in src/mlx3d/cameras/cameras.py

def project_points(self, points: mx.array, eps: float = 1e-8) -> tuple[mx.array, mx.array]:
    """Project world points ``(P, 3)`` into all ``N`` views.

    Depths with ``|z| < eps`` are replaced by ``eps`` before the
    perspective divide.

    Returns ``(xy, depth)`` of shapes ``(N, P, 2)`` and ``(N, P)``.
    """
    cam_pts = self.world_to_camera(points)  # (N, P, 3)
    depth = cam_pts[..., 2]
    safe_depth = mx.where(mx.abs(depth) < eps, mx.full(depth.shape, eps), depth)
    recip = 1.0 / safe_depth
    px = self.fx[:, None] * cam_pts[..., 0] * recip + self.cx[:, None]
    py = self.fy[:, None] * cam_pts[..., 1] * recip + self.cy[:, None]
    return mx.stack([px, py], axis=-1), depth

`generate_rays()` ¶

Per-pixel rays for every camera.

Returns (origins, directions) both (N, height, width, 3) in world coordinates, with normalized directions.

Source code in src/mlx3d/cameras/cameras.py

def generate_rays(self) -> tuple[mx.array, mx.array]:
    """Per-pixel rays for every camera.

    Returns ``(origins, directions)`` both ``(N, height, width, 3)`` in world
    coordinates, with normalized directions.
    """
    count, rows, cols = len(self), self.height, self.width
    col_centers = mx.arange(cols, dtype=mx.float32) + 0.5
    row_centers = mx.arange(rows, dtype=mx.float32) + 0.5
    grid_u = mx.broadcast_to(col_centers[None, :], (rows, cols))
    grid_v = mx.broadcast_to(row_centers[:, None], (rows, cols))
    # Per-camera intrinsics give camera-space directions, then rotate to world.
    xn = (grid_u[None] - self.cx[:, None, None]) / self.fx[:, None, None]  # (N, H, W)
    yn = (grid_v[None] - self.cy[:, None, None]) / self.fy[:, None, None]
    cam_dirs = mx.stack([xn, yn, mx.ones_like(xn)], axis=-1)  # (N, H, W, 3)
    world_dirs = cam_dirs.reshape(count, rows * cols, 3) @ self.R  # (N, HW, 3)
    world_dirs = world_dirs / mx.linalg.norm(world_dirs, axis=-1, keepdims=True)
    world_dirs = world_dirs.reshape(count, rows, cols, 3)
    origins = mx.broadcast_to(self.camera_centers[:, None, None, :], world_dirs.shape)
    return origins, world_dirs

`focal_to_fov(focal, pixels)` ¶

Field of view in radians from a focal length in pixels.

Source code in src/mlx3d/cameras/cameras.py

def focal_to_fov(focal: float, pixels: int) -> float:
    """Convert a focal length in pixels to a field of view in radians.

    Args:
        focal: focal length in pixels.
        pixels: image extent in pixels along the same axis.

    Returns:
        The full angle ``2 * atan(pixels / (2 * focal))`` in radians.
    """
    half_extent_ratio = pixels / (2.0 * focal)
    half_angle = math.atan(half_extent_ratio)
    return 2.0 * half_angle

`fov_to_focal(fov, pixels)` ¶

Focal length in pixels from a field of view in radians.

Source code in src/mlx3d/cameras/cameras.py

def fov_to_focal(fov: float, pixels: int) -> float:
    """Convert a field of view in radians to a focal length in pixels.

    Args:
        fov: full field of view in radians.
        pixels: image extent in pixels along the same axis.

    Returns:
        The focal length ``pixels / (2 * tan(fov / 2))`` in pixels.
    """
    half_angle_tan = math.tan(fov / 2.0)
    return pixels / (2.0 * half_angle_tan)

`look_at(eye, at, up)` ¶

Build OpenCV-convention extrinsics `(R, t)` for a camera at `eye` looking at `at`.

Parameters:

Name	Type	Description	Default
`eye`	`array`	(3,) camera position in world coordinates.	required
`at`	`array`	(3,) target point in world coordinates.	required
`up`	`array`	(3,) approximate world up vector.	required

Returns:

Type	Description
`tuple[array, array]`	`R` (3, 3) and `t` (3,) such that `X_cam = R @ X_world + t`.

Source code in src/mlx3d/cameras/cameras.py

def look_at(eye: mx.array, at: mx.array, up: mx.array) -> tuple[mx.array, mx.array]:
    """Compute OpenCV-convention extrinsics ``(R, t)`` for a camera at ``eye`` aimed at ``at``.

    The rows of ``R`` are the camera axes expressed in world coordinates:
    ``x = z × up``, ``y = z × x``, and ``z`` pointing from ``eye`` to ``at``.

    Args:
        eye: (3,) camera position in world coordinates.
        at: (3,) target point in world coordinates.
        up: (3,) approximate world up vector.

    Returns:
        ``R`` (3, 3) and ``t`` (3,) such that ``X_cam = R @ X_world + t``.
    """
    # NOTE(review): there is no guard for ``eye == at`` or for ``up`` parallel
    # to the viewing direction; both normalize a zero-length vector.
    eye, at, up = mx.array(eye), mx.array(at), mx.array(up)
    forward = at - eye
    forward = forward / mx.linalg.norm(forward)
    right = mx.linalg.cross(forward, up)
    right = right / mx.linalg.norm(right)
    # z × x completes the right-handed frame.
    down = mx.linalg.cross(forward, right)
    R = mx.stack([right, down, forward], axis=0)
    return R, -(R @ eye)

`look_at_view_transform(dist=1.0, elev=0.0, azim=0.0, at=(0.0, 0.0, 0.0), up=(0.0, 1.0, 0.0), degrees=True)` ¶

Extrinsics for a camera on a sphere around at.

`elev` is the angle above the xz-plane, `azim` the angle around `+y` measured from `+z`. Returns `(R, t)` in the OpenCV convention.

Source code in src/mlx3d/cameras/cameras.py

def look_at_view_transform(
    dist: float = 1.0,
    elev: float = 0.0,
    azim: float = 0.0,
    at: tuple[float, float, float] = (0.0, 0.0, 0.0),
    up: tuple[float, float, float] = (0.0, 1.0, 0.0),
    degrees: bool = True,
) -> tuple[mx.array, mx.array]:
    """Extrinsics for a camera placed on a sphere of radius ``dist`` around ``at``.

    ``elev`` is the angle above the xz-plane, ``azim`` the angle around ``+y``
    measured from ``+z``; both are in degrees unless ``degrees=False``.
    Returns ``(R, t)`` in the OpenCV convention, as produced by ``look_at``.
    """
    if degrees:
        elev, azim = math.radians(elev), math.radians(azim)
    # Spherical -> Cartesian offset from the target.
    horizontal = dist * math.cos(elev)
    offset_x = horizontal * math.sin(azim)
    offset_y = dist * math.sin(elev)
    offset_z = horizontal * math.cos(azim)
    eye = mx.array([at[0] + offset_x, at[1] + offset_y, at[2] + offset_z])
    return look_at(eye, mx.array(at), mx.array(up))

`refine_camera(camera, twist)` ¶

Return a copy of camera whose pose is perturbed by an SE(3) twist.

The world-to-camera extrinsics are left-multiplied by exp(twist) (a 6D Lie-algebra vector [v, omega]), so at twist = 0 the camera is unchanged. The result is differentiable w.r.t. twist, which makes camera poses optimizable jointly with a scene (BARF / pose-free NeRF & 3DGS): parameterize each view by a learnable twist, refine the camera, render, and backpropagate.

Intrinsics, image size and distortion are carried over unchanged.

Source code in src/mlx3d/cameras/cameras.py

def refine_camera(camera: Camera, twist: mx.array) -> Camera:
    """Return a copy of ``camera`` whose pose is perturbed by an SE(3) ``twist``.

    The world-to-camera extrinsics are left-multiplied by ``exp(twist)`` (a 6D
    Lie-algebra vector ``[v, omega]``), so at ``twist = 0`` the camera is
    unchanged. The result is differentiable w.r.t. ``twist``, which makes camera
    poses optimizable jointly with a scene (BARF / pose-free NeRF & 3DGS):
    parameterize each view by a learnable twist, refine the camera, render, and
    backpropagate.

    Only ``R`` and ``t`` change; every other field is copied from ``camera``.
    """
    from ..transforms.se3 import Transform3d, se3_exp_map

    pose = Transform3d(camera.R, camera.t)
    refined = pose.compose(se3_exp_map(twist))
    # Everything except the pose is carried over as-is.
    carried_fields = (
        "fx",
        "fy",
        "cx",
        "cy",
        "width",
        "height",
        "znear",
        "zfar",
        "orthographic",
        "distortion",
        "fisheye",
    )
    carried = {name: getattr(camera, name) for name in carried_fields}
    return Camera(R=refined.rot, t=refined.trans, **carried)

mlx3d.cameras¶

mlx3d.cameras ¶

Camera dataclass ¶

K property ¶

camera_center property ¶

world_to_camera_matrix property ¶

orthographic_camera(scale, width, height, R=None, t=None, **kwargs) classmethod ¶

from_fov(fov, width, height, R=None, t=None, degrees=True, **kwargs) classmethod ¶

look_at(eye, at=(0.0, 0.0, 0.0), up=(0.0, 1.0, 0.0), fov=60.0, width=512, height=512, degrees=True, **kwargs) classmethod ¶

world_to_camera(points) ¶

camera_to_world(points) ¶

project_points(points, eps=1e-08) ¶

unproject_points(xy, depth) ¶

generate_rays() ¶

CameraBatch dataclass ¶

camera_centers property ¶

from_cameras(cameras) classmethod ¶

world_to_camera(points) ¶

project_points(points, eps=1e-08) ¶

generate_rays() ¶

focal_to_fov(focal, pixels) ¶

fov_to_focal(fov, pixels) ¶

look_at(eye, at, up) ¶

look_at_view_transform(dist=1.0, elev=0.0, azim=0.0, at=(0.0, 0.0, 0.0), up=(0.0, 1.0, 0.0), degrees=True) ¶

refine_camera(camera, twist) ¶

`mlx3d.cameras` ¶

`Camera` `dataclass` ¶

`K` `property` ¶

`camera_center` `property` ¶

`world_to_camera_matrix` `property` ¶

`orthographic_camera(scale, width, height, R=None, t=None, **kwargs)` `classmethod` ¶

`from_fov(fov, width, height, R=None, t=None, degrees=True, **kwargs)` `classmethod` ¶

`look_at(eye, at=(0.0, 0.0, 0.0), up=(0.0, 1.0, 0.0), fov=60.0, width=512, height=512, degrees=True, **kwargs)` `classmethod` ¶

`world_to_camera(points)` ¶

`camera_to_world(points)` ¶

`project_points(points, eps=1e-08)` ¶

`unproject_points(xy, depth)` ¶

`generate_rays()` ¶

`CameraBatch` `dataclass` ¶

`camera_centers` `property` ¶

`from_cameras(cameras)` `classmethod` ¶

`world_to_camera(points)` ¶

`project_points(points, eps=1e-08)` ¶

`generate_rays()` ¶

`focal_to_fov(focal, pixels)` ¶

`fov_to_focal(fov, pixels)` ¶

`look_at(eye, at, up)` ¶

`look_at_view_transform(dist=1.0, elev=0.0, azim=0.0, at=(0.0, 0.0, 0.0), up=(0.0, 1.0, 0.0), degrees=True)` ¶

`refine_camera(camera, twist)` ¶