mlx3d.datasets¶

`mlx3d.datasets` ¶

`ColmapDataset` `dataclass` ¶

Source code in src/mlx3d/datasets/colmap.py

@dataclass
class ColmapDataset:
    """Cameras, images and sparse point cloud of one loaded COLMAP scene.

    ``len(dataset)`` is the number of cameras, and ``dataset[i]`` pairs
    ``cameras[i]`` with ``images[i]``.
    """

    cameras: list[Camera]
    images: ImageCollection
    image_names: list[str]
    points: mx.array  # SfM points, shape (P, 3)
    point_colors: mx.array  # per-point colours in [0, 1], shape (P, 3)

    def __len__(self) -> int:
        """Return the number of cameras in the scene."""
        return len(self.cameras)

    def __getitem__(self, i: int) -> tuple[Camera, mx.array]:
        """Return the ``(camera, image)`` pair stored at index ``i``."""
        camera = self.cameras[i]
        image = self.images[i]
        return camera, image

    @property
    def scene_extent(self) -> float:
        """Radius of the camera-centers bounding sphere (used to scale
        densification thresholds, as in 3DGS).

        The sphere is centred on the mean camera position; the largest
        distance from that mean is padded by 10%.
        """
        positions = np.stack([np.array(cam.camera_center) for cam in self.cameras])
        offsets = positions - positions.mean(axis=0)
        farthest = np.linalg.norm(offsets, axis=1).max()
        return float(farthest) * 1.1

`scene_extent` `property` ¶

Radius of the camera-centers bounding sphere (used to scale densification thresholds, as in 3DGS).

`ImageCollection` ¶

A list-like view over training images with configurable storage.

Always yields (H, W, 3) float32 MLX arrays in [0, 1], regardless of the underlying storage mode.

Source code in src/mlx3d/datasets/images.py

class ImageCollection:
    """List-like access to training images, stored according to ``cache``.

    Whatever the storage mode, indexing and iteration are documented to hand
    back (H, W, 3) float32 MLX arrays with values in [0, 1].

    Storage modes:
        ``"ram"``: each image is kept as an MLX array and returned as stored.
        ``"uint8"``: each image is kept as a quantised ``uint8`` NumPy array
            and rescaled by 1/255 on every access.
        ``"disk"``: only the file path is kept; the file is decoded on every
            access.
    """

    def __init__(
        self, cache: str = "ram", downscale: int = 1, white_background: bool | None = None
    ):
        """Create an empty collection.

        Args:
            cache: storage mode, one of ``"ram"``, ``"uint8"`` or ``"disk"``.
            downscale: forwarded to ``decode_image_file`` for every image.
            white_background: forwarded to ``decode_image_file`` for every image.

        Raises:
            ValueError: if ``cache`` is not one of the three supported modes.
        """
        allowed_modes = ("ram", "uint8", "disk")
        if cache not in allowed_modes:
            raise ValueError(f"cache must be 'ram', 'uint8' or 'disk', got {cache!r}.")
        self._items: list = []
        self.cache = cache
        self.downscale = downscale
        self.white_background = white_background

    def append_file(self, path: str) -> None:
        """Add the image at ``path``; decode it immediately unless in disk mode."""
        if self.cache == "disk":
            # Lazy mode: remember only the path, decode on access.
            self._items.append(path)
            return

        decoded = decode_image_file(path, self.downscale, self.white_background)
        if self.cache == "uint8":
            # Round to nearest by adding 0.5 before the truncating cast.
            stored = (decoded * 255.0 + 0.5).astype(np.uint8)
        else:
            stored = mx.array(decoded)
        self._items.append(stored)

    def shape_of(self, i: int) -> tuple[int, int]:
        """Return ``(height, width)`` of image ``i`` (this decodes it in disk mode)."""
        dims = self[i].shape
        return dims[0], dims[1]

    def __len__(self) -> int:
        """Return the number of registered images."""
        return len(self._items)

    def __getitem__(self, i: int) -> mx.array:
        """Return image ``i`` as an MLX array, converting from the stored form."""
        stored = self._items[i]
        mode = self.cache
        if mode == "ram":
            return stored
        if mode != "uint8":
            # Disk mode: ``stored`` is a path that is decoded on every access.
            return mx.array(decode_image_file(stored, self.downscale, self.white_background))
        rescaled = stored.astype(np.float32) / 255.0
        return mx.array(rescaled)

    def __iter__(self):
        """Yield every image in order, going through ``__getitem__``."""
        for index in range(len(self)):
            yield self[index]

    @property
    def nbytes_resident(self) -> int:
        """Approximate number of bytes held in memory by the stored images."""
        mode = self.cache
        if mode == "disk":
            # Only paths are stored, so image data is not counted.
            return 0
        if mode == "uint8":
            sizes = [arr.nbytes for arr in self._items]
        else:
            # Counts 4 bytes per element for the arrays kept in "ram" mode.
            sizes = [arr.size * 4 for arr in self._items]
        return sum(sizes)

`nbytes_resident` `property` ¶

Approximate resident memory of the stored images, in bytes.

`append_file(path)` ¶

Register an image file, decoding it now or later per the cache mode.

Source code in src/mlx3d/datasets/images.py

def append_file(self, path: str) -> None:
    """Add the image at ``path``; decode it immediately unless in disk mode."""
    if self.cache == "disk":
        # Lazy mode: remember only the path, decode on access.
        self._items.append(path)
        return

    decoded = decode_image_file(path, self.downscale, self.white_background)
    if self.cache == "uint8":
        # Round to nearest by adding 0.5 before the truncating cast.
        stored = (decoded * 255.0 + 0.5).astype(np.uint8)
    else:
        stored = mx.array(decoded)
    self._items.append(stored)

`shape_of(i)` ¶

(height, width) of image i (decodes it in disk mode).

Source code in src/mlx3d/datasets/images.py

def shape_of(self, i: int) -> tuple[int, int]:
    """Return ``(height, width)`` of image ``i`` (this decodes it in disk mode)."""
    dims = self[i].shape
    return dims[0], dims[1]

`load_blender(root, split='train', downscale=1, white_background=True, cache='ram')` ¶

Load a Blender-synthetic scene (lego, chair, ...).

Parameters:

Name	Type	Description	Default
`root`	`str`	scene directory containing `transforms_<split>.json`.	required
`split`	`str`	`"train"`, `"val"` or `"test"`.	`'train'`
`downscale`	`int`	integer image downscaling factor.	`1`
`white_background`	`bool`	composite the RGBA renders onto white (else black).	`True`
`cache`	`str`	image storage policy (`"ram"`, `"uint8"` or `"disk"`); see `mlx3d.datasets.images.ImageCollection`.	`'ram'`

Source code in src/mlx3d/datasets/blender.py

def load_blender(
    root: str,
    split: str = "train",
    downscale: int = 1,
    white_background: bool = True,
    cache: str = "ram",
) -> BlenderDataset:
    """Load a Blender-synthetic scene (lego, chair, ...).

    All frames share the horizontal field of view ``camera_angle_x`` from the
    transforms file. The focal length is derived from it and the width of the
    decoded image, and the principal point is placed at the image centre.

    Args:
        root: scene directory containing ``transforms_<split>.json``.
        split: ``"train"``, ``"val"`` or ``"test"``.
        downscale: integer image downscaling factor.
        white_background: composite the RGBA renders onto white (else black).
        cache: image storage policy (``"ram"``, ``"uint8"`` or ``"disk"``);
            see :class:`~mlx3d.datasets.images.ImageCollection`.

    Returns:
        BlenderDataset: one camera and one image per frame, in file order.

    Raises:
        OSError: if the transforms file cannot be opened.
        KeyError: if the transforms file lacks ``frames``, or a frame is
            processed without ``camera_angle_x``, ``file_path`` or
            ``transform_matrix`` being available.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(os.path.join(root, f"transforms_{split}.json"), encoding="utf-8") as f:
        meta = json.load(f)

    cameras: list[Camera] = []
    images = ImageCollection(cache=cache, downscale=downscale, white_background=white_background)
    for frame in meta["frames"]:
        # ``file_path`` is tried with ".png" appended first, then exactly as given.
        img_path = os.path.join(root, frame["file_path"] + ".png")
        if not os.path.exists(img_path):
            img_path = os.path.join(root, frame["file_path"])
        images.append_file(img_path)
        # Intrinsics follow the size of the image that was just registered.
        H, W = images.shape_of(len(images) - 1)

        focal = 0.5 * W / math.tan(0.5 * meta["camera_angle_x"])
        R, t = _c2w_opengl_to_opencv_extrinsics(
            np.asarray(frame["transform_matrix"], dtype=np.float64)
        )
        cameras.append(
            Camera(
                R=R,
                t=t,
                fx=focal,
                fy=focal,
                cx=W / 2.0,
                cy=H / 2.0,
                width=W,
                height=H,
                znear=0.01,
                zfar=100.0,
            )
        )

    return BlenderDataset(cameras=cameras, images=images)

`load_colmap(root, images_dir='images', downscale=1, load_images=True, cache='ram')` ¶

Load a COLMAP scene laid out as root/sparse/0 + root/<images_dir>.

COLMAP already uses the OpenCV camera convention, so extrinsics map directly onto `mlx3d.cameras.Camera`.

Parameters:

Name	Type	Description	Default
`cache`	`str`	image storage policy — `"ram"` (float32, fastest), `"uint8"` (4x less memory, negligible per-access cost), or `"disk"` (paths only, decode on access; near-zero resident memory). See `mlx3d.datasets.images.ImageCollection`.	`'ram'`

Source code in src/mlx3d/datasets/colmap.py

def load_colmap(
    root: str,
    images_dir: str = "images",
    downscale: int = 1,
    load_images: bool = True,
    cache: str = "ram",
) -> ColmapDataset:
    """Load a COLMAP scene laid out as ``root/sparse/0`` + ``root/<images_dir>``.

    If ``root/sparse/0`` is not a directory, the reconstruction is read from
    ``root/sparse`` instead. COLMAP already uses the OpenCV camera convention,
    so extrinsics map directly onto :class:`~mlx3d.cameras.Camera`. Images are
    processed in order of their file name.

    Args:
        root: scene directory.
        images_dir: sub-directory of ``root`` that holds the image files.
        downscale: integer factor by which the image size is divided;
            intrinsics are rescaled to the resulting resolution.
        load_images: when false, cameras and names are still produced but no
            image is registered in the collection.
        cache: image storage policy — ``"ram"`` (float32, fastest),
            ``"uint8"`` (4x less memory, negligible per-access cost), or
            ``"disk"`` (paths only, decode on access; near-zero resident
            memory). See :class:`~mlx3d.datasets.images.ImageCollection`.

    Returns:
        ColmapDataset: cameras, images, image names and the sparse points
        with their colours scaled to [0, 1].

    Raises:
        ValueError: if a camera uses a model other than SIMPLE_PINHOLE,
            PINHOLE, SIMPLE_RADIAL, RADIAL, OPENCV or OPENCV_FISHEYE.
    """
    sparse_dir = os.path.join(root, "sparse", "0")
    if not os.path.isdir(sparse_dir):
        sparse_dir = os.path.join(root, "sparse")
    cams_meta = _read_cameras_bin(os.path.join(sparse_dir, "cameras.bin"))
    imgs_meta = _read_images_bin(os.path.join(sparse_dir, "images.bin"))
    xyz, rgb = _read_points3d_bin(os.path.join(sparse_dir, "points3D.bin"))

    cameras: list[Camera] = []
    names: list[str] = []
    images = ImageCollection(cache=cache, downscale=downscale, white_background=None)

    for meta in sorted(imgs_meta.values(), key=lambda entry: entry["name"]):
        cam = cams_meta[meta["camera_id"]]
        params = cam["params"]
        model = cam["model"]
        distortion = None
        fisheye = False

        if model in ("SIMPLE_PINHOLE", "SIMPLE_RADIAL", "RADIAL"):
            # Single shared focal length followed by the principal point.
            fx = fy = params[0]
            cx, cy = params[1], params[2]
            if model == "SIMPLE_RADIAL":
                distortion = (params[3], 0.0, 0.0, 0.0)
            elif model == "RADIAL":
                distortion = (params[3], params[4], 0.0, 0.0)
        elif model in ("PINHOLE", "OPENCV", "OPENCV_FISHEYE"):
            # Separate fx / fy followed by the principal point.
            fx, fy, cx, cy = params[:4]
            if model != "PINHOLE":
                # OPENCV: k1, k2, p1, p2 -- OPENCV_FISHEYE: k1, k2, k3, k4
                distortion = tuple(params[4:8])
                fisheye = model == "OPENCV_FISHEYE"
        else:  # pragma: no cover
            raise ValueError(f"Unsupported camera model {model}")

        image_path = os.path.join(root, images_dir, meta["name"])
        # The image file, when its size can be read, decides the training
        # resolution: some public COLMAP scenes ship pre-resized images while
        # keeping full-resolution camera metadata.
        size_on_disk = _image_size(image_path)
        if size_on_disk:
            base_w, base_h = size_on_disk
        else:
            base_w, base_h = cam["width"], cam["height"]
        W = max(1, base_w // downscale)
        H = max(1, base_h // downscale)
        # Scale from the resolution recorded by COLMAP to the training resolution.
        sx = W / cam["width"]
        sy = H / cam["height"]

        rotation = _qvec_to_rotmat(meta["qvec"])
        translation = meta["tvec"]
        camera = Camera(
            R=mx.array(rotation.astype(np.float32)),
            t=mx.array(translation.astype(np.float32)),
            fx=fx * sx,
            fy=fy * sy,
            cx=cx * sx,
            cy=cy * sy,
            width=int(W),
            height=int(H),
            distortion=distortion,
            fisheye=fisheye,
        )
        cameras.append(camera)
        if load_images:
            images.append_file(image_path)
        names.append(meta["name"])

    points = mx.array(xyz.astype(np.float32))
    point_colors = mx.array(rgb.astype(np.float32) / 255.0)
    return ColmapDataset(
        cameras=cameras,
        images=images,
        image_names=names,
        points=points,
        point_colors=point_colors,
    )

`load_instant_ngp(root, transforms_file='transforms.json', downscale=1, white_background=False, cache='ram')` ¶

Load an Instant-NGP / nerfstudio scene.

Parameters:

Name	Type	Description	Default
`root`	`str`	scene directory containing `transforms_file`.	required
`transforms_file`	`str`	name of the transforms JSON (default `transforms.json`).	`'transforms.json'`
`downscale`	`int`	integer image downscaling factor (intrinsics are scaled to match).	`1`
`white_background`	`bool`	composite RGBA images onto white (else black).	`False`
`cache`	`str`	image storage policy; see `mlx3d.datasets.images.ImageCollection`.	`'ram'`

Returns:

Name	Type	Description
—	`BlenderDataset`	A `mlx3d.datasets.BlenderDataset` (`cameras` + `images`).

Source code in src/mlx3d/datasets/instant_ngp.py

def load_instant_ngp(
    root: str,
    transforms_file: str = "transforms.json",
    downscale: int = 1,
    white_background: bool = False,
    cache: str = "ram",
) -> BlenderDataset:
    """Load an Instant-NGP / nerfstudio scene.

    Intrinsics are looked up on each frame first and fall back to the
    file-level values. When no ``fl_x`` is available, the focal length is
    derived from ``camera_angle_x`` and the principal point is placed at the
    image centre.

    Args:
        root: scene directory containing ``transforms_file``.
        transforms_file: name of the transforms JSON (default ``transforms.json``).
        downscale: integer image downscaling factor (intrinsics are scaled to match).
        white_background: composite RGBA images onto white (else black).
        cache: image storage policy; see :class:`~mlx3d.datasets.images.ImageCollection`.

    Returns:
        BlenderDataset: the loaded scene (``cameras`` + ``images``).

    Raises:
        OSError: if the transforms file cannot be opened.
        ValueError: if a frame has neither ``fl_x`` nor ``camera_angle_x``,
            per frame or at file level.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(os.path.join(root, transforms_file), encoding="utf-8") as f:
        meta = json.load(f)

    def _lookup(frame, key, default=None):
        # Per-frame intrinsics override globals when present.
        return frame.get(key, meta.get(key, default))

    cameras: list[Camera] = []
    images = ImageCollection(cache=cache, downscale=downscale, white_background=white_background)

    g_angle = meta.get("camera_angle_x")
    for frame in meta["frames"]:
        images.append_file(_resolve_path(root, frame["file_path"]))
        # Size of the image that was just registered.
        h, w = images.shape_of(len(images) - 1)

        # Without recorded "w"/"h", fall back to the loaded size times the factor.
        orig_w = float(_lookup(frame, "w", w * downscale))
        orig_h = float(_lookup(frame, "h", h * downscale))
        sx, sy = w / orig_w, h / orig_h  # account for downscaling

        fl_x = _lookup(frame, "fl_x")
        if fl_x is not None:
            fx = float(fl_x) * sx
            fy = float(_lookup(frame, "fl_y", fl_x)) * sy
            cx = float(_lookup(frame, "cx", orig_w / 2.0)) * sx
            cy = float(_lookup(frame, "cy", orig_h / 2.0)) * sy
        else:
            angle = frame.get("camera_angle_x", g_angle)
            if angle is None:
                # Name the file that was actually read, not the default name.
                raise ValueError(f"{transforms_file} must provide fl_x or camera_angle_x.")
            fx = fy = 0.5 * w / math.tan(0.5 * float(angle))
            cx, cy = w / 2.0, h / 2.0

        R, t = _c2w_opengl_to_opencv_extrinsics(
            np.asarray(frame["transform_matrix"], dtype=np.float64)
        )
        cameras.append(
            Camera(
                R=R,
                t=t,
                fx=fx,
                fy=fy,
                cx=cx,
                cy=cy,
                width=w,
                height=h,
                znear=0.01,
                zfar=100.0,
            )
        )

    return BlenderDataset(cameras=cameras, images=images)

mlx3d.datasets¶

mlx3d.datasets ¶

ColmapDataset dataclass ¶

scene_extent property ¶

ImageCollection ¶

nbytes_resident property ¶

append_file(path) ¶

shape_of(i) ¶

load_blender(root, split='train', downscale=1, white_background=True, cache='ram') ¶

load_colmap(root, images_dir='images', downscale=1, load_images=True, cache='ram') ¶

load_instant_ngp(root, transforms_file='transforms.json', downscale=1, white_background=False, cache='ram') ¶

`mlx3d.datasets` ¶

`ColmapDataset` `dataclass` ¶

`scene_extent` `property` ¶

`ImageCollection` ¶

`nbytes_resident` `property` ¶

`append_file(path)` ¶

`shape_of(i)` ¶

`load_blender(root, split='train', downscale=1, white_background=True, cache='ram')` ¶

`load_colmap(root, images_dir='images', downscale=1, load_images=True, cache='ram')` ¶

`load_instant_ngp(root, transforms_file='transforms.json', downscale=1, white_background=False, cache='ram')` ¶