| """ |
| Back-projection utilities: depth map → 3D point cloud. |
| |
| DepthPro outputs metric depth (meters) and an estimated focal length. |
| Using the standard pinhole camera model, each pixel can be back-projected |
| into a 3D point relative to the camera centre. |
| """ |
|
|
| from __future__ import annotations |
|
|
| from typing import Optional, Tuple |
|
|
| import numpy as np |
|
|
|
|
def depth_to_point_cloud(
    depth: np.ndarray,
    focal_length: float,
    principal_point: Optional[Tuple[float, float]] = None,
    *,
    mask: Optional[np.ndarray] = None,
    sample_step: int = 1,
) -> np.ndarray:
    """
    Back-project a metric depth map into a 3D point cloud.

    Parameters
    ----------
    depth : np.ndarray
        (H, W) array of metric depths in meters. It is converted to float32.
        Pixels whose depth is not strictly positive, or is NaN/inf, are
        dropped.
    focal_length : float
        Focal length in pixels (for the resolution of *depth*).
        DepthPro returns this automatically via ``DepthResult.focal_length``.
    principal_point : (cx, cy), optional
        Principal point in pixel coordinates. Defaults to the image centre
        ``(W/2, H/2)``.
    mask : np.ndarray, optional
        (H, W) array. Only pixels where the mask is truthy are kept; boolean
        and 0/1 integer masks are both accepted.
        Useful for removing sky/background, invalid depths, etc.
    sample_step : int, default 1
        Spatial sub-sampling step. ``2`` keeps every 2nd pixel along each
        axis (75 % reduction), ``4`` keeps every 4th (93.75 % reduction).
        Must be >= 1.

    Returns
    -------
    points : np.ndarray
        (N, 3) float32 array of 3D points in the camera coordinate frame,
        ordered row-major over the kept pixels.
        ``+Z`` points forward (into the scene), ``+X`` is right, ``+Y`` is
        down (standard image convention).

    Raises
    ------
    ValueError
        If *depth* is not 2-D, *sample_step* is smaller than 1,
        *focal_length* is not a positive finite number, or *mask* does not
        have the same shape as *depth*.

    Notes
    -----
    DepthPro assumes square pixels (aspect ratio = 1) and therefore a single
    focal length value is sufficient: ``fx == fy == focal_length``.

    The standard pinhole projection equations are::

        X = (u - cx) * Z / fx
        Y = (v - cy) * Z / fy
        Z = depth[v, u]

    where ``(u, v)`` are pixel column/row indices.
    """
    depth = np.asarray(depth, dtype=np.float32)
    if depth.ndim != 2:
        raise ValueError(
            f"depth must be a 2-D (H, W) array, got shape {depth.shape}"
        )
    if sample_step < 1:
        raise ValueError(f"sample_step must be >= 1, got {sample_step}")

    fx = fy = float(focal_length)
    if not (np.isfinite(fx) and fx > 0.0):
        raise ValueError(
            f"focal_length must be a positive finite number, got {focal_length}"
        )

    H, W = depth.shape
    if principal_point is None:
        cx, cy = W / 2.0, H / 2.0
    else:
        cx, cy = float(principal_point[0]), float(principal_point[1])

    # Strided slicing yields the sub-sampled grid as a view (no index arrays).
    Z = depth[::sample_step, ::sample_step]

    # Reject non-positive *and* non-finite depths: an infinite depth would
    # otherwise pass ``Z > 0`` and yield inf/NaN coordinates.
    valid = np.isfinite(Z) & (Z > 0.0)
    if mask is not None:
        mask = np.asarray(mask)
        if mask.shape != (H, W):
            raise ValueError(
                f"mask shape {mask.shape} does not match depth shape {(H, W)}"
            )
        # Cast so that 0/1 integer masks work with the in-place boolean AND.
        valid &= mask[::sample_step, ::sample_step].astype(bool)

    # np.nonzero walks the grid row-major, the same order as boolean indexing,
    # so pixel coordinates stay aligned with ``Z[valid]``.
    rows, cols = np.nonzero(valid)
    u = cols * sample_step
    v = rows * sample_step
    Z = Z[valid]

    X = (u - cx) * Z / fx
    Y = (v - cy) * Z / fy

    return np.stack([X, Y, Z], axis=-1).astype(np.float32)
|
|
|
|
def rgbd_to_point_cloud(
    depth: np.ndarray,
    rgb: np.ndarray,
    focal_length: float,
    principal_point: Optional[Tuple[float, float]] = None,
    *,
    mask: Optional[np.ndarray] = None,
    sample_step: int = 1,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Back-project an RGB-D pair into a coloured 3D point cloud.

    Parameters
    ----------
    depth : np.ndarray
        (H, W) metric depth map. It is converted to float32. Pixels whose
        depth is not strictly positive, or is NaN/inf, are dropped.
    rgb : np.ndarray
        (H, W, 3) uint8 RGB image with the same height/width as *depth*.
        Values are cast to uint8 on output.
    focal_length : float
        Estimated focal length in pixels (``fx == fy == focal_length``).
    principal_point : (cx, cy), optional
        Principal point in pixel coordinates. Defaults to the image centre
        ``(W/2, H/2)``.
    mask : np.ndarray, optional
        (H, W) mask selecting pixels to keep; boolean and 0/1 integer masks
        are both accepted.
    sample_step : int, default 1
        Spatial sub-sampling step (keep every ``sample_step``-th pixel along
        each axis). Must be >= 1.

    Returns
    -------
    points : np.ndarray
        (N, 3) float32 3D points in the camera frame (``+X`` right, ``+Y``
        down, ``+Z`` forward), ordered row-major over the kept pixels.
    colors : np.ndarray
        (N, 3) uint8 RGB colours aligned with *points*.

    Raises
    ------
    ValueError
        If *depth* is not 2-D, *depth* and *rgb* differ in height/width,
        *sample_step* is smaller than 1, *focal_length* is not a positive
        finite number, or *mask* does not have the same shape as *depth*.
    """
    depth = np.asarray(depth, dtype=np.float32)
    rgb = np.asarray(rgb)
    if depth.shape[:2] != rgb.shape[:2]:
        raise ValueError(
            f"depth shape {depth.shape} and rgb shape {rgb.shape} must have same H×W"
        )
    # A (H, W, 1) depth passes the check above but cannot be unpacked below.
    if depth.ndim != 2:
        raise ValueError(
            f"depth must be a 2-D (H, W) array, got shape {depth.shape}"
        )
    if sample_step < 1:
        raise ValueError(f"sample_step must be >= 1, got {sample_step}")

    fx = fy = float(focal_length)
    if not (np.isfinite(fx) and fx > 0.0):
        raise ValueError(
            f"focal_length must be a positive finite number, got {focal_length}"
        )

    H, W = depth.shape
    if principal_point is None:
        cx, cy = W / 2.0, H / 2.0
    else:
        cx, cy = float(principal_point[0]), float(principal_point[1])

    # Strided views of the sub-sampled depth and colour grids.
    Z = depth[::sample_step, ::sample_step]
    colors_sampled = rgb[::sample_step, ::sample_step]

    # Reject non-positive and non-finite depths (inf would pass ``Z > 0``).
    valid = np.isfinite(Z) & (Z > 0.0)
    if mask is not None:
        mask = np.asarray(mask)
        # Without this check a larger, wrong-sized mask would be silently
        # indexed with the depth grid's coordinates.
        if mask.shape != (H, W):
            raise ValueError(
                f"mask shape {mask.shape} does not match depth shape {(H, W)}"
            )
        valid &= mask[::sample_step, ::sample_step].astype(bool)

    # Row-major traversal keeps coordinates, depths and colours aligned.
    rows, cols = np.nonzero(valid)
    u = cols * sample_step
    v = rows * sample_step
    Z = Z[valid]
    colors = colors_sampled[valid]

    X = (u - cx) * Z / fx
    Y = (v - cy) * Z / fy

    points = np.stack([X, Y, Z], axis=-1).astype(np.float32)
    colors = np.asarray(colors, dtype=np.uint8)
    return points, colors
|
|
|
|
def normals_from_depth(
    depth: np.ndarray,
    focal_length: float,
    principal_point: Optional[Tuple[float, float]] = None,
) -> np.ndarray:
    """
    Compute per-pixel surface normals directly from the depth map.

    Every pixel is back-projected with the pinhole model, the two surface
    tangents are estimated with finite differences along the image columns
    and rows (central differences in the interior, one-sided at the image
    border), and the normal is their normalised cross product.

    This is a fast, approximate normal estimator that works well for
    visualisation or as input to downstream surface-reconstruction methods
    (e.g. Poisson, NKSR).

    Parameters
    ----------
    depth : np.ndarray
        (H, W) metric depth map. Depths that are not strictly positive, or
        are NaN/inf, are treated as invalid.
    focal_length : float
        Focal length in pixels (``fx == fy == focal_length``).
    principal_point : (cx, cy), optional
        Principal point in pixel coordinates. Defaults to the image centre
        ``(W/2, H/2)``.

    Returns
    -------
    normals : np.ndarray
        (H, W, 3) float32 array of **unoriented** unit normals.
        ``normals[v, u]`` is the normal at pixel ``(u, v)``. The normal is
        the zero vector where it cannot be estimated: at invalid pixels, at
        pixels with an invalid 4-neighbour, and everywhere when the image
        has fewer than two rows or columns.

    Raises
    ------
    ValueError
        If *depth* is not 2-D or *focal_length* is not a positive finite
        number.
    """
    depth = np.asarray(depth, dtype=np.float64)
    if depth.ndim != 2:
        raise ValueError(
            f"depth must be a 2-D (H, W) array, got shape {depth.shape}"
        )

    fx = fy = float(focal_length)
    if not (np.isfinite(fx) and fx > 0.0):
        raise ValueError(
            f"focal_length must be a positive finite number, got {focal_length}"
        )

    H, W = depth.shape
    # Finite differences need at least two samples along each axis.
    if H < 2 or W < 2:
        return np.zeros((H, W, 3), dtype=np.float32)

    if principal_point is None:
        cx, cy = W / 2.0, H / 2.0
    else:
        cx, cy = float(principal_point[0]), float(principal_point[1])

    valid = np.isfinite(depth) & (depth > 0.0)
    # Neutralise invalid depths so NaN/inf cannot leak into the arithmetic;
    # the affected pixels are zeroed out below anyway.
    Z = np.where(valid, depth, 0.0)

    u = np.arange(W, dtype=np.float64)[None, :]
    v = np.arange(H, dtype=np.float64)[:, None]
    X = (u - cx) * Z / fx
    Y = (v - cy) * Z / fy
    points = np.stack([X, Y, Z], axis=-1)  # (H, W, 3) back-projected points

    # Surface tangents along image columns (u) and rows (v). np.gradient uses
    # central differences in the interior and one-sided ones at the border.
    tangent_u = np.gradient(points, axis=1)
    tangent_v = np.gradient(points, axis=0)
    normals = np.cross(tangent_u, tangent_v)

    # A normal is only trustworthy when the pixel and all of its existing
    # 4-neighbours carry a valid depth.
    usable = valid.copy()
    usable[:, 1:] &= valid[:, :-1]
    usable[:, :-1] &= valid[:, 1:]
    usable[1:, :] &= valid[:-1, :]
    usable[:-1, :] &= valid[1:, :]

    norm = np.linalg.norm(normals, axis=-1, keepdims=True)
    keep = usable[..., None] & (norm > 0.0)

    # Divide only where it is meaningful; everything else stays zero and no
    # divide-by-zero warning is emitted.
    unit = np.zeros_like(normals)
    np.divide(normals, norm, out=unit, where=keep)

    return unit.astype(np.float32)
|
|