"""
Back-projection utilities: depth map → 3D point cloud.

DepthPro outputs metric depth (meters) and an estimated focal length.
Using the standard pinhole camera model, each pixel can be back-projected
into a 3D point relative to the camera centre.
"""

from __future__ import annotations

from typing import Optional, Tuple

import numpy as np


def _as_depth_map(depth, dtype) -> np.ndarray:
    """Convert *depth* to a 2-D array of *dtype*, rejecting any other rank."""
    depth_map = np.asarray(depth, dtype=dtype)
    if depth_map.ndim != 2:
        raise ValueError(
            f"depth must be a 2-D (H, W) array, got shape {depth_map.shape}"
        )
    return depth_map


def _principal_point(
    principal_point: Optional[Tuple[float, float]], height: int, width: int
) -> Tuple[float, float]:
    """Return ``(cx, cy)``, falling back to the image centre ``(W/2, H/2)``."""
    if principal_point is None:
        return width / 2.0, height / 2.0
    return float(principal_point[0]), float(principal_point[1])


def _focal_length(focal_length: float) -> float:
    """Return the focal length as a float, rejecting zero / non-finite values."""
    focal = float(focal_length)
    # A zero or non-finite focal length would silently turn every X/Y into
    # inf/NaN in the division below, so fail loudly instead.
    if not np.isfinite(focal) or focal == 0.0:
        raise ValueError(
            f"focal_length must be a finite, non-zero number, got {focal_length!r}"
        )
    return focal


def _check_sample_step(sample_step: int) -> int:
    """Return *sample_step* as an ``int``, requiring a positive whole number."""
    step = int(sample_step)
    if step != sample_step or step < 1:
        raise ValueError(
            f"sample_step must be a positive integer, got {sample_step!r}"
        )
    return step


def _back_project(
    depth: np.ndarray,
    focal_length: float,
    principal_point: Optional[Tuple[float, float]],
    mask: Optional[np.ndarray],
    sample_step: int,
):
    """
    Shared implementation of the point-cloud back-projection.

    Returns
    -------
    points : np.ndarray
        (N, 3) float32 points, in row-major order of the kept pixels.
    keep : np.ndarray
        Boolean array over the *sub-sampled* grid marking the kept pixels.
    grid : tuple of slice
        The ``(rows, cols)`` slices that sub-sample a full-resolution image
        so that ``image[grid][keep]`` lines up with *points*.
    """
    depth = _as_depth_map(depth, np.float32)
    height, width = depth.shape
    step = _check_sample_step(sample_step)
    cx, cy = _principal_point(principal_point, height, width)
    fx = fy = _focal_length(focal_length)

    # Strided slicing visits rows/cols 0, step, 2*step, ...
    grid = (slice(None, None, step), slice(None, None, step))
    Z = depth[grid]

    # Keep only physically meaningful depths: NaN, +/-inf, zero and negative
    # values cannot be back-projected to a usable point.
    keep = np.isfinite(Z) & (Z > 0.0)

    if mask is not None:
        mask = np.asarray(mask)
        if mask.shape != (height, width):
            raise ValueError(
                f"mask shape {mask.shape} does not match depth shape {(height, width)}"
            )
        # Cast so that integer (0/1 or 0/255) masks work as well as boolean ones.
        keep &= mask[grid].astype(bool)

    # Indices into the sub-sampled grid, scaled back to full-resolution
    # pixel coordinates.  ``np.nonzero`` walks the array in row-major order,
    # which matches the order of boolean-mask indexing used for Z / colours.
    v_sub, u_sub = np.nonzero(keep)
    u = u_sub * step
    v = v_sub * step
    Z = Z[keep]

    X = (u - cx) * Z / fx
    Y = (v - cy) * Z / fy
    points = np.stack([X, Y, Z], axis=-1).astype(np.float32)
    return points, keep, grid


def depth_to_point_cloud(
    depth: np.ndarray,
    focal_length: float,
    principal_point: Optional[Tuple[float, float]] = None,
    *,
    mask: Optional[np.ndarray] = None,
    sample_step: int = 1,
) -> np.ndarray:
    """
    Back-project a metric depth map into a 3D point cloud.

    Parameters
    ----------
    depth : np.ndarray
        (H, W) float array of metric depths in meters.
    focal_length : float
        Focal length in pixels (for the resolution of *depth*).
        DepthPro returns this automatically via ``DepthResult.focal_length``.
    principal_point : (cx, cy), optional
        Principal point in pixel coordinates.  Defaults to the image
        centre ``(W/2, H/2)``.
    mask : np.ndarray, optional
        (H, W) boolean array.  Only pixels where ``mask == True`` are kept.
        Useful for removing sky/background, invalid depths, etc.
        Non-boolean masks are interpreted as "non-zero means keep".
    sample_step : int, default 1
        Spatial sub-sampling step.  ``2`` keeps every 2nd pixel (75 %
        reduction), ``4`` keeps every 4th (93.75 % reduction).  Handy for
        real-time viz.

    Returns
    -------
    points : np.ndarray
        (N, 3) float32 array of 3D points in the camera coordinate frame.
        ``+Z`` points forward (into the scene), ``+X`` is right, ``+Y`` is
        down (standard image convention).  Pixels whose depth is zero,
        negative, NaN or infinite are dropped.

    Raises
    ------
    ValueError
        If *depth* is not 2-D, *mask* does not match the depth shape,
        *sample_step* is not a positive integer, or *focal_length* is zero
        or non-finite.

    Notes
    -----
    DepthPro assumes square pixels (aspect ratio = 1) and therefore a
    single focal length value is sufficient: ``fx == fy == focal_length``.

    The standard pinhole projection equations are::

        X = (u - cx) * Z / fx
        Y = (v - cy) * Z / fy
        Z = depth[v, u]

    where ``(u, v)`` are pixel column/row indices.
    """
    points, _, _ = _back_project(
        depth, focal_length, principal_point, mask, sample_step
    )
    return points


def rgbd_to_point_cloud(
    depth: np.ndarray,
    rgb: np.ndarray,
    focal_length: float,
    principal_point: Optional[Tuple[float, float]] = None,
    *,
    mask: Optional[np.ndarray] = None,
    sample_step: int = 1,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Back-project an RGB-D pair into a coloured 3D point cloud.

    The geometry is computed exactly as in :func:`depth_to_point_cloud`;
    the colours are sampled from *rgb* at the same kept pixels.

    Parameters
    ----------
    depth : np.ndarray
        (H, W) metric depth map.
    rgb : np.ndarray
        (H, W, 3) uint8 RGB image.
    focal_length : float
        Estimated focal length in pixels.
    principal_point : (cx, cy), optional
        Defaults to image centre.
    mask : np.ndarray, optional
        (H, W) boolean mask selecting pixels to keep.
    sample_step : int, default 1
        Spatial sub-sampling step.

    Returns
    -------
    points : np.ndarray
        (N, 3) float32 3D points.
    colors : np.ndarray
        (N, 3) uint8 RGB colours aligned with *points*.

    Raises
    ------
    ValueError
        If *depth* is not 2-D, *rgb* or *mask* do not match the depth
        resolution, *sample_step* is not a positive integer, or
        *focal_length* is zero or non-finite.
    """
    depth = _as_depth_map(depth, np.float32)
    rgb = np.asarray(rgb)

    if depth.shape[:2] != rgb.shape[:2]:
        raise ValueError(
            f"depth shape {depth.shape} and rgb shape {rgb.shape} must have same H×W"
        )

    points, keep, grid = _back_project(
        depth, focal_length, principal_point, mask, sample_step
    )
    # Sub-sample the image with the same strides, then apply the same
    # validity mask so that colours[i] belongs to points[i].
    colors = np.asarray(rgb[grid][keep], dtype=np.uint8)
    return points, colors


def _axis_gradient(values: np.ndarray, axis: int) -> np.ndarray:
    """
    Finite-difference derivative of *values* along *axis*.

    Uses central differences in the interior and one-sided differences at
    the two ends.  An axis with fewer than two samples has no neighbours to
    difference against, so the derivative is defined as zero there.
    """
    if values.shape[axis] < 2:
        return np.zeros_like(values)
    return np.gradient(values, axis=axis)


def normals_from_depth(
    depth: np.ndarray,
    focal_length: float,
    principal_point: Optional[Tuple[float, float]] = None,
) -> np.ndarray:
    """
    Compute per-pixel surface normals directly from the depth map.

    This is a fast, approximate normal estimator that works well for
    visualisation or as input to downstream surface-reconstruction
    methods (e.g. Poisson, NKSR).

    Every pixel is back-projected to 3D; the normal is the normalised
    cross product of the surface tangents along the image columns and
    rows, each estimated by finite differences of the 3D points.

    Parameters
    ----------
    depth : np.ndarray
        (H, W) metric depth map.
    focal_length : float
        Focal length in pixels.
    principal_point : (cx, cy), optional
        Defaults to image centre.

    Returns
    -------
    normals : np.ndarray
        (H, W, 3) float32 array of **unoriented** unit normals.
        ``normals[v, u]`` is the normal at pixel ``(u, v)``.  The normal is
        the zero vector where it cannot be estimated: at pixels whose own
        depth or any 4-neighbour's depth is invalid (zero, negative, NaN or
        infinite), and where the two tangents are degenerate.

    Raises
    ------
    ValueError
        If *depth* is not 2-D or *focal_length* is zero or non-finite.
    """
    depth = _as_depth_map(depth, np.float64)
    height, width = depth.shape
    cx, cy = _principal_point(principal_point, height, width)
    fx = fy = _focal_length(focal_length)

    # Neutralise invalid depths before doing arithmetic so that NaN/inf do
    # not spread through the differences; those pixels are zeroed below.
    valid = np.isfinite(depth) & (depth > 0.0)
    Z = np.where(valid, depth, 0.0)

    # Back-project every pixel to a 3D point.
    u, v = np.meshgrid(np.arange(width), np.arange(height))
    X = (u - cx) * Z / fx
    Y = (v - cy) * Z / fy
    points = np.stack([X, Y, Z], axis=-1)

    # Surface tangents along image columns (u) and rows (v).
    grad_x = _axis_gradient(points, axis=1)
    grad_y = _axis_gradient(points, axis=0)

    normals = np.cross(grad_x, grad_y)

    # A normal is only trustworthy when the pixel and all of its existing
    # 4-neighbours carry a valid depth, since the differences above read them.
    usable = valid.copy()
    usable[:, 1:] &= valid[:, :-1]
    usable[:, :-1] &= valid[:, 1:]
    usable[1:, :] &= valid[:-1, :]
    usable[:-1, :] &= valid[1:, :]

    # Normalise only where it is safe; everything else stays the zero vector.
    length = np.linalg.norm(normals, axis=-1, keepdims=True)
    safe = usable[..., None] & (length > 1e-8)
    unit = np.zeros_like(normals)
    np.divide(normals, length, out=unit, where=safe)

    return unit.astype(np.float32)