Source code for mbodied.types.utils.estimate_intrinsics

import cv2
import numpy as np
from scipy.optimize import least_squares


[docs] def estimate_intrinsic_parameters( unscaled_depth_map: np.ndarray, image: np.ndarray, seg_map: np.ndarray = None, ) -> dict: """Estimate intrinsic camera parameters given an unscaled depth map, image, and optionally a semantic segmentation map. Args: unscaled_depth_map (np.ndarray): Unscaled depth map. image (np.ndarray): Image corresponding to the depth map. seg_map (np.ndarray, optional): Semantic segmentation map. Defaults to None. Returns: dict: Estimated intrinsic parameters including focal lengths and principal point. Example: >>> import cv2 >>> import numpy as np >>> from mbodied.agents.sense.utils.estimate_intrinsics import estimate_intrinsic_parameters >>> unscaled_depth_map = np.random.rand(480, 640) >>> image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8) >>> estimate_intrinsic_parameters(unscaled_depth_map, image) {'fx': 1.0, 'fy': 1.0, 'cx': 320.0, 'cy': 240.0} """ # Extract feature points from the image gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) orb = cv2.ORB_create() keypoints, descriptors = orb.detectAndCompute(gray_image, None) if seg_map is not None: # Filter feature points using the segmentation map filtered_keypoints = [] for kp in keypoints: x, y = int(kp.pt[0]), int(kp.pt[1]) if seg_map[y, x] != 0: # Use only points within the relevant segmented areas filtered_keypoints.append(kp) keypoints = filtered_keypoints if not keypoints: raise ValueError("No valid feature points found.") # Get the 2D coordinates of the filtered feature points points_2d = np.array([kp.pt for kp in keypoints], dtype=np.float32) # Get the 3D coordinates of the filtered feature points using the depth map h, w = unscaled_depth_map.shape fx, fy = 1.0, 1.0 # Initial guess for focal lengths cx, cy = w / 2, h / 2 # Initial guess for principal point points_3d = [] for pt in points_2d: x, y = int(pt[0]), int(pt[1]) z = unscaled_depth_map[y, x] if z > 0: # Valid depth X = (x - cx) * z / fx Y = (y - cy) * z / fy points_3d.append([X, Y, z]) if len(points_3d) == 0: raise ValueError("No valid 3D points found.") points_3d = np.array(points_3d, dtype=np.float32) points_2d = points_2d[: points_3d.shape[0]] # Match the number of points # Wrap points in lists for cv2.calibrateCamera object_points = [points_3d] image_points = [points_2d] # Create initial guess for the intrinsic matrix initial_intrinsic_matrix = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32) # Use OpenCV's calibration function with RANSAC _, camera_matrix, dist_coeffs, rvecs, tvecs = cv2.calibrateCamera( object_points, image_points, (w, h), initial_intrinsic_matrix, None, flags=cv2.CALIB_USE_INTRINSIC_GUESS | cv2.CALIB_RATIONAL_MODEL, ) # Bundle adjustment refinement def reprojection_error(params, points_3d, points_2d): fx, fy, cx, cy = params projected_points = [] for p in points_3d: x, y, z = p u = fx * x / z + cx v = fy * y / z + cy projected_points.append([u, v]) projected_points = np.array(projected_points) return (projected_points - points_2d).ravel() initial_params = [camera_matrix[0, 0], camera_matrix[1, 1], camera_matrix[0, 2], camera_matrix[1, 2]] res = least_squares(reprojection_error, initial_params, args=(points_3d, points_2d)) refined_params = res.x return { "fx": refined_params[0], "fy": refined_params[1], "cx": refined_params[2], "cy": refined_params[3], }
if __name__ == "__main__": import doctest doctest.testmod()