How to convert 2D coordinates to 3D coordinates using a RealSense camera

If you are working on a pick-and-place project that needs to pick objects at different heights, you can do this by converting 2D pixel coordinates from the color image into 3D coordinates using the camera's depth stream.

Requirements:

  • Intel RealSense SDK 2.0
  • pyrealsense2
  • numpy
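
pyrealsense2 and numpy are both available from PyPI; on a supported platform they can typically be installed with pip install pyrealsense2 numpy.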

Code:

  1. Camera setup (Python)
import numpy as np
import pyrealsense2 as rs

pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)

# Start streaming; the returned profile provides the depth scale and the
# intrinsics/extrinsics used in step 2
profile = pipeline.start(config)

# The depth scale converts raw 16-bit depth values to meters
depth_sensor = profile.get_device().first_depth_sensor()
depth_scale = depth_sensor.get_depth_scale()

frames = pipeline.wait_for_frames()
color_frame = frames.get_color_frame()
depth_frame = frames.get_depth_frame()
if not color_frame or not depth_frame:
    raise RuntimeError("Failed to capture a color/depth frame pair")

color_image = np.asanyarray(color_frame.get_data())

# Keep the raw (unaligned) depth frame: rs2_project_color_pixel_to_depth_pixel
# in step 2 maps the color pixel into the unaligned depth frame using the
# stream extrinsics, so rs.align is not needed for this method.
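
In a longer-running application you would normally capture inside a loop and release the camera on exit; a minimal sketch of that pattern, with the conversion from steps 2 and 3 going where the placeholder comment is:

try:
    while True:
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        depth_frame = frames.get_depth_frame()
        if not color_frame or not depth_frame:
            continue  # skip incomplete frame pairs
        # ... convert pixels as in steps 2 and 3 ...
except KeyboardInterrupt:
    pass
finally:
    pipeline.stop()  # always release the camera
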
  2. Deproject the pixel
x_center, y_center = 320, 240  # color-pixel coordinates to convert
color_pixel = [float(x_center), float(y_center)]

# Search range (in meters) used when matching the color pixel to a depth pixel
depth_min = 0.11
depth_max = 1.0

depth_intrinsic = profile.get_stream(rs.stream.depth).as_video_stream_profile().get_intrinsics()
color_intrinsic = profile.get_stream(rs.stream.color).as_video_stream_profile().get_intrinsics()
depth_to_color_extrinsic = profile.get_stream(rs.stream.depth).as_video_stream_profile().get_extrinsics_to(profile.get_stream(rs.stream.color))
color_to_depth_extrinsic = profile.get_stream(rs.stream.color).as_video_stream_profile().get_extrinsics_to(profile.get_stream(rs.stream.depth))

# Find the pixel in the depth frame that corresponds to the color pixel
depth_pixel = rs.rs2_project_color_pixel_to_depth_pixel(
    depth_frame.get_data(),
    depth_scale,
    depth_min, depth_max,
    depth_intrinsic,
    color_intrinsic,
    depth_to_color_extrinsic,
    color_to_depth_extrinsic,
    color_pixel,
)
x_depth_pixel, y_depth_pixel = depth_pixel

# get_distance() raises if the matched pixel falls outside the frame,
# e.g. when the measured depth lies outside [depth_min, depth_max]
try:
    depth = depth_frame.get_distance(int(x_depth_pixel), int(y_depth_pixel))
except RuntimeError:
    depth = 0.0  # no valid depth at this pixel

# Deproject the depth pixel to a 3D point in the depth camera's coordinate
# system; the values are in meters (multiply by 1000 for millimeters)
depth_point = rs.rs2_deproject_pixel_to_point(
    depth_intrinsic, [int(x_depth_pixel), int(y_depth_pixel)], depth)
x_depth_point, y_depth_point, z_depth_point = depth_point
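
For reference, this is what rs2_deproject_pixel_to_point computes in the simple no-distortion pinhole case; a sketch for intuition, not the SDK's implementation, which also handles the camera's distortion model:

def deproject_pinhole(intrin, pixel, depth):
    # Shift by the principal point (ppx, ppy), divide by the focal
    # lengths (fx, fy), then scale by the depth in meters
    x = (pixel[0] - intrin.ppx) / intrin.fx * depth
    y = (pixel[1] - intrin.ppy) / intrin.fy * depth
    return [x, y, depth]
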
  3. Calculate the robot coordinates
# Build the camera-frame point from step 2 as a homogeneous 2D vector
# (this example only transforms X and Y; the pick height comes from z_depth_point)
camera_coords = np.array([x_depth_point, y_depth_point, 1])

# Rotation between the camera frame and the robot frame
# (identity here; replace with your calibrated rotation)
R = np.array([[1, 0, 0],
              [0, 1, 0],
              [0, 0, 1]])

# Change the values according to the robot's capture position in X, Y
T = np.array([-0.15170, -0.54743, 0])  # robot position

# Transformation: robot_coords = R @ camera_coords + T
robot_coordinates = R @ camera_coords + T
robot_x = robot_coordinates[0]
robot_y = robot_coordinates[1]
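
As a quick sanity check with made-up numbers: a point the camera reports at x = 0.05 m, y = 0.10 m maps, with the identity rotation and the translation above, to:

example_coords = np.array([0.05, 0.10, 1])  # hypothetical camera-frame point
print((R @ example_coords + T)[:2])         # approx. [-0.1017, -0.44743]

In practice R and T come from calibrating the camera pose relative to the robot base (for example a hand-eye calibration); the identity rotation is only valid when the camera axes are parallel to the robot axes.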