# camera.py
import math
import xml.etree.ElementTree as ET

import numpy as np
import torch
from scipy.linalg import lstsq
from skimage.transform import rescale, rotate

def convert_distortion_parms(k1, k2, fl, fx, fy, width, height):
# OpenCV wants radial distortion parameters that are applied to image plane coordinates
# prior to being scaled by fx and fy (so not pixel coordinates). In contrast, k1 and k2
# are defined via Tsai camera calibration http://homepages.inf.ed.ac.uk/rbf/CVonline/LOCAL_COPIES/DIAS1/
K1 = k1 * (fl ** 2.0)
K2 = k2 * (fl ** 4.0)
# Also, K1 and K2 are actually undistortion coefficients. They go from distorted to undistorted image
# plane coordinates. OpenCV wants coefficients that go the other way.
r_values = .01 * np.array(range(1, 101)) * (((width / fx) ** 2.0 + (height / fy) ** 2.0) ** 0.5)
undistorted_r_values = r_values * (1 + K1 * (r_values ** 2.0) + K2 * (r_values ** 4.0))
distortion_factors = r_values / undistorted_r_values
    # Given the undistorted and distorted distances, we solve for the new distortion factors via linear least squares
    k1, k2 = lstsq(np.column_stack([undistorted_r_values ** 2.0, undistorted_r_values ** 4.0]), distortion_factors - 1.0)[0]
return (k1, k2)
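
# Illustrative round-trip check (all values below are made up, not from the original
# code): undistort a radius with the Tsai-style coefficients, then re-distort it with
# the fitted OpenCV-style coefficients; the result should land close to where we started.
def _demo_convert_distortion_parms():
    fl, fx, fy, w, h = 0.0045, 3000.0, 3000.0, 4000, 3000
    k1_tsai, k2_tsai = 0.05, 0.001
    K1, K2 = convert_distortion_parms(k1_tsai, k2_tsai, fl, fx, fy, w, h)
    r = 1.0  # image-plane radius within the sampled range
    r_undist = r * (1 + k1_tsai * fl ** 2 * r ** 2 + k2_tsai * fl ** 4 * r ** 4)
    r_redist = r_undist * (1 + K1 * r_undist ** 2 + K2 * r_undist ** 4)
    print(abs(r_redist - r))  # expected to be small over the fitted range
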
def getWorld2View2(R, t, translate=np.array([.0, .0, .0]), scale=1.0):
    '''Build a 4x4 world-to-view matrix, optionally recentering and rescaling the camera center.'''
Rt = np.zeros((4, 4))
Rt[:3, :3] = R.transpose()
Rt[:3, 3] = t
Rt[3, 3] = 1.0
C2W = np.linalg.inv(Rt)
cam_center = C2W[:3, 3]
cam_center = (cam_center + translate) * scale
C2W[:3, 3] = cam_center
Rt = np.linalg.inv(C2W)
return np.float32(Rt)
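
# Illustrative sketch (made-up values): the code stores R.transpose() as the view
# rotation, so R here is a camera-to-world rotation while t is the world-to-camera
# translation; translate/scale shift and rescale the recovered camera center.
def _demo_getWorld2View2():
    R_c2w = np.eye(3)
    t_w2c = np.array([0.0, 0.0, 5.0])
    w2v = getWorld2View2(R_c2w, t_w2c, translate=np.array([1.0, 0.0, 0.0]), scale=2.0)
    print(w2v)  # 4x4 world-to-view matrix with the shifted, scaled camera center
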
def extract_agisoft_intrinsic(rt, sensor_idx, resize_factor=1, rot=0):
    '''
    Extract the intrinsic parameters (radial distortion, intrinsic matrix, image size)
    for the sensor with id sensor_idx from the <sensors> node of an Agisoft Metashape XML.
    '''
    f = None
    K = None
    K2 = None
    pw = None
    py = None
    cx = None
    cy = None
    img_w = None
    img_h = None
for t in rt.findall("sensor"):
if int(t.get("id")) == sensor_idx:
for p in t.findall("property"):
if p.get("name") == "pixel_width":
pw = float(p.get("value"))
if p.get("name") == "pixel_height":
py = float(p.get("value"))
res_node = t.find("resolution")
img_w = int(res_node.get("width"))
img_h = int(res_node.get("height"))
c_node = t.find("calibration")
f = float(c_node.find("f").text)
if c_node.find("cx") is not None:
cx = img_w / 2.0 + float(c_node.find("cx").text)
cy = img_h / 2.0 + float(c_node.find("cy").text)
else:
cx = img_w / 2.0
cy = img_h / 2.0
if c_node.find("k1") is not None:
K = float(c_node.find("k1").text)
else:
K = 0.0
if c_node.find("k2") is not None:
K2 = float(c_node.find("k2").text)
else:
K2 = 0.0
break
focal_length = f
principal_point = np.array([cx, cy])
    # Default the pixel size to 1.0 when the XML does not provide it
    pw = pw if pw is not None else 1.0
    py = py if py is not None else 1.0
    pixel_size = np.array([pw, py])
if resize_factor != 1:
img_w, img_h = math.floor(img_w / resize_factor), math.floor(img_h / resize_factor)
focal_length /= resize_factor
principal_point /= resize_factor
    # Convert the Tsai-style undistortion coefficients to OpenCV-style distortion coefficients
    K1, K2 = convert_distortion_parms(K, K2, focal_length * pixel_size[0], focal_length, focal_length,
                                      img_w, img_h)
radial_distortion = np.array([K1, K2]).reshape(-1, )
    if rot != 0:
        # For a rotated image, the principal point coordinates are swapped into the rotated frame
intrinsics = np.array([
[focal_length, 0, principal_point[1]],
[0, focal_length, img_w - principal_point[0]],
[0, 0, 1.0]])
img_size = np.array([img_w, img_h])
else:
intrinsics = np.array([
[focal_length, 0, principal_point[0]],
[0, focal_length, principal_point[1]],
[0, 0, 1.0]])
img_size = np.array([img_h, img_w])
return radial_distortion, intrinsics, img_size
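
# Illustrative sketch: the function expects the <sensors> node of a Metashape chunk.
# The minimal XML below is a made-up example of that layout, not a real calibration.
def _demo_extract_agisoft_intrinsic():
    xml = '''<sensors>
      <sensor id="0">
        <resolution width="4000" height="3000"/>
        <property name="pixel_width" value="0.0000015"/>
        <property name="pixel_height" value="0.0000015"/>
        <calibration>
          <f>3000.0</f>
          <cx>5.0</cx><cy>-3.0</cy>
          <k1>0.01</k1><k2>0.001</k2>
        </calibration>
      </sensor>
    </sensors>'''
    distortion, intrinsics, img_size = extract_agisoft_intrinsic(ET.fromstring(xml), 0)
    print(intrinsics)  # principal point offset by (cx, cy) from the image center
    print(img_size)    # (height, width) since rot == 0
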
def extract_agisoft_extrinsic(rt, component, img_name, to_meters=False, rot=0):
    '''
    Extract the extrinsic parameters for the camera labelled img_name from the
    <cameras> node of an Agisoft Metashape XML. Returns the (3, 4) world-to-camera
    matrix in the COLMAP convention, the camera center, the viewing direction, the
    sensor id, and the optional component transform. to_meters is currently unused.
    '''
    node = None
    sensor_id = None
    component = component.find("component")
transform_g = None
if component is not None:
if component.find("transform") is not None:
R_G = np.array([float(val) for val in component.find("transform").find("rotation").text.split()]).reshape((3, 3))
T_G = np.array([float(val) for val in component.find("transform").find("translation").text.split()]).reshape((3,))
transform_g = np.eye(4)
transform_g[:3, :3] = R_G
transform_g[:3, 3] = T_G
for t in rt.findall("camera"):
if t.get("label") == img_name:
sensor_id = int(t.get("sensor_id"))
node = t
break
    if node is None:
        raise ValueError("camera with label '%s' not found in the calibration file" % img_name)
    transform = np.array([float(val) for val in node.find("transform").text.split()]).reshape((4, 4))
    transform[:3, 1:3] *= -1  # camera-to-world; flip to the OpenGL axis convention
    theta = -1 * rot * 90 * np.pi / 180  # rotate by rot * 90 degrees
cos_theta, sin_theta = np.cos(theta), np.sin(theta)
rotation_matrix_z_90 = np.array([
[cos_theta, -sin_theta, 0],
[sin_theta, cos_theta, 0],
[0, 0, 1],
])
transform[:3, :3] = transform[:3, :3].dot(rotation_matrix_z_90)
extrinsics_opengl = np.linalg.inv(transform)[:3, :4]
R_opengl = extrinsics_opengl[:3, :3]
t_opengl = extrinsics_opengl[:3, 3]
# Define the coordinate transformation matrix from OpenGL to COLMAP
    # This flips the Y and Z axes
coord_transform = np.array([[1, 0, 0],
[0, -1, 0],
[0, 0, -1]])
# Apply the coordinate transformation
R_colmap = np.dot(coord_transform, R_opengl)
t_colmap = np.dot(coord_transform, t_opengl)
extrinsic_colmap = np.eye(4)
extrinsic_colmap[:3, :3] = R_colmap
extrinsic_colmap[:3, 3] = t_colmap
c2w = get_camera_to_world_transformation(extrinsic_colmap)
center = c2w[:3, 3]
view_direction = c2w[:3, :3].dot(np.array([0, 0, 1]))
return extrinsic_colmap[:3, :4], center, view_direction, sensor_id, transform_g
def load_camera(calib_fname, img_name, resize_factor=1, to_meters=False, rt=0):
    '''
    Load the camera parameters for image img_name from an Agisoft Metashape
    calibration XML file. rt is the number of 90-degree rotations applied to the image.
    '''
    tree = ET.parse(calib_fname)
    root = tree.getroot()
    root = root.find("chunk")
    extrinsics, center, view_direction, sensor_id, trans_g = extract_agisoft_extrinsic(root.find("cameras"), root.find("components"), img_name, to_meters, rot=rt)
    radial_distortion, intrinsics, img_size = extract_agisoft_intrinsic(root.find("sensors"), sensor_id, resize_factor, rot=rt)
camera = {
'intrinsics': intrinsics,
'extrinsics': extrinsics,
'radial_distortion': radial_distortion,
'camera_center': center,
'view_direction': view_direction,
'image_size': img_size,
'name': img_name
}
return camera, trans_g
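
# Illustrative usage sketch: "calibration.xml" and "IMG_0001" below are hypothetical
# placeholders for a Metashape calibration file and a camera label inside it.
def _demo_load_camera():
    camera, trans_g = load_camera("calibration.xml", "IMG_0001", resize_factor=2, rt=0)
    print(camera['intrinsics'])  # 3x3 intrinsic matrix
    print(camera['extrinsics'])  # 3x4 world-to-camera matrix (COLMAP convention)
    print(camera['image_size'])  # (height, width) after resizing
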
def get_camera_to_world_transformation(extrinsics):
    '''
    Get the transformation matrix (3, 4) that maps points from the local camera
    coordinate system to the world coordinate system.
    '''
R = extrinsics[:3, :3]
t = extrinsics[:3, 3]
return np.hstack((R.T, -R.T.dot(t)[:,np.newaxis]))
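
# Illustrative sanity check: composing the world-to-camera extrinsics with the
# camera-to-world transform returned above should recover the original point.
# The rotation and point below are arbitrary made-up values.
def _demo_camera_to_world_roundtrip():
    theta = np.pi / 6
    R_w2c = np.array([[np.cos(theta), -np.sin(theta), 0],
                      [np.sin(theta),  np.cos(theta), 0],
                      [0,              0,             1]])
    extrinsics = np.hstack((R_w2c, np.array([[1.0], [2.0], [3.0]])))
    c2w = get_camera_to_world_transformation(extrinsics)
    p = np.array([0.5, -1.0, 4.0])
    p_cam = extrinsics.dot(np.append(p, 1.0))   # world -> camera
    p_back = c2w.dot(np.append(p_cam, 1.0))     # camera -> world
    print(np.allclose(p_back, p))  # True
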
def rotate_image(image, camera=None, angle=90):
    # camera is accepted for signature symmetry with rotate_image_cam but is unused here
    image = rotate(image, angle, resize=True)
    return image
def rotate_image_cam(image, camera=None, angle=90):
image = rotate(image, angle, resize=True)
if camera is not None:
Rt = np.array([
[ 0, 1, 0 ],
[-1, 0, camera['image_size'][1] ],
[ 0, 0, 1 ]
])
        theta = - angle * np.pi / 180  # rotation angle in radians
cos_theta, sin_theta = np.cos(theta), np.sin(theta)
rotation_matrix_z_90 = np.array([
[cos_theta, -sin_theta, 0],
[sin_theta, cos_theta, 0],
[0, 0, 1],
])
        camera = dict(camera)  # shallow copy so the caller's camera dict is not mutated
        fx, fy = camera['intrinsics'][0, 0], camera['intrinsics'][1, 1]
camera['intrinsics'] = Rt.dot(camera['intrinsics'])
camera['intrinsics'][0, 0] = fy
camera['intrinsics'][1, 1] = fx
camera['intrinsics'][0, 1] = 0.0
camera['intrinsics'][1, 0] = 0.0
camera['image_size'] = camera['image_size'][::-1]
return image, camera
else:
return image
def scale_image(image, scale_factor, camera=None):
    '''Rescale an image and, if a camera is given, scale its intrinsics to match.'''
    img = rescale(image, scale_factor, channel_axis=2, anti_aliasing=True)
if camera is None:
return img
else:
scale_mat = np.eye(3)
scale_mat[0,0] = scale_mat[1,1] = scale_factor
camera['intrinsics'] = scale_mat.dot(camera['intrinsics'])
return img, camera
def perspective_project(points, camera_intrinsics, camera_extrinsics, radial_distortion, eps=1e-7):
    '''
    Projection of 3D points into the image plane using a perspective transformation
    with radial distortion.
    :param points: array of 3D points (num_points X 3)
    :param camera_intrinsics: (3, 3) intrinsic matrix
    :param camera_extrinsics: (3, 4) world-to-camera matrix
    :param radial_distortion: radial distortion coefficients (K1, K2)
    :return: array of projected points in homogeneous image coordinates (num_points X 3)
    '''
num_points, _ = points.shape
ones = np.ones((num_points, 1))
points_homogeneous = np.concatenate((points, ones), axis=-1)
# Transformation from the world coordinate system to the image coordinate system using the camera extrinsic rotation (R) and translation (T)
points_image = camera_extrinsics.dot(points_homogeneous.T).T
# Transformation from 3D camera coordinate system to the undistorted image plane
z_coords = points_image[:,2]
z_coords[np.where(np.abs(z_coords) < eps)] = 1.0
points_image[:,0] = points_image[:,0] / z_coords
points_image[:,1] = points_image[:,1] / z_coords
# Transformation from undistorted image plane to distorted image coordinates
K1, K2 = radial_distortion[0], radial_distortion[1]
r2 = points_image[:,0]**2 + points_image[:,1]**2
r4 = r2**2
radial_distortion_factor = (1 + K1*r2 + K2*r4)
points_image[:,0] = points_image[:,0]*radial_distortion_factor
points_image[:,1] = points_image[:,1]*radial_distortion_factor
points_image[:,2] = 1.0
# Transformation from distorted image coordinates to the final image coordinates with the camera intrinsics
points_image = camera_intrinsics.dot(points_image.T).T
return points_image
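
# Illustrative usage sketch: project two 3D points with a made-up pinhole camera
# (no distortion); a point on the optical axis lands on the principal point.
def _demo_perspective_project():
    intrinsics = np.array([[1000.0, 0.0, 320.0],
                           [0.0, 1000.0, 240.0],
                           [0.0, 0.0, 1.0]])
    extrinsics = np.hstack((np.eye(3), np.zeros((3, 1))))  # camera at the world origin
    distortion = np.array([0.0, 0.0])
    points = np.array([[0.0, 0.0, 2.0],   # on the optical axis
                       [0.1, 0.0, 2.0]])
    projected = perspective_project(points, intrinsics, extrinsics, distortion)
    print(projected[:, :2])  # first row is the principal point (320, 240)
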
def batch_perspective_project(points, camera_intrinsics, camera_extrinsics, radial_distortion, eps=1e-7):
    '''
    Batched torch version of perspective_project.
    :param points: (batch_size, num_points, 3) tensor of 3D points
    :param camera_intrinsics: (batch_size, 3, 3) intrinsic matrices
    :param camera_extrinsics: (batch_size, 3, 4) world-to-camera matrices
    :param radial_distortion: (batch_size, 2) radial distortion coefficients
    :return: (batch_size, num_points, 2) tensor of projected 2D points
    '''
    device = points.device
    batch_size, num_points, _ = points.shape
points = points.transpose(1, 2)
ones = torch.ones(batch_size, 1, num_points).to(device)
points_homogeneous = torch.cat((points, ones), axis=-2) # (batch_size, 4, num_points)
# Transformation from the world coordinate system to the image coordinate system using the camera extrinsic rotation (R) and translation (T)
points_image = camera_extrinsics.bmm(points_homogeneous) # (batch_size, 3, num_points)
# Transformation from 3D camera coordinate system to the undistorted image plane
mask = (points_image.abs() < eps)
mask[:,:2,:] = False
points_image[mask] = 1.0 # Avoid division by zero
points_image_x = points_image[:,0,:] / points_image[:,2,:]
points_image_y = points_image[:,1,:] / points_image[:,2,:]
# Transformation from undistorted image plane to distorted image coordinates
K1, K2 = radial_distortion[:,0], radial_distortion[:,1] # (batch_size)
r2 = points_image_x**2 + points_image_y**2 # (batch_size, num_points)
r4 = r2**2
radial_distortion_factor = (1 + K1[:, None]*r2 + K2[:, None]*r4) # (batch_size, num_points)
points_image_x = points_image_x*radial_distortion_factor
points_image_y = points_image_y*radial_distortion_factor
points_image_z = torch.ones_like(points_image[:,2,:])
points_image = torch.cat((points_image_x[:, None, :], points_image_y[:, None, :], points_image_z[:, None, :]), dim=1)
# Transformation from distorted image coordinates to the final image coordinates with the camera intrinsics
points_image = camera_intrinsics.bmm(points_image) # (batch_size, 3, num_points)
points_image = torch.transpose(points_image, 1, 2)[:,:,:2] # (batch_size, num_points, 2)
return points_image
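
# Illustrative usage sketch: the batched torch version mirrors the numpy one above.
# The same made-up pinhole camera is replicated across a batch of size 2.
def _demo_batch_perspective_project():
    intrinsics = torch.tensor([[1000.0, 0.0, 320.0],
                               [0.0, 1000.0, 240.0],
                               [0.0, 0.0, 1.0]]).unsqueeze(0).repeat(2, 1, 1)
    extrinsics = torch.cat((torch.eye(3), torch.zeros(3, 1)), dim=1).unsqueeze(0).repeat(2, 1, 1)
    distortion = torch.zeros(2, 2)
    points = torch.tensor([[0.0, 0.0, 2.0], [0.1, 0.0, 2.0]]).unsqueeze(0).repeat(2, 1, 1)
    projected = batch_perspective_project(points, intrinsics, extrinsics, distortion)
    print(projected)  # (2, 2, 2); the first point projects to (320, 240) in both batches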