Source code for ml3d.vis.boundingbox

import numpy as np
import cloudViewer as cv3d
from PIL import Image, ImageDraw


class BoundingBox3D:
    """Class that defines an axially oriented bounding box."""

    next_id = 1

    def __init__(self,
                 center,
                 front,
                 up,
                 left,
                 size,
                 label_class,
                 confidence,
                 meta=None,
                 show_class=False,
                 show_confidence=False,
                 show_meta=None,
                 identifier=None,
                 arrow_length=1.0):
        """Creates a bounding box.

        Front, up, left define the axes of the box and must be normalized and
        mutually orthogonal.

        Args:
            center: (x, y, z) that defines the center of the box.
            front: normalized (i, j, k) that defines the front direction of
                the box.
            up: normalized (i, j, k) that defines the up direction of the box.
            left: normalized (i, j, k) that defines the left direction of the
                box.
            size: (width, height, depth) that defines the size of the box, as
                measured from edge to edge.
            label_class: integer specifying the classification label. If an
                LUT is specified in create_lines() this will be used to
                determine the color of the box.
            confidence: confidence level of the box.
            meta: a user-defined string (optional).
            show_class: displays the class label in text near the box
                (optional).
            show_confidence: displays the confidence value in text near the
                box (optional).
            show_meta: displays the meta string in text near the box
                (optional).
            identifier: a unique integer that defines the id for the box
                (optional, will be generated if not provided).
            arrow_length: the length of the arrow in the front direction. Set
                to zero to disable the arrow (optional).
        """
        assert (len(center) == 3)
        assert (len(front) == 3)
        assert (len(up) == 3)
        assert (len(left) == 3)
        assert (len(size) == 3)

        self.center = np.array(center, dtype="float32")
        self.front = np.array(front, dtype="float32")
        self.up = np.array(up, dtype="float32")
        self.left = np.array(left, dtype="float32")
        self.size = size
        self.label_class = label_class
        self.confidence = confidence
        self.meta = meta
        self.show_class = show_class
        self.show_confidence = show_confidence
        self.show_meta = show_meta
        if identifier is not None:
            self.identifier = identifier
        else:
            self.identifier = "box:" + str(BoundingBox3D.next_id)
            BoundingBox3D.next_id += 1
        self.arrow_length = arrow_length

    def __repr__(self):
        s = str(self.identifier) + " (class=" + str(
            self.label_class) + ", conf=" + str(self.confidence)
        if self.meta is not None:
            s = s + ", meta=" + str(self.meta)
        s = s + ")"
        return s

    @staticmethod
    def create_lines(boxes, lut=None, out_format="lineset"):
        """Creates a LineSet that can be used to render the boxes.

        Args:
            boxes: the list of bounding boxes.
            lut: a ml3d.vis.LabelLUT that is used to look up the color based
                on the label_class argument of the BoundingBox3D constructor.
                If not provided, a color of 50% grey will be used. (optional)
            out_format (str): Output format. Can be "lineset" (default) for a
                cloudViewer LineSet or "dict" for a dictionary of lineset
                properties.

        Returns:
            For out_format == "lineset": cloudViewer.geometry.LineSet
            For out_format == "dict": Dictionary of lineset properties
                ("vertex_positions", "line_indices", "line_colors",
                "bbox_labels", "bbox_confidences").
        """
        if out_format not in ('lineset', 'dict'):
            raise ValueError("Please specify an out_format of 'lineset' "
                             "(default) or 'dict'.")

        nverts = 14
        nlines = 17
        points = np.zeros((nverts * len(boxes), 3), dtype="float32")
        indices = np.zeros((nlines * len(boxes), 2), dtype="int32")
        colors = np.zeros((nlines * len(boxes), 3), dtype="float32")

        for i, box in enumerate(boxes):
            pidx = nverts * i
            x = 0.5 * box.size[0] * box.left
            y = 0.5 * box.size[1] * box.up
            z = 0.5 * box.size[2] * box.front
            arrow_tip = box.center + z + box.arrow_length * box.front
            arrow_mid = box.center + z + 0.60 * box.arrow_length * box.front
            head_length = 0.3 * box.arrow_length

            # It seems to be substantially faster to assign directly for the
            # points, as opposed to points[pidx:pidx+nverts] = np.stack((...))
            points[pidx] = box.center + x + y + z
            points[pidx + 1] = box.center - x + y + z
            points[pidx + 2] = box.center - x + y - z
            points[pidx + 3] = box.center + x + y - z
            points[pidx + 4] = box.center + x - y + z
            points[pidx + 5] = box.center - x - y + z
            points[pidx + 6] = box.center - x - y - z
            points[pidx + 7] = box.center + x - y - z
            points[pidx + 8] = box.center + z
            points[pidx + 9] = arrow_tip
            points[pidx + 10] = arrow_mid + head_length * box.up
            points[pidx + 11] = arrow_mid - head_length * box.up
            points[pidx + 12] = arrow_mid + head_length * box.left
            points[pidx + 13] = arrow_mid - head_length * box.left

        # It is faster to break the indices and colors into their own loop.
        for i, box in enumerate(boxes):
            pidx = nverts * i
            idx = nlines * i
            indices[idx:idx + nlines] = (
                (pidx, pidx + 1), (pidx + 1, pidx + 2), (pidx + 2, pidx + 3),
                (pidx + 3, pidx), (pidx + 4, pidx + 5), (pidx + 5, pidx + 6),
                (pidx + 6, pidx + 7), (pidx + 7, pidx + 4),
                (pidx + 0, pidx + 4), (pidx + 1, pidx + 5),
                (pidx + 2, pidx + 6), (pidx + 3, pidx + 7),
                (pidx + 8, pidx + 9), (pidx + 9, pidx + 10),
                (pidx + 9, pidx + 11), (pidx + 9, pidx + 12),
                (pidx + 9, pidx + 13))

            if lut is not None and box.label_class in lut.labels:
                label = lut.labels[box.label_class]
                c = (label.color[0], label.color[1], label.color[2])
            else:
                if box.confidence == -1.0:
                    c = (0., 1.0, 0.)  # GT: green
                elif box.confidence >= 0 and box.confidence <= 1.0:
                    c = (1.0, 0., 0.)  # Prediction: red
                else:
                    c = (0.5, 0.5, 0.5)  # Grey

            colors[idx:idx + nlines] = c  # copies c to each element in the range

        if out_format == "lineset":
            lines = cv3d.geometry.LineSet()
            lines.points = cv3d.utility.Vector3dVector(points)
            lines.lines = cv3d.utility.Vector2iVector(indices)
            lines.colors = cv3d.utility.Vector3dVector(colors)
        elif out_format == "dict":
            lines = {
                "vertex_positions": points,
                "line_indices": indices,
                "line_colors": colors,
                "bbox_labels": tuple(b.label_class for b in boxes),
                "bbox_confidences": tuple(b.confidence for b in boxes)
            }

        return lines

    @staticmethod
    def project_to_img(boxes, img, lidar2img_rt=np.eye(4), lut=None):
        """Returns image with projected 3D bboxes.

        Args:
            boxes: the list of bounding boxes.
            img: an RGB image.
            lidar2img_rt: 4x4 transformation from lidar frame to image plane.
            lut: a ml3d.vis.LabelLUT that is used to look up the color based
                on the label_class argument of the BoundingBox3D constructor.
                If not provided, a color of 50% grey will be used. (optional)
        """
        lines = BoundingBox3D.create_lines(boxes, lut, out_format="dict")
        points = lines["vertex_positions"]
        indices = lines["line_indices"]
        colors = lines["line_colors"]

        # Project the 14 vertices of each box with a homogeneous transform.
        pts_4d = np.concatenate(
            [points.reshape(-1, 3),
             np.ones((len(boxes) * 14, 1))], axis=-1)
        pts_2d = pts_4d @ lidar2img_rt.T

        # Clamp the depth and divide by it to obtain pixel coordinates.
        pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)
        pts_2d[:, 0] /= pts_2d[:, 2]
        pts_2d[:, 1] /= pts_2d[:, 2]

        imgfov_pts_2d = pts_2d[..., :2].reshape(len(boxes), 14, 2)
        indices_2d = indices[..., :2].reshape(len(boxes), 17, 2)
        colors_2d = colors[..., :3].reshape(len(boxes), 17, 3)

        return BoundingBox3D.plot_rect3d_on_img(img,
                                                len(boxes),
                                                imgfov_pts_2d,
                                                indices_2d,
                                                colors_2d,
                                                thickness=3)

    @staticmethod
    def plot_rect3d_on_img(img,
                           num_rects,
                           rect_corners,
                           line_indices,
                           color=None,
                           thickness=1):
        """Plot the boundary lines of 3D rectangles on 2D images.

        Args:
            img (numpy.array): The numpy array of the image.
            num_rects (int): Number of 3D rectangles.
            rect_corners (numpy.array): Coordinates of the corners of the 3D
                rectangles. Should be in the shape of [num_rect, 8, 2] or
                [num_rect, 14, 2] if counting arrows.
            line_indices (numpy.array): Indicates connectivity of lines
                between rect_corners. Should be in the shape of
                [num_rect, 12, 2] or [num_rect, 17, 2] if counting arrows.
            color (numpy.array): Per-line colors in the range [0, 1].
                Default: all ones, i.e. white.
            thickness (int, optional): The thickness of bboxes. Default: 1.
        """
        img_pil = Image.fromarray(img)
        draw = ImageDraw.Draw(img_pil)

        if color is None:
            color = np.ones((line_indices.shape[0], line_indices.shape[1], 3))

        for i in range(num_rects):
            corners = rect_corners[i].astype(np.int32)

            # Ignore boxes whose corners fall far outside the image.
            interesting_corners_scale = 3.0
            if min(corners[:, 0]) < -interesting_corners_scale * img.shape[1] \
                    or max(corners[:, 0]) > interesting_corners_scale * img.shape[1] \
                    or min(corners[:, 1]) < -interesting_corners_scale * img.shape[0] \
                    or max(corners[:, 1]) > interesting_corners_scale * img.shape[0]:
                continue

            for j, (start, end) in enumerate(line_indices[i]):
                c = tuple(color[i][j] * 255)  # TODO: not working
                c = (int(c[0]), int(c[1]), int(c[2]))
                # line_indices holds global vertex indices (pidx offsets from
                # create_lines), so map them back to the 14 corners of box i.
                if i != 0:
                    pt1 = (corners[(start) % (14 * i), 0],
                           corners[(start) % (14 * i), 1])
                    pt2 = (corners[(end) % (14 * i), 0],
                           corners[(end) % (14 * i), 1])
                else:
                    pt1 = (corners[start, 0], corners[start, 1])
                    pt2 = (corners[end, 0], corners[end, 1])
                draw.line([pt1, pt2], fill=c, width=thickness)

        return np.array(img_pil).astype(np.uint8)
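
A minimal usage sketch follows; it is not part of the module and the values are
hypothetical, assuming only the class above. It exercises both output formats of
create_lines.

if __name__ == "__main__":
    # Hypothetical example box; front/up/left must be normalized and
    # mutually orthogonal.
    box = BoundingBox3D(center=(0.0, 0.0, 0.0),
                        front=(0.0, 0.0, 1.0),
                        up=(0.0, 1.0, 0.0),
                        left=(1.0, 0.0, 0.0),
                        size=(2.0, 1.5, 4.0),
                        label_class=0,
                        confidence=0.9)

    # LineSet output that can be added to a cloudViewer visualizer scene.
    lineset = BoundingBox3D.create_lines([box])

    # Dictionary output: raw vertex/line arrays plus labels and confidences.
    props = BoundingBox3D.create_lines([box], out_format="dict")
    print(props["vertex_positions"].shape)  # (14, 3): 8 corners + 6 arrow points
    print(props["line_indices"].shape)      # (17, 2): 12 box edges + 5 arrow lines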