Source code for ml3d.vis.boundingbox

import numpy as np
import cloudViewer as cv3d
from PIL import Image, ImageDraw


class BoundingBox3D:
    """Class that defines an axially oriented bounding box."""

    next_id = 1

    def __init__(self,
                 center,
                 front,
                 up,
                 left,
                 size,
                 label_class,
                 confidence,
                 meta=None,
                 show_class=False,
                 show_confidence=False,
                 show_meta=None,
                 identifier=None,
                 arrow_length=1.0):
        """Creates a bounding box.

        Front, up, left define the axes of the box and must be normalized and
        mutually orthogonal.

        Args:
            center: (x, y, z) that defines the center of the box.
            front: normalized (i, j, k) that defines the front direction of
                the box.
            up: normalized (i, j, k) that defines the up direction of the box.
            left: normalized (i, j, k) that defines the left direction of the
                box.
            size: (width, height, depth) that defines the size of the box, as
                measured from edge to edge.
            label_class: integer specifying the classification label. If an
                LUT is specified in create_lines() this will be used to
                determine the color of the box.
            confidence: confidence level of the box.
            meta: a user-defined string (optional).
            show_class: displays the class label in text near the box
                (optional).
            show_confidence: displays the confidence value in text near the
                box (optional).
            show_meta: displays the meta string in text near the box
                (optional).
            identifier: a unique integer that defines the id for the box
                (optional, will be generated if not provided).
            arrow_length: the length of the arrow in the front direction. Set
                to zero to disable the arrow (optional).
        """
        assert (len(center) == 3)
        assert (len(front) == 3)
        assert (len(up) == 3)
        assert (len(left) == 3)
        assert (len(size) == 3)

        self.center = np.array(center, dtype="float32")
        self.front = np.array(front, dtype="float32")
        self.up = np.array(up, dtype="float32")
        self.left = np.array(left, dtype="float32")
        self.size = size
        self.label_class = label_class
        self.confidence = confidence
        self.meta = meta
        self.show_class = show_class
        self.show_confidence = show_confidence
        self.show_meta = show_meta
        if identifier is not None:
            self.identifier = identifier
        else:
            self.identifier = "box:" + str(BoundingBox3D.next_id)
            BoundingBox3D.next_id += 1
        self.arrow_length = arrow_length

    def __repr__(self):
        s = str(self.identifier) + " (class=" + str(
            self.label_class) + ", conf=" + str(self.confidence)
        if self.meta is not None:
            s = s + ", meta=" + str(self.meta)
        s = s + ")"
        return s

    @staticmethod
    def create_lines(boxes, lut=None, out_format="lineset"):
        """Creates a LineSet that can be used to render the boxes.

        Args:
            boxes: the list of bounding boxes.
            lut: a ml3d.vis.LabelLUT that is used to look up the color based
                on the label_class argument of the BoundingBox3D constructor.
                If not provided, a color of 50% grey will be used. (optional)
            out_format (str): Output format. Can be "lineset" (default) for a
                cloudViewer LineSet or "dict" for a dictionary of lineset
                properties.

        Returns:
            For out_format == "lineset": cloudViewer.geometry.LineSet
            For out_format == "dict": Dictionary of lineset properties
                ("vertex_positions", "line_indices", "line_colors",
                "bbox_labels", "bbox_confidences").
        """
        if out_format not in ('lineset', 'dict'):
            raise ValueError("Please specify an out_format of 'lineset' "
                             "(default) or 'dict'.")

        nverts = 14
        nlines = 17
        points = np.zeros((nverts * len(boxes), 3), dtype="float32")
        indices = np.zeros((nlines * len(boxes), 2), dtype="int32")
        colors = np.zeros((nlines * len(boxes), 3), dtype="float32")

        for i, box in enumerate(boxes):
            pidx = nverts * i
            x = 0.5 * box.size[0] * box.left
            y = 0.5 * box.size[1] * box.up
            z = 0.5 * box.size[2] * box.front
            arrow_tip = box.center + z + box.arrow_length * box.front
            arrow_mid = box.center + z + 0.60 * box.arrow_length * box.front
            head_length = 0.3 * box.arrow_length

            # It seems to be substantially faster to assign directly for the
            # points, as opposed to points[pidx:pidx+nverts] = np.stack((...))
            points[pidx] = box.center + x + y + z
            points[pidx + 1] = box.center - x + y + z
            points[pidx + 2] = box.center - x + y - z
            points[pidx + 3] = box.center + x + y - z
            points[pidx + 4] = box.center + x - y + z
            points[pidx + 5] = box.center - x - y + z
            points[pidx + 6] = box.center - x - y - z
            points[pidx + 7] = box.center + x - y - z
            points[pidx + 8] = box.center + z
            points[pidx + 9] = arrow_tip
            points[pidx + 10] = arrow_mid + head_length * box.up
            points[pidx + 11] = arrow_mid - head_length * box.up
            points[pidx + 12] = arrow_mid + head_length * box.left
            points[pidx + 13] = arrow_mid - head_length * box.left

        # It is faster to break the indices and colors into their own loop.
        for i, box in enumerate(boxes):
            pidx = nverts * i
            idx = nlines * i
            indices[idx:idx + nlines] = (
                (pidx, pidx + 1), (pidx + 1, pidx + 2), (pidx + 2, pidx + 3),
                (pidx + 3, pidx), (pidx + 4, pidx + 5), (pidx + 5, pidx + 6),
                (pidx + 6, pidx + 7), (pidx + 7, pidx + 4),
                (pidx + 0, pidx + 4), (pidx + 1, pidx + 5),
                (pidx + 2, pidx + 6), (pidx + 3, pidx + 7),
                (pidx + 8, pidx + 9), (pidx + 9, pidx + 10),
                (pidx + 9, pidx + 11), (pidx + 9, pidx + 12),
                (pidx + 9, pidx + 13))

            if lut is not None and box.label_class in lut.labels:
                label = lut.labels[box.label_class]
                c = (label.color[0], label.color[1], label.color[2])
            else:
                if box.confidence == -1.0:
                    c = (0., 1.0, 0.)  # GT: green
                elif box.confidence >= 0 and box.confidence <= 1.0:
                    c = (1.0, 0., 0.)  # Prediction: red
                else:
                    c = (0.5, 0.5, 0.5)  # Grey

            colors[idx:idx + nlines] = c  # copies c to each element in the range

        if out_format == "lineset":
            lines = cv3d.geometry.LineSet()
            lines.points = cv3d.utility.Vector3dVector(points)
            lines.lines = cv3d.utility.Vector2iVector(indices)
            lines.colors = cv3d.utility.Vector3dVector(colors)
        elif out_format == "dict":
            lines = {
                "vertex_positions": points,
                "line_indices": indices,
                "line_colors": colors,
                "bbox_labels": tuple(b.label_class for b in boxes),
                "bbox_confidences": tuple(b.confidence for b in boxes)
            }

        return lines

    @staticmethod
    def project_to_img(boxes, img, lidar2img_rt=np.eye(4), lut=None):
        """Returns image with projected 3D bboxes.

        Args:
            boxes: the list of bounding boxes.
            img: an RGB image.
            lidar2img_rt: 4x4 transformation from lidar frame to image plane.
            lut: a ml3d.vis.LabelLUT that is used to look up the color based
                on the label_class argument of the BoundingBox3D constructor.
                If not provided, a color of 50% grey will be used. (optional)
        """
        lines = BoundingBox3D.create_lines(boxes, lut, out_format="dict")
        points = lines["vertex_positions"]
        indices = lines["line_indices"]
        colors = lines["line_colors"]

        # Project the 14 vertices of each box with a homogeneous transform.
        pts_4d = np.concatenate(
            [points.reshape(-1, 3),
             np.ones((len(boxes) * 14, 1))], axis=-1)
        pts_2d = pts_4d @ lidar2img_rt.T

        # Clamp the depth and divide by it to obtain pixel coordinates.
        pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)
        pts_2d[:, 0] /= pts_2d[:, 2]
        pts_2d[:, 1] /= pts_2d[:, 2]

        imgfov_pts_2d = pts_2d[..., :2].reshape(len(boxes), 14, 2)
        indices_2d = indices[..., :2].reshape(len(boxes), 17, 2)
        colors_2d = colors[..., :3].reshape(len(boxes), 17, 3)

        return BoundingBox3D.plot_rect3d_on_img(img,
                                                len(boxes),
                                                imgfov_pts_2d,
                                                indices_2d,
                                                colors_2d,
                                                thickness=3)

    @staticmethod
    def plot_rect3d_on_img(img,
                           num_rects,
                           rect_corners,
                           line_indices,
                           color=None,
                           thickness=1):
        """Plot the boundary lines of 3D rectangles on 2D images.

        Args:
            img (numpy.array): The numpy array of the image.
            num_rects (int): Number of 3D rectangles.
            rect_corners (numpy.array): Coordinates of the corners of the 3D
                rectangles. Should be in the shape of [num_rect, 8, 2] or
                [num_rect, 14, 2] if counting arrows.
            line_indices (numpy.array): Indicates connectivity of lines
                between rect_corners. Should be in the shape of
                [num_rect, 12, 2] or [num_rect, 17, 2] if counting arrows.
            color (numpy.array): Per-line colors in the range [0, 1].
                Default: all ones, i.e. white.
            thickness (int, optional): The thickness of bboxes. Default: 1.
        """
        img_pil = Image.fromarray(img)
        draw = ImageDraw.Draw(img_pil)

        if color is None:
            color = np.ones((line_indices.shape[0], line_indices.shape[1], 3))

        for i in range(num_rects):
            corners = rect_corners[i].astype(np.int32)

            # Ignore boxes whose corners fall far outside the image.
            interesting_corners_scale = 3.0
            if min(corners[:, 0]) < -interesting_corners_scale * img.shape[1] \
                    or max(corners[:, 0]) > interesting_corners_scale * img.shape[1] \
                    or min(corners[:, 1]) < -interesting_corners_scale * img.shape[0] \
                    or max(corners[:, 1]) > interesting_corners_scale * img.shape[0]:
                continue

            for j, (start, end) in enumerate(line_indices[i]):
                c = tuple(color[i][j] * 255)  # TODO: not working
                c = (int(c[0]), int(c[1]), int(c[2]))
                # line_indices holds global vertex indices (pidx offsets from
                # create_lines), so map them back to the 14 corners of box i.
                if i != 0:
                    pt1 = (corners[(start) % (14 * i), 0],
                           corners[(start) % (14 * i), 1])
                    pt2 = (corners[(end) % (14 * i), 0],
                           corners[(end) % (14 * i), 1])
                else:
                    pt1 = (corners[start, 0], corners[start, 1])
                    pt2 = (corners[end, 0], corners[end, 1])
                draw.line([pt1, pt2], fill=c, width=thickness)

        return np.array(img_pil).astype(np.uint8)
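
A minimal usage sketch follows; it is not part of the module and the values are
hypothetical, assuming only the class above. It exercises both output formats of
create_lines.

if __name__ == "__main__":
    # Hypothetical example box; front/up/left must be normalized and
    # mutually orthogonal.
    box = BoundingBox3D(center=(0.0, 0.0, 0.0),
                        front=(0.0, 0.0, 1.0),
                        up=(0.0, 1.0, 0.0),
                        left=(1.0, 0.0, 0.0),
                        size=(2.0, 1.5, 4.0),
                        label_class=0,
                        confidence=0.9)

    # LineSet output that can be added to a cloudViewer visualizer scene.
    lineset = BoundingBox3D.create_lines([box])

    # Dictionary output: raw vertex/line arrays plus labels and confidences.
    props = BoundingBox3D.create_lines([box], out_format="dict")
    print(props["vertex_positions"].shape)  # (14, 3): 8 corners + 6 arrow points
    print(props["line_indices"].shape)      # (17, 2): 12 box edges + 5 arrow lines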