ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
image.cpp
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #include "t/geometry/Image.h"
9 
10 #include <string>
11 #include <unordered_map>
12 
14 #include "pybind/docstring.h"
15 #include "pybind/pybind_utils.h"
17 #include "t/geometry/RGBDImage.h"
18 
19 namespace cloudViewer {
20 namespace t {
21 namespace geometry {
22 
// Argument docstrings shared by the Image / RGBDImage bindings. Image
// functions take similar arguments, so each description is written once here,
// keyed by the Python argument name, and passed to
// docstring::ClassMethodDocInject for the methods that use it.
static const std::unordered_map<std::string, std::string>
        map_shared_argument_docstrings = {
                {"color", "The color image."},
                {"depth", "The depth image."},
                {"aligned",
                 "Are the two images aligned (same viewpoint and resolution)?"},
                {"image", "The Image object."},
                {"tensor",
                 "Tensor of the image. The tensor must be contiguous. The "
                 "tensor must be 2D (rows, cols) or 3D (rows, cols, "
                 "channels)."},
                {"rows",
                 "Number of rows of the image, i.e. image height. rows must be "
                 "non-negative."},
                {"cols",
                 "Number of columns of the image, i.e. image width. cols must "
                 "be non-negative."},
                {"channels",
                 "Number of channels of the image. E.g. for RGB image, "
                 "channels == 3; for grayscale image, channels == 1. channels "
                 "must be greater than 0."},
                {"dtype", "Data type of the image."},
                {"device", "Device where the image is stored."},
                {"scale",
                 "First multiply image pixel values with this factor. "
                 "This should be positive for unsigned dtypes."},
                {"offset", "Then add this factor to all image pixel values."},
                {"kernel_size", "Kernel size for filters and dilations."},
                {"value_sigma", "Standard deviation for the image content."},
                {"distance_sigma",
                 "Standard deviation for the image pixel positions."}};
55 
56 void pybind_image(py::module &m) {
57  py::class_<Image, PyGeometry<Image>, std::shared_ptr<Image>, Geometry>
58  image(m, "Image", py::buffer_protocol(),
59  "The Image class stores image with customizable rols, cols, "
60  "channels, dtype and device.");
61 
62  py::native_enum<Image::InterpType>(m, "InterpType", "enum.Enum",
63  "Interpolation type.")
64  .value("Nearest", Image::InterpType::Nearest)
65  .value("Linear", Image::InterpType::Linear)
66  .value("Cubic", Image::InterpType::Cubic)
67  .value("Lanczos", Image::InterpType::Lanczos)
68  .value("Super", Image::InterpType::Super)
69  .export_values()
70  .finalize();
71 
72  // Constructors
73  image.def(py::init<int64_t, int64_t, int64_t, core::Dtype, core::Device>(),
74  "Row-major storage is used, similar to OpenCV. Use (row, col, "
75  "channel) indexing order for image creation and accessing. In "
76  "general, (r, c, ch) are the preferred variable names for "
77  "consistency, and avoid using width, height, u, v, x, y for "
78  "coordinates.",
79  "rows"_a = 0, "cols"_a = 0, "channels"_a = 1,
80  "dtype"_a = core::Float32, "device"_a = core::Device("CPU:0"))
81  .def(py::init<core::Tensor &>(),
82  "Construct from a tensor. The tensor won't be copied and "
83  "memory will be shared.",
84  "tensor"_a);
85  docstring::ClassMethodDocInject(m, "Image", "__init__",
87  py::detail::bind_copy_functions<Image>(image);
88 
89  // Pickle support.
90  image.def(py::pickle(
91  [](const Image &image) {
92  // __getstate__
93  return py::make_tuple(image.AsTensor());
94  },
95  [](py::tuple t) {
96  // __setstate__
97  if (t.size() != 1) {
99  "Cannot unpickle Image! Expecting a tuple of size "
100  "1.");
101  }
102  return Image(t[0].cast<core::Tensor>());
103  }));
104 
105  // Buffer protocol.
106  image.def_buffer([](Image &I) -> py::buffer_info {
107  if (!I.IsCPU()) {
109  "Cannot convert image buffer since it's not on CPU. "
110  "Convert to CPU image by calling .cpu() first.");
111  }
112  core::SizeVector strides_in_bytes = I.AsTensor().GetStrides();
113  const int64_t element_byte_size = I.GetDtype().ByteSize();
114  for (size_t i = 0; i < strides_in_bytes.size(); i++) {
115  strides_in_bytes[i] *= element_byte_size;
116  }
117  return py::buffer_info(I.GetDataPtr(), element_byte_size,
119  I.AsTensor().NumDims(), I.AsTensor().GetShape(),
120  strides_in_bytes);
121  });
122  // Info.
123  image.def_property_readonly("dtype", &Image::GetDtype,
124  "Get dtype of the image")
125  .def_property_readonly("device", &Image::GetDevice,
126  "Get the device of the image.")
127  .def_property_readonly("rows", &Image::GetRows,
128  "Get the number of rows of the image.")
129  .def_property_readonly("columns", &Image::GetCols,
130  "Get the number of columns of the image.")
131  .def_property_readonly("channels", &Image::GetChannels,
132  "Get the number of channels of the image.")
133  // functions
134  .def("clear", &Image::Clear, "Clear stored data.")
135  .def("is_empty", &Image::IsEmpty, "Is any data stored?")
136  .def("get_min_bound", &Image::GetMinBound,
137  "Compute min 2D coordinates for the data (always {0, 0}).")
138  .def("get_max_bound", &Image::GetMaxBound,
139  "Compute max 2D coordinates for the data ({rows, cols}).")
140  .def("linear_transform", &Image::LinearTransform,
141  "Function to linearly transform pixel intensities in place: "
142  "image = scale * image + offset.",
143  "scale"_a = 1.0, "offset"_a = 0.0)
144  .def("dilate", &Image::Dilate,
145  "Return a new image after performing morphological dilation. "
146  "Supported datatypes are UInt8, UInt16 and Float32 with "
147  "{1, 3, 4} channels. An 8-connected neighborhood is used to "
148  "create the dilation mask.",
149  "kernel_size"_a = 3)
150  .def("filter", &Image::Filter,
151  "Return a new image after filtering with the given kernel.",
152  "kernel"_a)
153  .def("filter_gaussian", &Image::FilterGaussian,
154  "Return a new image after Gaussian filtering. "
155  "Possible kernel_size: odd numbers >= 3 are supported.",
156  "kernel_size"_a = 3, "sigma"_a = 1.0)
157  .def("filter_bilateral", &Image::FilterBilateral,
158  "Return a new image after bilateral filtering."
159  "Note: CPU (IPP) and CUDA (NPP) versions are inconsistent: "
160  "CPU uses a round kernel (radius = floor(kernel_size / 2)), "
161  "while CUDA uses a square kernel (width = kernel_size). "
162  "Make sure to tune parameters accordingly.",
163  "kernel_size"_a = 3, "value_sigma"_a = 20.0,
164  "dist_sigma"_a = 10.0)
165  .def("filter_sobel", &Image::FilterSobel,
166  "Return a pair of new gradient images (dx, dy) after Sobel "
167  "filtering. Possible kernel_size: 3 and 5.",
168  "kernel_size"_a = 3)
169  .def("resize", &Image::Resize,
170  "Return a new image after resizing with specified "
171  "interpolation type. Downsample if sampling rate is < 1. "
172  "Upsample if sampling rate > 1. Aspect ratio is always "
173  "kept.",
174  "sampling_rate"_a = 0.5,
175  py::arg_v("interp_type", Image::InterpType::Nearest,
176  "cloudViewer.t.geometry.InterpType.Nearest"))
177  .def("pyrdown", &Image::PyrDown,
178  "Return a new downsampled image with pyramid downsampling "
179  "formed by a chained Gaussian filter (kernel_size = 5, sigma"
180  " = 1.0) and a resize (ratio = 0.5) operation.")
181  .def("rgb_to_gray", &Image::RGBToGray,
182  "Converts a 3-channel RGB image to a new 1-channel Grayscale "
183  "image by I = 0.299 * R + 0.587 * G + 0.114 * B.")
184  .def("__repr__", &Image::ToString);
185  docstring::ClassMethodDocInject(m, "Image", "linear_transform",
187 
188  // Depth utilities.
189  image.def("clip_transform", &Image::ClipTransform,
190  "Preprocess a image of shape (rows, cols, channels=1), typically"
191  " used for a depth image. UInt16 and Float32 Dtypes supported. "
192  "Each pixel will be transformed by\n"
193  "x = x / scale\n"
194  "x = x < min_value ? clip_fill : x\n"
195  "x = x > max_value ? clip_fill : x\n"
196  "Use INF, NAN or 0.0 (default) for clip_fill",
197  "scale"_a, "min_value"_a, "max_value"_a, "clip_fill"_a = 0.0f);
198  image.def("create_vertex_map", &Image::CreateVertexMap,
199  "Create a vertex map of shape (rows, cols, channels=3) in Float32"
200  " from an image of shape (rows, cols, channels=1) in Float32 "
201  "using unprojection. The input depth is expected to be the output"
202  " of clip_transform.",
203  "intrinsics"_a, "invalid_fill"_a = 0.0f);
204  image.def("create_normal_map", &Image::CreateNormalMap,
205  "Create a normal map of shape (rows, cols, channels=3) in Float32"
206  " from a vertex map of shape (rows, cols, channels=1) in Float32 "
207  "using cross product of V(r, c+1)-V(r, c) and V(r+1, c)-V(r, c)"
208  ". The input vertex map is expected to be the output of "
209  "create_vertex_map. You may need to start with a filtered depth "
210  " image (e.g. with filter_bilateral) to obtain good results.",
211  "invalid_fill"_a = 0.0f);
212  image.def(
213  "colorize_depth", &Image::ColorizeDepth,
214  "Colorize an input depth image (with Dtype UInt16 or Float32). The"
215  " image values are divided by scale, then clamped within "
216  "(min_value, max_value) and finally converted to a 3 channel UInt8"
217  " RGB image using the Turbo colormap as a lookup table.",
218  "scale"_a, "min_value"_a, "max_value"_a);
219 
220  // Device transfers.
221  image.def("to",
222  py::overload_cast<const core::Device &, bool>(&Image::To,
223  py::const_),
224  "Transfer the Image to a specified device. A new image is "
225  "always created if copy is true, else it is avoided when the "
226  "original image is already on the target device.",
227  "device"_a, "copy"_a = false);
228  image.def("clone", &Image::Clone,
229  "Returns a copy of the Image on the same device.");
230  image.def(
231  "cpu",
232  [](const Image &image) { return image.To(core::Device("CPU:0")); },
233  "Transfer the image to CPU. If the image "
234  "is already on CPU, no copy will be performed.");
235  image.def(
236  "cuda",
237  [](const Image &image, int device_id) {
238  return image.To(core::Device("CUDA", device_id));
239  },
240  "Transfer the image to a CUDA device. If the image is already "
241  "on the specified CUDA device, no copy will be performed.",
242  "device_id"_a = 0);
243 
244  // Conversion.
245  image.def("to",
246  py::overload_cast<core::Dtype, bool, utility::optional<double>,
247  double>(&Image::To, py::const_),
248  "Returns an Image with the specified Dtype.", "dtype"_a,
249  "copy"_a = false, "scale"_a = py::none(), "offset"_a = 0.0);
251  m, "Image", "to",
252  {{"dtype", "The targeted dtype to convert to."},
253  {"scale",
254  "Optional scale value. This is 1./255 for UInt8 -> Float{32,64}, "
255  "1./65535 for UInt16 -> Float{32,64} and 1 otherwise"},
256  {"offset", "Optional shift value. Default 0."},
257  {"copy",
258  "If true, a new tensor is always created; if false, the copy is "
259  "avoided when the original tensor already has the targeted "
260  "dtype."}});
261  image.def("to_legacy", &Image::ToLegacy, "Convert to legacy Image type.");
262  image.def_static("from_legacy", &Image::FromLegacy, "image_legacy"_a,
263  "device"_a = core::Device("CPU:0"),
264  "Create a Image from a legacy CloudViewer Image.");
265  image.def("as_tensor", &Image::AsTensor);
266 
267  docstring::ClassMethodDocInject(m, "Image", "get_min_bound");
268  docstring::ClassMethodDocInject(m, "Image", "get_max_bound");
269  docstring::ClassMethodDocInject(m, "Image", "clear");
270  docstring::ClassMethodDocInject(m, "Image", "is_empty");
271  docstring::ClassMethodDocInject(m, "Image", "to_legacy");
272 
273  py::class_<RGBDImage, PyGeometry<RGBDImage>, std::shared_ptr<RGBDImage>,
274  Geometry>
275  rgbd_image(
276  m, "RGBDImage",
277  "RGBDImage is a pair of color and depth images. For most "
278  "procesing, the image pair should be aligned (same "
279  "viewpoint and "
280  "resolution).");
281  rgbd_image
282  // Constructors.
283  .def(py::init<>(), "Construct an empty RGBDImage.")
284  .def(py::init<const Image &, const Image &, bool>(),
285  "Parameterized constructor", "color"_a, "depth"_a,
286  "aligned"_a = true)
287 
288  // Pickling support.
289  .def(py::pickle(
290  [](const RGBDImage &rgbd) {
291  // __getstate__
292  return py::make_tuple(rgbd.color_, rgbd.depth_,
293  rgbd.aligned_);
294  },
295  [](py::tuple t) {
296  // __setstate__
297  if (t.size() != 3) {
299  "Cannot unpickle RGBDImage! Expecting a "
300  "tuple of size 3.");
301  }
302 
303  return RGBDImage(t[0].cast<Image>(), t[1].cast<Image>(),
304  t[2].cast<bool>());
305  }))
306 
307  // Depth and color images.
308  .def_readwrite("color", &RGBDImage::color_, "The color image.")
309  .def_readwrite("depth", &RGBDImage::depth_, "The depth image.")
310  .def_readwrite("aligned_", &RGBDImage::aligned_,
311  "Are the depth and color images aligned (same "
312  "viewpoint and resolution)?")
313  // Functions.
314  .def("clear", &RGBDImage::Clear, "Clear stored data.")
315  .def("is_empty", &RGBDImage::IsEmpty, "Is any data stored?")
316  .def("are_aligned", &RGBDImage::AreAligned,
317  "Are the depth and color images aligned (same viewpoint and "
318  "resolution)?")
319  .def("get_min_bound", &RGBDImage::GetMinBound,
320  "Compute min 2D coordinates for the data (always {0, 0}).")
321  .def("get_max_bound", &RGBDImage::GetMaxBound,
322  "Compute max 2D coordinates for the data.")
323  // Device transfers.
324  .def("to",
325  py::overload_cast<const core::Device &, bool>(&RGBDImage::To,
326  py::const_),
327  "Transfer the RGBDImage to a specified device.", "device"_a,
328  "copy"_a = false)
329  .def("clone", &RGBDImage::Clone,
330  "Returns a copy of the RGBDImage on the same device.")
331  .def(
332  "cpu",
333  [](const RGBDImage &rgbd_image) {
334  return rgbd_image.To(core::Device("CPU:0"));
335  },
336  "Transfer the RGBD image to CPU. If the RGBD image "
337  "is already on CPU, no copy will be performed.")
338  .def(
339  "cuda",
340  [](const RGBDImage &rgbd_image, int device_id) {
341  return rgbd_image.To(core::Device("CUDA", device_id));
342  },
343  "Transfer the RGBD image to a CUDA device. If the RGBD "
344  "image is already "
345  "on the specified CUDA device, no copy will be performed.",
346  "device_id"_a = 0)
347 
348  // Conversion.
349  .def("to_legacy", &RGBDImage::ToLegacy,
350  "Convert to legacy RGBDImage type.")
351  // Description.
352  .def("__repr__", &RGBDImage::ToString);
353 
354  docstring::ClassMethodDocInject(m, "RGBDImage", "get_min_bound");
355  docstring::ClassMethodDocInject(m, "RGBDImage", "get_max_bound");
356  docstring::ClassMethodDocInject(m, "RGBDImage", "clear");
357  docstring::ClassMethodDocInject(m, "RGBDImage", "is_empty");
358  docstring::ClassMethodDocInject(m, "RGBDImage", "to_legacy");
359  docstring::ClassMethodDocInject(m, "RGBDImage", "__init__",
361 }
362 
363 } // namespace geometry
364 } // namespace t
365 } // namespace cloudViewer
Common CUDA utilities.
std::shared_ptr< core::Tensor > image
int64_t ByteSize() const
Definition: Dtype.h:59
bool IsCPU() const
Definition: Device.h:95
int64_t NumDims() const
Definition: Tensor.h:1172
SizeVector GetStrides() const
Definition: Tensor.h:1135
SizeVector GetShape() const
Definition: Tensor.h:1127
The base geometry class.
Definition: Geometry.h:23
The Image class stores image with customizable rows, cols, channels, dtype and device.
Definition: Image.h:29
Image Dilate(int kernel_size=3) const
Return a new image after performing morphological dilation.
Definition: Image.cpp:203
Image ClipTransform(float scale, float min_value, float max_value, float clip_fill=0.0f) const
Return new image after scaling and clipping image values.
Definition: Image.cpp:417
@ Super
Super sampling interpolation (only downsample).
@ Lanczos
Lanczos filter interpolation.
@ Nearest
Nearest neighbors interpolation.
Image PyrDown() const
Return a new downsampled image with pyramid downsampling.
Definition: Image.cpp:395
std::string ToString() const
Text description.
Definition: Image.cpp:555
static Image FromLegacy(const cloudViewer::geometry::Image &image_legacy, const core::Device &Device=core::Device("CPU:0"))
Create from a legacy CloudViewer Image.
Definition: Image.cpp:505
Image Resize(float sampling_rate=0.5f, InterpType interp_type=InterpType::Nearest) const
Return a new image after resizing with specified interpolation type.
Definition: Image.cpp:156
cloudViewer::geometry::Image ToLegacy() const
Convert to legacy Image type.
Definition: Image.cpp:534
Image ColorizeDepth(float scale, float min_value, float max_value)
Colorize an input depth image (with Dtype UInt16 or Float32).
Definition: Image.cpp:482
core::Tensor GetMinBound() const
Compute min 2D coordinates for the data (always {0, 0}).
Definition: Image.h:315
Image CreateVertexMap(const core::Tensor &intrinsics, float invalid_fill=0.0f)
Create a vertex map from a depth image using unprojection.
Definition: Image.cpp:449
core::Device GetDevice() const override
Get device of the image.
Definition: Image.h:96
Image FilterGaussian(int kernel_size=3, float sigma=1.0f) const
Return a new image after Gaussian filtering.
Definition: Image.cpp:309
core::Tensor GetMaxBound() const
Compute max 2D coordinates for the data ({rows, cols}).
Definition: Image.h:320
void * GetDataPtr()
Get raw buffer of the Image data.
Definition: Image.h:118
bool IsEmpty() const override
Returns true if rows * cols * channels == 0.
Definition: Image.h:71
Image & LinearTransform(double scale=1.0, double offset=0.0)
Function to linearly transform pixel intensities in place.
Definition: Image.h:167
Image RGBToGray() const
Converts a 3-channel RGB image to a new 1-channel Grayscale image.
Definition: Image.cpp:120
Image Clone() const
Returns copy of the image on the same device.
Definition: Image.h:143
Image To(const core::Device &device, bool copy=false) const
Transfer the image to a specified device.
Definition: Image.h:132
Image CreateNormalMap(float invalid_fill=0.0f)
Create a normal map from a vertex map.
Definition: Image.cpp:467
core::Dtype GetDtype() const
Get dtype of the image.
Definition: Image.h:93
std::pair< Image, Image > FilterSobel(int kernel_size=3) const
Return a pair of new gradient images (dx, dy) after Sobel filtering.
Definition: Image.cpp:345
core::Tensor AsTensor() const
Returns the underlying Tensor of the Image.
Definition: Image.h:124
int64_t GetChannels() const
Get the number of channels of the image.
Definition: Image.h:90
Image & Clear() override
Clear image contents by resetting the rows and cols to 0, while keeping channels, dtype and device unchanged.
Definition: Image.h:65
Image FilterBilateral(int kernel_size=3, float value_sigma=20.0f, float distance_sigma=10.0f) const
Return a new image after bilateral filtering.
Definition: Image.cpp:239
int64_t GetCols() const
Get the number of columns of the image.
Definition: Image.h:87
Image Filter(const core::Tensor &kernel) const
Return a new image after filtering with the given kernel.
Definition: Image.cpp:278
int64_t GetRows() const
Get the number of rows of the image.
Definition: Image.h:84
RGBDImage A pair of color and depth images.
Definition: RGBDImage.h:21
bool IsEmpty() const override
Is any data stored?
Definition: RGBDImage.cpp:20
Image depth_
The depth image.
Definition: RGBDImage.h:106
core::Tensor GetMinBound() const
Compute min 2D coordinates for the data (always {0,0}).
Definition: RGBDImage.h:68
RGBDImage To(const core::Device &device, bool copy=false) const
Definition: RGBDImage.h:85
bool AreAligned() const
Are the depth and color images aligned (same viewpoint and resolution)?
Definition: RGBDImage.h:65
std::string ToString() const
Text description.
Definition: RGBDImage.cpp:22
bool aligned_
Are the depth and color images aligned (same viewpoint and resolution)?
Definition: RGBDImage.h:108
cloudViewer::geometry::RGBDImage ToLegacy() const
Convert to the legacy RGBDImage format.
Definition: RGBDImage.h:94
RGBDImage & Clear() override
Clear stored data.
Definition: RGBDImage.cpp:14
core::Tensor GetMaxBound() const
Compute max 2D coordinates for the data.
Definition: RGBDImage.h:73
Image color_
The color image.
Definition: RGBDImage.h:104
RGBDImage Clone() const
Returns copy of the RGBD image on the same device.
Definition: RGBDImage.h:91
#define LogError(...)
Definition: Logging.h:60
const Dtype Float32
Definition: Dtype.cpp:42
void ClassMethodDocInject(py::module &pybind_module, const std::string &class_name, const std::string &function_name, const std::unordered_map< std::string, std::string > &map_parameter_body_docs)
Definition: docstring.cpp:27
std::string DtypeToArrayFormat(const core::Dtype &dtype)
void pybind_image(py::module &m)
Definition: image.cpp:56
static const std::unordered_map< std::string, std::string > map_shared_argument_docstrings
Definition: image.cpp:25
Generic file read and write utility for python interface.