11 #include <unordered_map>
24 static const std::unordered_map<std::string, std::string>
26 {
"color",
"The color image."},
27 {
"depth",
"The depth image."},
29 "Are the two images aligned (same viewpoint and resolution)?"},
30 {
"image",
"The Image object."},
32 "Tensor of the image. The tensor must be contiguous. The "
33 "tensor must be 2D (rows, cols) or 3D (rows, cols, "
36 "Number of rows of the image, i.e. image height. rows must be "
39 "Number of columns of the image, i.e. image width. cols must "
42 "Number of channels of the image. E.g. for RGB image, "
43 "channels == 3; for grayscale image, channels == 1. channels "
44 "must be greater than 0."},
45 {
"dtype",
"Data type of the image."},
46 {
"device",
"Device where the image is stored."},
48 "First multiply image pixel values with this factor. "
49 "This should be positive for unsigned dtypes."},
50 {
"offset",
"Then add this factor to all image pixel values."},
51 {
"kernel_size",
"Kernel size for filters and dilations."},
52 {
"value_sigma",
"Standard deviation for the image content."},
54 "Standard deviation for the image pixel positions."}};
57 py::class_<Image, PyGeometry<Image>, std::shared_ptr<Image>,
Geometry>
58 image(m,
"Image", py::buffer_protocol(),
59 "The Image class stores image with customizable rows, cols, "
60 "channels, dtype and device.");
62 py::native_enum<Image::InterpType>(m,
"InterpType",
"enum.Enum",
63 "Interpolation type.")
73 image.def(py::init<int64_t, int64_t, int64_t, core::Dtype, core::Device>(),
74 "Row-major storage is used, similar to OpenCV. Use (row, col, "
75 "channel) indexing order for image creation and accessing. In "
76 "general, (r, c, ch) are the preferred variable names for "
77 "consistency, and avoid using width, height, u, v, x, y for "
79 "rows"_a = 0,
"cols"_a = 0,
"channels"_a = 1,
81 .def(py::init<core::Tensor &>(),
82 "Construct from a tensor. The tensor won't be copied and "
83 "memory will be shared.",
87 py::detail::bind_copy_functions<Image>(
image);
93 return py::make_tuple(
image.AsTensor());
99 "Cannot unpickle Image! Expecting a tuple of size "
102 return Image(t[0].cast<core::Tensor>());
106 image.def_buffer([](
Image &I) -> py::buffer_info {
109 "Cannot convert image buffer since it's not on CPU. "
110 "Convert to CPU image by calling .cpu() first.");
114 for (
size_t i = 0; i < strides_in_bytes.
size(); i++) {
115 strides_in_bytes[i] *= element_byte_size;
117 return py::buffer_info(I.
GetDataPtr(), element_byte_size,
124 "Get dtype of the image")
126 "Get the device of the image.")
128 "Get the number of rows of the image.")
130 "Get the number of columns of the image.")
132 "Get the number of channels of the image.")
137 "Compute min 2D coordinates for the data (always {0, 0}).")
139 "Compute max 2D coordinates for the data ({rows, cols}).")
141 "Function to linearly transform pixel intensities in place: "
142 "image = scale * image + offset.",
143 "scale"_a = 1.0,
"offset"_a = 0.0)
145 "Return a new image after performing morphological dilation. "
146 "Supported datatypes are UInt8, UInt16 and Float32 with "
147 "{1, 3, 4} channels. An 8-connected neighborhood is used to "
148 "create the dilation mask.",
151 "Return a new image after filtering with the given kernel.",
154 "Return a new image after Gaussian filtering. "
155 "Possible kernel_size: odd numbers >= 3 are supported.",
156 "kernel_size"_a = 3,
"sigma"_a = 1.0)
158 "Return a new image after bilateral filtering. "
159 "Note: CPU (IPP) and CUDA (NPP) versions are inconsistent: "
160 "CPU uses a round kernel (radius = floor(kernel_size / 2)), "
161 "while CUDA uses a square kernel (width = kernel_size). "
162 "Make sure to tune parameters accordingly.",
163 "kernel_size"_a = 3,
"value_sigma"_a = 20.0,
164 "dist_sigma"_a = 10.0)
166 "Return a pair of new gradient images (dx, dy) after Sobel "
167 "filtering. Possible kernel_size: 3 and 5.",
170 "Return a new image after resizing with specified "
171 "interpolation type. Downsample if sampling rate is < 1. "
172 "Upsample if sampling rate > 1. Aspect ratio is always "
174 "sampling_rate"_a = 0.5,
176 "cloudViewer.t.geometry.InterpType.Nearest"))
178 "Return a new downsampled image with pyramid downsampling "
179 "formed by a chained Gaussian filter (kernel_size = 5, sigma"
180 " = 1.0) and a resize (ratio = 0.5) operation.")
182 "Converts a 3-channel RGB image to a new 1-channel Grayscale "
183 "image by I = 0.299 * R + 0.587 * G + 0.114 * B.")
190 "Preprocess an image of shape (rows, cols, channels=1), typically"
191 " used for a depth image. UInt16 and Float32 Dtypes supported. "
192 "Each pixel will be transformed by\n"
194 "x = x < min_value ? clip_fill : x\n"
195 "x = x > max_value ? clip_fill : x\n"
196 "Use INF, NAN or 0.0 (default) for clip_fill",
197 "scale"_a,
"min_value"_a,
"max_value"_a,
"clip_fill"_a = 0.0f);
199 "Create a vertex map of shape (rows, cols, channels=3) in Float32"
200 " from an image of shape (rows, cols, channels=1) in Float32 "
201 "using unprojection. The input depth is expected to be the output"
202 " of clip_transform.",
203 "intrinsics"_a,
"invalid_fill"_a = 0.0f);
205 "Create a normal map of shape (rows, cols, channels=3) in Float32"
206 " from a vertex map of shape (rows, cols, channels=3) in Float32 "
207 "using cross product of V(r, c+1)-V(r, c) and V(r+1, c)-V(r, c)"
208 ". The input vertex map is expected to be the output of "
209 "create_vertex_map. You may need to start with a filtered depth "
210 "image (e.g. with filter_bilateral) to obtain good results.",
211 "invalid_fill"_a = 0.0f);
214 "Colorize an input depth image (with Dtype UInt16 or Float32). The"
215 " image values are divided by scale, then clamped within "
216 "(min_value, max_value) and finally converted to a 3 channel UInt8"
217 " RGB image using the Turbo colormap as a lookup table.",
218 "scale"_a,
"min_value"_a,
"max_value"_a);
222 py::overload_cast<const core::Device &, bool>(&
Image::To,
224 "Transfer the Image to a specified device. A new image is "
225 "always created if copy is true, else it is avoided when the "
226 "original image is already on the target device.",
227 "device"_a,
"copy"_a =
false);
229 "Returns a copy of the Image on the same device.");
233 "Transfer the image to CPU. If the image "
234 "is already on CPU, no copy will be performed.");
240 "Transfer the image to a CUDA device. If the image is already "
241 "on the specified CUDA device, no copy will be performed.",
248 "Returns an Image with the specified Dtype.",
"dtype"_a,
249 "copy"_a =
false,
"scale"_a = py::none(),
"offset"_a = 0.0);
252 {{
"dtype",
"The targeted dtype to convert to."},
254 "Optional scale value. This is 1./255 for UInt8 -> Float{32,64}, "
255 "1./65535 for UInt16 -> Float{32,64} and 1 otherwise"},
256 {
"offset",
"Optional shift value. Default 0."},
258 "If true, a new tensor is always created; if false, the copy is "
259 "avoided when the original tensor already has the targeted "
264 "Create an Image from a legacy CloudViewer Image.");
273 py::class_<RGBDImage, PyGeometry<RGBDImage>, std::shared_ptr<RGBDImage>,
277 "RGBDImage is a pair of color and depth images. For most "
278 "processing, the image pair should be aligned (same "
283 .def(py::init<>(),
"Construct an empty RGBDImage.")
284 .def(py::init<const Image &, const Image &, bool>(),
285 "Parameterized constructor",
"color"_a,
"depth"_a,
299 "Cannot unpickle RGBDImage! Expecting a "
303 return RGBDImage(t[0].cast<Image>(), t[1].cast<Image>(),
311 "Are the depth and color images aligned (same "
312 "viewpoint and resolution)?")
317 "Are the depth and color images aligned (same viewpoint and "
320 "Compute min 2D coordinates for the data (always {0, 0}).")
322 "Compute max 2D coordinates for the data.")
325 py::overload_cast<const core::Device &, bool>(&
RGBDImage::To,
327 "Transfer the RGBDImage to a specified device.",
"device"_a,
330 "Returns a copy of the RGBDImage on the same device.")
336 "Transfer the RGBD image to CPU. If the RGBD image "
337 "is already on CPU, no copy will be performed.")
340 [](
const RGBDImage &rgbd_image,
int device_id) {
343 "Transfer the RGBD image to a CUDA device. If the RGBD "
345 "on the specified CUDA device, no copy will be performed.",
350 "Convert to legacy RGBDImage type.")
std::shared_ptr< core::Tensor > image
SizeVector GetStrides() const
SizeVector GetShape() const
The Image class stores image with customizable rows, cols, channels, dtype and device.
Image Dilate(int kernel_size=3) const
Return a new image after performing morphological dilation.
Image ClipTransform(float scale, float min_value, float max_value, float clip_fill=0.0f) const
Return new image after scaling and clipping image values.
@ Super
Super sampling interpolation (only downsample).
@ Lanczos
Lanczos filter interpolation.
@ Linear
Bilinear interpolation.
@ Nearest
Nearest neighbors interpolation.
@ Cubic
Bicubic interpolation.
Image PyrDown() const
Return a new downsampled image with pyramid downsampling.
std::string ToString() const
Text description.
static Image FromLegacy(const cloudViewer::geometry::Image &image_legacy, const core::Device &Device=core::Device("CPU:0"))
Create from a legacy CloudViewer Image.
Image Resize(float sampling_rate=0.5f, InterpType interp_type=InterpType::Nearest) const
Return a new image after resizing with specified interpolation type.
cloudViewer::geometry::Image ToLegacy() const
Convert to legacy Image type.
Image ColorizeDepth(float scale, float min_value, float max_value)
Colorize an input depth image (with Dtype UInt16 or Float32).
core::Tensor GetMinBound() const
Compute min 2D coordinates for the data (always {0, 0}).
Image CreateVertexMap(const core::Tensor &intrinsics, float invalid_fill=0.0f)
Create a vertex map from a depth image using unprojection.
core::Device GetDevice() const override
Get device of the image.
Image FilterGaussian(int kernel_size=3, float sigma=1.0f) const
Return a new image after Gaussian filtering.
core::Tensor GetMaxBound() const
Compute max 2D coordinates for the data ({rows, cols}).
void * GetDataPtr()
Get raw buffer of the Image data.
bool IsEmpty() const override
Returns true if rows * cols * channels == 0.
Image & LinearTransform(double scale=1.0, double offset=0.0)
Function to linearly transform pixel intensities in place.
Image RGBToGray() const
Converts a 3-channel RGB image to a new 1-channel Grayscale image.
Image Clone() const
Returns copy of the image on the same device.
Image To(const core::Device &device, bool copy=false) const
Transfer the image to a specified device.
Image CreateNormalMap(float invalid_fill=0.0f)
Create a normal map from a vertex map.
core::Dtype GetDtype() const
Get dtype of the image.
std::pair< Image, Image > FilterSobel(int kernel_size=3) const
Return a pair of new gradient images (dx, dy) after Sobel filtering.
core::Tensor AsTensor() const
Returns the underlying Tensor of the Image.
int64_t GetChannels() const
Get the number of channels of the image.
Image & Clear() override
Clear image contents by resetting the rows and cols to 0, while keeping channels, dtype and device un...
Image FilterBilateral(int kernel_size=3, float value_sigma=20.0f, float distance_sigma=10.0f) const
Return a new image after bilateral filtering.
int64_t GetCols() const
Get the number of columns of the image.
Image Filter(const core::Tensor &kernel) const
Return a new image after filtering with the given kernel.
int64_t GetRows() const
Get the number of rows of the image.
RGBDImage A pair of color and depth images.
bool IsEmpty() const override
Is any data stored?
Image depth_
The depth image.
core::Tensor GetMinBound() const
Compute min 2D coordinates for the data (always {0,0}).
RGBDImage To(const core::Device &device, bool copy=false) const
bool AreAligned() const
Are the depth and color images aligned (same viewpoint and resolution)?
std::string ToString() const
Text description.
bool aligned_
Are the depth and color images aligned (same viewpoint and resolution)?
cloudViewer::geometry::RGBDImage ToLegacy() const
Convert to the legacy RGBDImage format.
RGBDImage & Clear() override
Clear stored data.
core::Tensor GetMaxBound() const
Compute max 2D coordinates for the data.
Image color_
The color image.
RGBDImage Clone() const
Returns copy of the RGBD image on the same device.
void ClassMethodDocInject(py::module &pybind_module, const std::string &class_name, const std::string &function_name, const std::unordered_map< std::string, std::string > &map_parameter_body_docs)
std::string DtypeToArrayFormat(const core::Dtype &dtype)
void pybind_image(py::module &m)
static const std::unordered_map< std::string, std::string > map_shared_argument_docstrings
Generic file read and write utility for python interface.