ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
docstring.cpp
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #include "pybind/docstring.h"
9 
10 #include <Helper.h>
11 #include <Logging.h>
12 
13 #include <regex>
14 #include <sstream>
15 #include <string>
16 #include <tuple>
17 #include <unordered_map>
18 #include <unordered_set>
19 
20 namespace cloudViewer {
21 namespace docstring {
22 
23 // ref: enum_base in pybind11.h
24 py::handle static_property =
25  py::handle((PyObject*)py::detail::get_internals().static_property_type);
26 
27 void ClassMethodDocInject(py::module& pybind_module,
28  const std::string& class_name,
29  const std::string& function_name,
30  const std::unordered_map<std::string, std::string>&
31  map_parameter_body_docs) {
32  // Get function
33  PyObject* module = pybind_module.ptr();
34  PyObject* class_obj = PyObject_GetAttrString(module, class_name.c_str());
35  if (class_obj == nullptr) {
36  utility::LogWarning("{} docstring failed to inject.", class_name);
37  return;
38  }
39  PyObject* class_method_obj =
40  PyObject_GetAttrString(class_obj, function_name.c_str());
41  if (class_method_obj == nullptr) {
42  utility::LogWarning("{}::{} docstring failed to inject.", class_name,
43  function_name);
44  return;
45  }
46 
47  // Extract PyCFunctionObject
48  PyCFunctionObject* f = nullptr;
49  if (Py_TYPE(class_method_obj) == &PyInstanceMethod_Type) {
50  PyInstanceMethodObject* class_method =
51  (PyInstanceMethodObject*)class_method_obj;
52  f = (PyCFunctionObject*)class_method->func;
53  }
54  if (Py_TYPE(class_method_obj) == &PyCFunction_Type) {
55  // def_static in Pybind is PyCFunction_Type, no need to convert
56  f = (PyCFunctionObject*)class_method_obj;
57  }
58  if (f == nullptr || Py_TYPE(f) != &PyCFunction_Type) {
59  return;
60  }
61 
62  // Parse existing docstring to FunctionDoc
63  FunctionDoc fd(f->m_ml->ml_doc);
64 
65  // Inject docstring
66  for (auto& overload : fd.overload_docs_) {
67  for (ArgumentDoc& ad : overload.argument_docs_) {
68  if (map_parameter_body_docs.count(ad.name_) > 0) {
69  ad.body_ = map_parameter_body_docs.at(ad.name_);
70  }
71  }
72  }
73  f->m_ml->ml_doc = strdup(fd.ToGoogleDocString().c_str());
74 }
75 
76 void FunctionDocInject(py::module& pybind_module,
77  const std::string& function_name,
78  const std::unordered_map<std::string, std::string>&
79  map_parameter_body_docs) {
80  // Get function
81  PyObject* module = pybind_module.ptr();
82  PyObject* f_obj = PyObject_GetAttrString(module, function_name.c_str());
83  if (f_obj == nullptr) {
84  utility::LogWarning("{} docstring failed to inject.", function_name);
85  return;
86  }
87  if (Py_TYPE(f_obj) != &PyCFunction_Type) {
88  return;
89  }
90  PyCFunctionObject* f = (PyCFunctionObject*)f_obj;
91 
92  // Parse existing docstring to FunctionDoc
93  FunctionDoc fd(f->m_ml->ml_doc);
94 
95  // Inject docstring: repeat for each overload
96  for (auto& overload : fd.overload_docs_) {
97  for (ArgumentDoc& ad : overload.argument_docs_) {
98  if (map_parameter_body_docs.count(ad.name_) > 0) {
99  ad.body_ = map_parameter_body_docs.at(ad.name_);
100  }
101  }
102  }
103  f->m_ml->ml_doc = strdup(fd.ToGoogleDocString().c_str());
104 }
105 
106 FunctionDoc::FunctionDoc(const std::string& pybind_doc)
107  : pybind_doc_(pybind_doc) {
109  doc_pos_[1] = ParseSummary();
110  // Repeat for each overload:
111  for (; doc_pos_[1] != std::string::npos; doc_pos_[1] = ParseSummary()) {
112  ParseArguments();
113  ParseReturn();
114  doc_pos_[0] = doc_pos_[1];
115  }
116 }
117 
119  size_t parenthesis_pos = pybind_doc_.find("(");
120  if (parenthesis_pos != std::string::npos) {
121  std::string name = pybind_doc_.substr(0, parenthesis_pos);
122  name_ = name;
123  }
124  size_t preamble_end = pybind_doc_.find("Overloaded function.");
125  if (preamble_end == std::string::npos) {
126  return parenthesis_pos;
127  } else {
128  preamble_end += strlen("Overloaded function.");
129  preamble_ = pybind_doc_.substr(0, preamble_end);
130  return preamble_end;
131  }
132 }
133 
135  size_t arrow_pos = pybind_doc_.find(" -> ", doc_pos_[0]);
136  size_t summary_end_pos = std::string::npos;
137  if (arrow_pos != std::string::npos) {
138  overload_docs_.push_back(OverloadDocs{});
139  size_t result_type_pos = arrow_pos + 4;
140  size_t summary_start_pos =
141  result_type_pos + utility::WordLength(pybind_doc_,
142  result_type_pos,
143  "._:,[]() ,\"");
144  summary_end_pos =
145  pybind_doc_.find(". " + name_ + "(", summary_start_pos);
146  if (summary_end_pos == std::string::npos)
147  summary_end_pos = pybind_doc_.size(); // Last overload
148  else
149  summary_end_pos -= 3; // \n\n[:digit:]
150  size_t summary_len = summary_end_pos - summary_start_pos;
151  if (summary_len > 0) {
152  std::string summary =
153  pybind_doc_.substr(summary_start_pos, summary_len);
154  overload_docs_.back().summary_ = StringCleanAll(summary);
155  }
156  }
157  return summary_end_pos;
158 }
159 
161  // Parse docstrings of arguments
162  // Input: "foo(arg0: float, arg1: float = 1.0, arg2: int = 1) ->
163  // cloudViewer.bar" Goal: split to {"arg0: float", "arg1: float = 1.0",
164  // "arg2: int = 1"} and
165  // call function to parse each argument respectively
166  std::vector<std::string> argument_tokens = GetArgumentTokens(
167  pybind_doc_.substr(doc_pos_[0], doc_pos_[1] - doc_pos_[0]));
168  overload_docs_.back().argument_docs_.clear();
169  for (const std::string& argument_token : argument_tokens) {
170  overload_docs_.back().argument_docs_.push_back(
171  ParseArgumentToken(argument_token));
172  }
173 }
174 
176  size_t arrow_pos = pybind_doc_.rfind(" -> ", doc_pos_[1]);
177  if (arrow_pos != std::string::npos && arrow_pos > doc_pos_[0]) {
178  size_t result_type_pos = arrow_pos + 4;
179  std::string return_type = pybind_doc_.substr(
180  result_type_pos,
181  utility::WordLength(pybind_doc_, result_type_pos,
182  "._:,[]() ,\""));
183  overload_docs_.back().return_doc_.type_ = StringCleanAll(return_type);
184  }
185 }
186 
187 std::string FunctionDoc::ToGoogleDocString() const {
188  // Example Google style:
189  // http://www.sphinx-doc.org/en/1.5/ext/example_google.html
190 
191  std::ostringstream rc;
192  std::string indent = " ";
193  size_t n_overload = 1;
194 
195  if (!preamble_.empty()) {
196  rc << preamble_ << std::endl << std::endl;
197  }
198 
199  for (auto& overload : overload_docs_) {
200  // Function signature to be parsed by Sphinx
201  if (!preamble_.empty()) rc << std::endl << n_overload++ << ". ";
202  rc << name_ << "(";
203  for (size_t i = 0; i < overload.argument_docs_.size(); ++i) {
204  const ArgumentDoc& argument_doc = overload.argument_docs_[i];
205  rc << argument_doc.name_;
206  if (argument_doc.default_ != "") {
207  rc << "=" << argument_doc.default_;
208  }
209  if (i != overload.argument_docs_.size() - 1) {
210  rc << ", ";
211  }
212  }
213  rc << ")" << std::endl;
214 
215  // Summary line, strictly speaking this shall be at the very front.
216  // However from a compiled Python module we need the function signature
217  // hints in front for Sphinx parsing and PyCharm autocomplete
218  if (overload.summary_ != "") {
219  if (!preamble_.empty()) rc << indent;
220  rc << overload.summary_ << std::endl;
221  }
222 
223  // Arguments
224  if (overload.argument_docs_.size() != 0 &&
225  !(overload.argument_docs_.size() == 1 &&
226  overload.argument_docs_[0].name_ == "self")) {
227  rc << std::endl;
228  rc << "Args:" << std::endl;
229  for (const ArgumentDoc& argument_doc : overload.argument_docs_) {
230  if (argument_doc.name_ == "self") {
231  continue;
232  }
233  rc << indent << argument_doc.name_ << " ("
234  << argument_doc.type_;
235  if (argument_doc.default_ != "") {
236  rc << ", optional";
237  }
238  if (argument_doc.default_ != "" &&
239  argument_doc.long_default_ == "") {
240  rc << ", default=" << argument_doc.default_;
241  }
242  rc << ")";
243  if (argument_doc.body_ != "") {
244  rc << ": " << argument_doc.body_;
245  }
246  if (argument_doc.long_default_ != "") {
247  std::vector<std::string> lines = utility::SplitString(
248  argument_doc.long_default_, "\n", true);
249  rc << " Default value:" << std::endl << std::endl;
250  bool prev_line_is_listing = false;
251  for (std::string& line : lines) {
252  line = StringCleanAll(line);
253  if (line[0] == '-') { // listing
254  // Add empty line before listing
255  if (!prev_line_is_listing) {
256  rc << std::endl;
257  }
258  prev_line_is_listing = true;
259  } else {
260  prev_line_is_listing = false;
261  }
262  rc << indent << indent << line << std::endl;
263  }
264  } else {
265  rc << std::endl;
266  }
267  }
268  }
269 
270  // Return
271  if (name_ != "__init__") {
272  rc << std::endl;
273  rc << "Returns:" << std::endl;
274  rc << indent << overload.return_doc_.type_;
275  if (overload.return_doc_.body_ != "") {
276  rc << ": " << overload.return_doc_.body_;
277  }
278  rc << std::endl;
279  }
280  }
281  return rc.str();
282 }
283 
284 std::string FunctionDoc::StringCleanAll(std::string& s,
285  const std::string& white_space) {
286  std::string rc = utility::StripString(s, white_space);
287  return rc;
288 }
289 
290 ArgumentDoc FunctionDoc::ParseArgumentToken(const std::string& argument_token) {
291  ArgumentDoc argument_doc;
292 
293  // Argument with default value
294  std::regex rgx_with_default(
295  "([A-Za-z_][A-Za-z\\d_]*): "
296  "([A-Za-z_][A-Za-z\\d_:\\.\\[\\]\\(\\) ,]*) = (.*)");
297  std::smatch matches;
298  if (std::regex_search(argument_token, matches, rgx_with_default)) {
299  argument_doc.name_ = matches[1].str();
300  argument_doc.type_ = matches[2].str();
301  argument_doc.default_ = matches[3].str();
302 
303  // Handle long default value. Long default has multiple lines and thus
304  // they are not displayed in signature, but in docstrings.
305  size_t default_start_pos = matches.position(3);
306  if (default_start_pos + argument_doc.default_.size() <
307  argument_token.size()) {
308  argument_doc.long_default_ = argument_token.substr(
309  default_start_pos,
310  argument_token.size() - default_start_pos);
311  argument_doc.default_ = "(with default value)";
312  }
313  }
314 
315  else {
316  // Argument without default value
317  std::regex rgx_without_default(
318  "([A-Za-z_][A-Za-z\\d_]*): "
319  "([A-Za-z_][A-Za-z\\d_:\\.\\[\\]\\(\\) ,]*)");
320  if (std::regex_search(argument_token, matches, rgx_without_default)) {
321  argument_doc.name_ = matches[1].str();
322  argument_doc.type_ = matches[2].str();
323  }
324  }
325 
326  return argument_doc;
327 }
328 
329 std::vector<std::string> FunctionDoc::GetArgumentTokens(
330  const std::string& pybind_doc) {
331  // First insert commas to make things easy
332  // From:
333  // "foo(arg0: float, arg1: float = 1.0, arg2: int = 1) -> cloudViewer.bar"
334  // To:
335  // "foo(, arg0: float, arg1: float = 1.0, arg2: int = 1) -> cloudViewer.bar"
336  std::string str = pybind_doc;
337  size_t parenthesis_pos = str.find("(");
338  if (parenthesis_pos == std::string::npos) {
339  return {};
340  } else {
341  str.replace(parenthesis_pos + 1, 0, ", ");
342  }
343 
344  // Get start positions
345  std::regex pattern("(, [A-Za-z_][A-Za-z\\d_]*:)");
346  std::smatch res;
347  std::string::const_iterator start_iter(str.cbegin());
348  std::vector<size_t> argument_start_positions;
349  while (std::regex_search(start_iter, str.cend(), res, pattern)) {
350  size_t pos = res.position(0) + (start_iter - str.cbegin());
351  start_iter = res.suffix().first;
352  // Now the pos include ", ", which needs to be removed
353  argument_start_positions.push_back(pos + 2);
354  }
355 
356  // Get end positions (non-inclusive)
357  // The 1st argument's end pos is 2nd argument's start pos - 2 and etc.
358  // The last argument's end pos is the location of the parenthesis before ->
359  std::vector<size_t> argument_end_positions;
360  for (size_t i = 0; i + 1 < argument_start_positions.size(); ++i) {
361  argument_end_positions.push_back(argument_start_positions[i + 1] - 2);
362  }
363  std::size_t arrow_pos = str.rfind(") -> ");
364  if (arrow_pos == std::string::npos) {
365  return {};
366  } else {
367  argument_end_positions.push_back(arrow_pos);
368  }
369 
370  std::vector<std::string> argument_tokens;
371  for (size_t i = 0; i < argument_start_positions.size(); ++i) {
372  std::string token = str.substr(
373  argument_start_positions[i],
374  argument_end_positions[i] - argument_start_positions[i]);
375  argument_tokens.push_back(token);
376  }
377  return argument_tokens;
378 }
379 
380 } // namespace docstring
381 } // namespace cloudViewer
std::string name
static std::vector< std::string > GetArgumentTokens(const std::string &pybind_doc)
Definition: docstring.cpp:329
std::vector< OverloadDocs > overload_docs_
Definition: docstring.h:108
std::string ToGoogleDocString() const
Generate Google style python docstring.
Definition: docstring.cpp:187
FunctionDoc(const std::string &pybind_doc)
Definition: docstring.cpp:106
void ParseReturn()
Parse function return.
Definition: docstring.cpp:175
static ArgumentDoc ParseArgumentToken(const std::string &argument_token)
Parse individual argument token and returns a ArgumentDoc.
Definition: docstring.cpp:290
static std::string StringCleanAll(std::string &s, const std::string &white_space=" \t\n")
Runs all string cleanup functions.
Definition: docstring.cpp:284
void ParseArguments()
Parse ArgumentDoc for each argument.
Definition: docstring.cpp:160
#define LogWarning(...)
Definition: Logging.h:72
Helper functions for the ml ops.
QTextStream & endl(QTextStream &stream)
Definition: QtCompat.h:718
void ClassMethodDocInject(py::module &pybind_module, const std::string &class_name, const std::string &function_name, const std::unordered_map< std::string, std::string > &map_parameter_body_docs)
Definition: docstring.cpp:27
py::handle static_property
Definition: docstring.cpp:24
void FunctionDocInject(py::module &pybind_module, const std::string &function_name, const std::unordered_map< std::string, std::string > &map_parameter_body_docs)
Definition: docstring.cpp:76
size_t WordLength(const std::string &doc, size_t start_pos, const std::string &valid_chars="_")
Definition: Helper.cpp:257
void SplitString(std::vector< std::string > &tokens, const std::string &str, const std::string &delimiters=" ", bool trim_empty_str=true)
Definition: Helper.cpp:197
std::string & StripString(std::string &str, const std::string &chars="\t\n\v\f\r ")
Definition: Helper.cpp:238
Generic file read and write utility for python interface.