ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
StatisticalTestingTools.cpp
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - CloudViewer: www.cloudViewer.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.cloudViewer.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
9 
10 // local
13 #include <NormalDistribution.h>
14 #include <ReferenceCloud.h>
15 #include <ScalarField.h>
16 
17 #include "CVMath.h"
18 #include "Chi2Helper.h"
19 
20 // system
21 #include <list>
22 
23 using namespace cloudViewer;
24 
26 static double CHI2_MAX = 1e7;
27 
30 struct Chi2Class {
31  double pi;
32  int n;
35  Chi2Class() : pi(0.0), n(0) {}
37  Chi2Class(double _pi, int _n) : pi(_pi), n(_n) {}
38 };
39 
41 using Chi2ClassList = std::list<Chi2Class>;
42 
44  const GenericDistribution* distrib,
45  const GenericCloud* cloud,
46  unsigned numberOfClasses,
47  unsigned& finalNumberOfClasses,
48  bool noClassCompression /*=false*/,
49  ScalarType* histoMin /*=0*/,
50  ScalarType* histoMax /*=0*/,
51  unsigned* histoValues /*=0*/,
52  double* npis /*=0*/) {
53  assert(distrib && cloud);
54  unsigned n = cloud->size();
55 
56  if (n == 0 || !distrib->isValid()) return -1.0;
57 
58  // compute min and max (valid) values
59  ScalarType minV = 0, maxV = 0;
60  unsigned numberOfValidValues = 0;
61  {
62  bool firstValidValue = true;
63  for (unsigned i = 0; i < n; ++i) {
64  ScalarType V = cloud->getPointScalarValue(i);
65  if (ScalarField::ValidValue(V)) {
66  if (firstValidValue) {
67  minV = maxV = V;
68  firstValidValue = false;
69  } else {
70  if (V > maxV)
71  maxV = V;
72  else if (V < minV)
73  minV = V;
74  }
75  ++numberOfValidValues;
76  }
77  }
78  }
79 
80  if (numberOfValidValues == 0) return -1.0;
81 
82  if (histoMin) minV = *histoMin;
83  if (histoMax) maxV = *histoMax;
84 
85  // shall we automatically compute the number of classes?
86  if (numberOfClasses == 0) {
87  numberOfClasses = static_cast<unsigned>(
88  ceil(sqrt(static_cast<double>(numberOfValidValues))));
89  }
90  if (numberOfClasses < 2) {
91  return -2.0; // not enough points/classes
92  }
93 
94  // try to allocate the histogram values array (if necessary)
95  unsigned* histo =
96  (histoValues ? histoValues : new unsigned[numberOfClasses]);
97  if (!histo) {
98  // not enough memory
99  return -1.0;
100  }
101  memset(histo, 0, sizeof(unsigned) * numberOfClasses);
102 
103  // accumulate histogram
104  ScalarType dV = maxV - minV;
105  unsigned histoBefore = 0;
106  unsigned histoAfter = 0;
107  if (GreaterThanEpsilon(dV)) {
108  for (unsigned i = 0; i < n; ++i) {
109  ScalarType V = cloud->getPointScalarValue(i);
110  if (ScalarField::ValidValue(V)) {
111  int bin = static_cast<int>(
112  floor((V - minV) *
113  static_cast<ScalarType>(numberOfClasses) / dV));
114  if (bin < 0) {
115  histoBefore++;
116  } else if (bin >= static_cast<int>(numberOfClasses)) {
117  if (V > maxV)
118  histoAfter++;
119  else
120  histo[numberOfClasses - 1]++;
121  } else {
122  histo[bin]++;
123  }
124  }
125  }
126  } else {
127  histo[0] = n;
128  }
129 
130  // we build up the list of classes
131  Chi2ClassList classes;
132  // before?
133  {
134  if (histoBefore) {
135  try {
136  classes.emplace_back(1.0e-6, static_cast<int>(histoBefore));
137  } catch (const std::bad_alloc&) {
138  // not enough memory!
139  return -1.0;
140  }
141  }
142  double p1 = distrib->computePfromZero(minV);
143  for (unsigned k = 1; k <= numberOfClasses; ++k) {
144  double p2 = distrib->computePfromZero(minV +
145  (k * dV) / numberOfClasses);
146 
147  // add the class to the chain
148  Chi2Class currentClass;
149  currentClass.n = histo[k - 1];
150  currentClass.pi = p2 - p1;
151  if (npis) npis[k - 1] = currentClass.pi * numberOfValidValues;
152 
153  try {
154  classes.push_back(currentClass);
155  } catch (const std::bad_alloc&) {
156  // not enough memory!
157  return -1.0;
158  }
159 
160  p1 = p2; // next intervale
161  }
162  if (histoAfter) {
163  try {
164  classes.emplace_back(1.0e-6, static_cast<int>(histoAfter));
165  } catch (const std::bad_alloc&) {
166  // not enough memory!
167  return -1.0;
168  }
169  }
170  }
171 
172  // classes compression
173  if (!noClassCompression) {
174  // lowest acceptable value: "K/n" (K=5 generally, but it could be 3 or 1
175  // at the tail!)
176  double minPi = 5.0 / numberOfValidValues;
177 
178  while (classes.size() > 2) {
179  // we look for the smallest class (smallest "npi")
180  Chi2ClassList::iterator it = classes.begin();
181  Chi2ClassList::iterator minIt = it;
182  for (; it != classes.end(); ++it)
183  if (it->pi < minIt->pi) minIt = it;
184 
185  if (minIt->pi >=
186  minPi) // all classes are bigger than the minimum requirement
187  break;
188 
189  // otherwise we must merge the smallest class with its neighbor (to
190  // make the classes repartition more equilibrated)
191  Chi2ClassList::iterator smallestIt;
192  {
193  Chi2ClassList::iterator nextIt = minIt;
194  ++nextIt;
195  if (minIt == classes.begin()) {
196  smallestIt = nextIt;
197  } else {
198  Chi2ClassList::iterator predIt = minIt;
199  --predIt;
200  smallestIt =
201  (nextIt != classes.end() && nextIt->pi < predIt->pi
202  ? nextIt
203  : predIt);
204  }
205  }
206 
207  smallestIt->pi += minIt->pi;
208  smallestIt->n += minIt->n;
209 
210  // we can remove the current class
211  classes.erase(minIt);
212  }
213  }
214 
215  // we compute the Chi2 distance with the remaining classes
216  double D2 = 0.0;
217  {
218  for (Chi2ClassList::iterator it = classes.begin(); it != classes.end();
219  ++it) {
220  double npi = it->pi * numberOfValidValues;
221  if (npi != 0.0) {
222  double temp = static_cast<double>(it->n) - npi;
223  D2 += temp * (temp / npi);
224  if (D2 >= CHI2_MAX) {
225  D2 = CHI2_MAX;
226  break;
227  }
228  } else {
229  D2 = CHI2_MAX;
230  break;
231  }
232  }
233  }
234 
235  if (!histoValues) delete[] histo;
236 
237  finalNumberOfClasses = static_cast<unsigned>(classes.size());
238 
239  return D2;
240 }
241 
243  return Chi2Helper::critchi(p, d);
244 }
245 
247  int d) {
248  return Chi2Helper::pochisq(chi2result, d);
249 }
250 
252  const GenericDistribution* distrib,
253  GenericIndexedCloudPersist* theCloud,
254  unsigned numberOfNeighbours,
255  double pTrust,
256  GenericProgressCallback* progressCb /*=0*/,
257  DgmOctree* inputOctree /*=0*/) {
258  assert(theCloud);
259 
260  if (!distrib->isValid()) return -1.0;
261 
262  DgmOctree* theOctree = inputOctree;
263  if (!theOctree) {
264  theOctree = new DgmOctree(theCloud);
265  if (theOctree->build(progressCb) < 1) {
266  delete theOctree;
267  return -2.0;
268  }
269  }
270 
271  // on active le champ scalaire (IN) pour recevoir les distances du Chi2
272  theCloud->enableScalarField();
273 
274  unsigned char level = theOctree->findBestLevelForAGivenPopulationPerCell(
275  numberOfNeighbours);
276 
277  unsigned numberOfChi2Classes = static_cast<unsigned>(
278  ceil(sqrt(static_cast<double>(numberOfNeighbours))));
279 
280  // Chi2 hisogram values
281  unsigned* histoValues = new unsigned[numberOfChi2Classes];
282  if (!histoValues) {
283  if (!inputOctree) delete theOctree;
284  return -3.0;
285  }
286 
287  ScalarType *histoMin = nullptr, customHistoMin = 0;
288  ScalarType *histoMax = nullptr, customHistoMax = 0;
289  if (strcmp(distrib->getName(), "Gauss") == 0) {
290  const NormalDistribution* nDist =
291  static_cast<const NormalDistribution*>(distrib);
292  ScalarType mu = 0, sigma2 = 0;
293  nDist->getParameters(mu, sigma2);
294  customHistoMin = mu - static_cast<ScalarType>(3.0) * sqrt(sigma2);
295  histoMin = &customHistoMin;
296  customHistoMax = mu + static_cast<ScalarType>(3.0) * sqrt(sigma2);
297  histoMax = &customHistoMax;
298  } else if (strcmp(distrib->getName(), "Weibull") == 0) {
299  customHistoMin = 0;
300  histoMin = &customHistoMin;
301  }
302 
303  // additionnal parameters for local process
304  void* additionalParameters[] = {
305  reinterpret_cast<void*>(const_cast<GenericDistribution*>(distrib)),
306  reinterpret_cast<void*>(&numberOfNeighbours),
307  reinterpret_cast<void*>(&numberOfChi2Classes),
308  reinterpret_cast<void*>(histoValues),
309  reinterpret_cast<void*>(histoMin),
310  reinterpret_cast<void*>(histoMax)};
311 
312  double maxChi2 = -1.0;
313 
314  // let's compute Chi2 distances
316  level, &computeLocalChi2DistAtLevel, additionalParameters,
317  numberOfNeighbours / 2, numberOfNeighbours * 3, true,
318  progressCb,
319  "Statistical Test") != 0) // success
320  {
321  if (!progressCb || !progressCb->isCancelRequested()) {
322  // theoretical Chi2 fractile
323  maxChi2 = computeChi2Fractile(pTrust, numberOfChi2Classes - 1);
324  maxChi2 = sqrt(maxChi2); // on travaille avec les racines carrees
325  // des distances du Chi2
326  }
327  }
328 
329  delete[] histoValues;
330  histoValues = nullptr;
331 
332  if (!inputOctree) delete theOctree;
333 
334  return maxChi2;
335 }
336 
338  const DgmOctree::octreeCell& cell,
339  void** additionalParameters,
340  NormalizedProgress* nProgress /*=0*/) {
341  // variables additionnelles
342  GenericDistribution* statModel =
343  reinterpret_cast<GenericDistribution*>(additionalParameters[0]);
344  unsigned numberOfNeighbours =
345  *reinterpret_cast<unsigned*>(additionalParameters[1]);
346  unsigned numberOfChi2Classes =
347  *reinterpret_cast<unsigned*>(additionalParameters[2]);
348  unsigned* histoValues =
349  reinterpret_cast<unsigned*>(additionalParameters[3]);
350  ScalarType* histoMin =
351  reinterpret_cast<ScalarType*>(additionalParameters[4]);
352  ScalarType* histoMax =
353  reinterpret_cast<ScalarType*>(additionalParameters[5]);
354 
355  // number of points in the current cell
356  unsigned n = cell.points->size();
357 
359  nNSS.level = cell.level;
360  nNSS.minNumberOfNeighbors = numberOfNeighbours;
361  cell.parentOctree->getCellPos(cell.truncatedCode, cell.level, nNSS.cellPos,
362  true);
363  cell.parentOctree->computeCellCenter(nNSS.cellPos, cell.level,
364  nNSS.cellCenter);
365 
366  // we already know the points of the first cell (this is the one we are
367  // currently processing!)
368  {
369  try {
370  nNSS.pointsInNeighbourhood.resize(n);
371  } catch (const std::bad_alloc&) // out of memory
372  {
373  return false;
374  }
375 
376  DgmOctree::NeighboursSet::iterator it =
377  nNSS.pointsInNeighbourhood.begin();
378  for (unsigned j = 0; j < n; ++j, ++it) {
379  it->point = cell.points->getPointPersistentPtr(j);
380  it->pointIndex = cell.points->getPointGlobalIndex(j);
381  }
383  }
384 
385  ReferenceCloud neighboursCloud(cell.points->getAssociatedCloud());
386  if (!neighboursCloud.reserve(numberOfNeighbours)) {
387  // not enough memory!
388  return false;
389  }
390 
391  for (unsigned i = 0; i < n; ++i) {
392  cell.points->getPoint(i, nNSS.queryPoint);
393  ScalarType D = cell.points->getPointScalarValue(i);
394 
395  if (ScalarField::ValidValue(D)) {
396  // nNSS.theNearestPoints.clear();
397 
398  unsigned k =
400  nNSS, true);
401  if (k > numberOfNeighbours) k = numberOfNeighbours;
402 
403  neighboursCloud.clear();
404  for (unsigned j = 0; j < k; ++j)
405  neighboursCloud.addPointIndex(
406  nNSS.pointsInNeighbourhood[j].pointIndex);
407 
408  unsigned finalNumberOfChi2Classes = 0;
409  // LAZY VERSION (approximate test)
410  double Chi2Dist = static_cast<ScalarType>(computeAdaptativeChi2Dist(
411  statModel, &neighboursCloud, numberOfChi2Classes,
412  finalNumberOfChi2Classes, true, histoMin, histoMax,
413  histoValues));
414  // STRICT VERSION (ultra-precise test)
415  // double Chi2Dist =
416  // static_cast<ScalarType>(computeAdaptativeChi2Dist(statModel,
417  // &neighboursCloud, numberOfChi2Classes, finalNumberOfChi2Classes,
418  // false, histoMin, histoMax, histoValues));
419 
420  D = (Chi2Dist >= 0.0 ? static_cast<ScalarType>(sqrt(Chi2Dist))
421  : NAN_VALUE);
422  }
423 
424  // We assume that "IN" and "OUT" scalar fields are different!
425  cell.points->setPointScalarValue(i, D);
426 
427  if (nProgress && !nProgress->oneStep()) return false;
428  }
429 
430  return true;
431 }
constexpr ScalarType NAN_VALUE
NaN as a ScalarType value.
Definition: CVConst.h:76
static double pochisq(double x, int df)
Probability of chi-square value.
Definition: Chi2Helper.h:108
static double critchi(double p, int df)
Compute critical chi-square value toproduce given p.
Definition: Chi2Helper.h:148
The octree structure used throughout the library.
Definition: DgmOctree.h:39
void getCellPos(CellCode code, unsigned char level, Tuple3i &cellPos, bool isCodeTruncated) const
Definition: DgmOctree.cpp:498
unsigned findNearestNeighborsStartingFromCell(NearestNeighboursSearchStruct &nNSS, bool getOnlyPointsWithValidScalar=false) const
Definition: DgmOctree.cpp:1655
void computeCellCenter(CellCode code, unsigned char level, CCVector3 &center, bool isCodeTruncated=false) const
Definition: DgmOctree.h:862
int build(GenericProgressCallback *progressCb=nullptr)
Builds the structure.
Definition: DgmOctree.cpp:196
unsigned executeFunctionForAllCellsStartingAtLevel(unsigned char startingLevel, octreeCellFunc func, void **additionalParameters, unsigned minNumberOfPointsPerCell, unsigned maxNumberOfPointsPerCell, bool multiThread=true, GenericProgressCallback *progressCb=nullptr, const char *functionTitle=nullptr, int maxThreadCount=0)
Definition: DgmOctree.cpp:3820
unsigned char findBestLevelForAGivenPopulationPerCell(unsigned indicativeNumberOfPointsPerCell) const
Definition: DgmOctree.cpp:2737
virtual bool enableScalarField()=0
Enables the scalar field associated to the cloud.
virtual unsigned size() const =0
Returns the number of points.
virtual ScalarType getPointScalarValue(unsigned pointIndex) const =0
Returns the ith point associated scalar value.
A generic class to handle a probability distribution.
virtual const char * getName() const =0
Returns distribution name.
virtual bool isValid() const
Indicates if the distribution parameters are valid.
virtual double computePfromZero(ScalarType x) const =0
Computes the cumulative probability between 0 and x.
A generic 3D point cloud with index-based and presistent access to points.
virtual bool isCancelRequested()=0
Checks if the process should be canceled.
The Normal/Gaussian statistical distribution.
bool getParameters(ScalarType &_mu, ScalarType &_sigma2) const
Returns the distribution parameters.
bool oneStep()
Increments total progress value of a single unit.
A very simple point cloud (no point duplication)
void setPointScalarValue(unsigned pointIndex, ScalarType value) override
Sets the ith point associated scalar value.
virtual bool addPointIndex(unsigned globalIndex)
Point global index insertion mechanism.
virtual GenericIndexedCloudPersist * getAssociatedCloud()
Returns the associated (source) cloud.
unsigned size() const override
Returns the number of points.
virtual unsigned getPointGlobalIndex(unsigned localIndex) const
virtual void clear(bool releaseMemory=false)
Clears the cloud.
const CCVector3 * getPointPersistentPtr(unsigned index) override
Returns the ith point as a persistent pointer.
virtual bool reserve(unsigned n)
Reserves some memory for hosting the point references.
const CCVector3 * getPoint(unsigned index) const override
Returns the ith point.
ScalarType getPointScalarValue(unsigned pointIndex) const override
Returns the ith point associated scalar value.
static bool ValidValue(ScalarType value)
Returns whether a scalar value is valid or not.
Definition: ScalarField.h:61
static double testCloudWithStatisticalModel(const GenericDistribution *distrib, GenericIndexedCloudPersist *theCloud, unsigned numberOfNeighbours, double pTrust, GenericProgressCallback *progressCb=nullptr, DgmOctree *inputOctree=nullptr)
static double computeAdaptativeChi2Dist(const GenericDistribution *distrib, const GenericCloud *cloud, unsigned numberOfClasses, unsigned &finalNumberOfClasses, bool noClassCompression=false, ScalarType *histoMin=nullptr, ScalarType *histoMax=nullptr, unsigned *histoValues=nullptr, double *npis=nullptr)
Computes the Chi2 distance on a sample of scalar values.
static double computeChi2Probability(double chi2result, int d)
Computes the Chi2 confidence probability.
static double computeChi2Fractile(double p, int d)
Computes the Chi2 fractile.
static bool computeLocalChi2DistAtLevel(const DgmOctree::octreeCell &cell, void **additionalParameters, NormalizedProgress *nProgress=nullptr)
Computes (locally) the Chi2 distance inside an octree cell.
static double CHI2_MAX
Max computable Chi2 distance.
std::list< Chi2Class > Chi2ClassList
An ordered list of Chi2 classes.
MiniVec< float, N > floor(const MiniVec< float, N > &a)
Definition: MiniVec.h:75
MiniVec< float, N > ceil(const MiniVec< float, N > &a)
Definition: MiniVec.h:89
Generic file read and write utility for python interface.
bool GreaterThanEpsilon(float x)
Test a floating point number against our epsilon (a very small number).
Definition: CVMath.h:37
cloudViewer::NormalizedProgress * nProgress
Chi2Class(double _pi, int _n)
Constructor from parameters.
Chi2Class()
Default constructor.
Container of in/out parameters for nearest neighbour(s) search.
Definition: DgmOctree.h:161
unsigned char level
Level of subdivision of the octree at which to start the search.
Definition: DgmOctree.h:171
Tuple3i cellPos
Position in the octree of the cell including the query point.
Definition: DgmOctree.h:184
CCVector3 cellCenter
Coordinates of the center of the cell including the query point.
Definition: DgmOctree.h:189
unsigned minNumberOfNeighbors
Minimal number of neighbours to find.
Definition: DgmOctree.h:177
Octree cell descriptor.
Definition: DgmOctree.h:354
ReferenceCloud * points
Set of points lying inside this cell.
Definition: DgmOctree.h:365
const DgmOctree * parentOctree
Octree to which the cell belongs.
Definition: DgmOctree.h:359
unsigned char level
Cell level of subdivision.
Definition: DgmOctree.h:367
CellCode truncatedCode
Truncated cell code.
Definition: DgmOctree.h:361