ACloudViewer  3.9.4
A Modern Library for 3D Data Processing
SparseBundleCU.cpp
Go to the documentation of this file.
1 // File: SparseBundleCU.cpp
3 // Author: Changchang Wu
4 // Description : implementation of the CUDA-based multicore bundle adjustment
5 //
6 // Copyright (c) 2011 Changchang Wu (ccwu@cs.washington.edu)
7 // and the University of Washington at Seattle
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU General Public
11 // License as published by the Free Software Foundation; either
12 // Version 3 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // General Public License for more details.
18 //
20 
21 #include <vector>
22 #include <iostream>
23 #include <utility>
24 #include <algorithm>
25 #include <fstream>
26 #include <iomanip>
27 using std::vector;
28 using std::cout;
29 using std::pair;
30 using std::ofstream;
31 
32 #include <stdlib.h>
33 #include <math.h>
34 #include <float.h>
35 #include "pba.h"
36 #include "SparseBundleCU.h"
37 
38 #include "ProgramCU.h"
39 
40 using namespace pba::ProgramCU;
41 
42 #ifdef _WIN32
43 #define finite _finite
44 #endif
45 
46 namespace pba {
47 
48 typedef float float_t; // data type for host computation; double doesn't make
49  // much difference
50 
51 #define CHECK_VEC(v1, v2) \
52  for (size_t j = 0; j < v1.size(); ++j) { \
53  if (v1[j] != v2[j]) { \
54  different++; \
55  std::cout << i << ' ' << j << ' ' << v1[j] << ' ' << v2[j] << '\n'; \
56  } \
57  }
58 #define DEBUG_FUNCN(v, func, input, N) \
59  if (__debug_pba && v.IsValid()) { \
60  vector<float> buf(v.GetLength()), buf_(v.GetLength()); \
61  for (int i = 0; i < N; ++i) { \
62  int different = 0; \
63  func input; \
64  ProgramCU::FinishWorkCUDA(); \
65  if (i > 0) { \
66  v.CopyToHost(&buf_[0]); \
67  CHECK_VEC(buf, buf_); \
68  } else { \
69  v.CopyToHost(&buf[0]); \
70  } \
71  if (different != 0) \
72  std::cout << #func << " : " << i << " : " << different << '\n'; \
73  } \
74  }
75 #define DEBUG_FUNC(v, func, input) DEBUG_FUNCN(v, func, input, 2)
76 
78  : ParallelBA(PBA_INVALID_DEVICE),
79  _num_camera(0),
80  _num_point(0),
81  _num_imgpt(0),
82  _num_imgpt_q(0),
83  _camera_data(NULL),
84  _point_data(NULL),
85  _imgpt_data(NULL),
86  _camera_idx(NULL),
87  _point_idx(NULL),
88  _projection_sse(0) {
89  __selected_device = device;
90 }
91 
94  size_t sz = ProgramCU::GetCudaMemoryCap();
95  if (sz < 1024) std::cout << "WARNING: CUDA is unlikely to be supported!\n";
96  return sz < 1024 ? 0 : sz;
97 }
98 
99 void SparseBundleCU::SetCameraData(size_t ncam, CameraT* cams) {
100  if (sizeof(CameraT) != 16 * sizeof(float)) exit(0); // never gonna happen...?
101  _num_camera = (int)ncam;
102  _camera_data = cams;
103  _focal_mask = NULL;
104 }
105 
106 void SparseBundleCU::SetFocalMask(const int* fmask, float weight) {
107  _focal_mask = fmask;
108  _weight_q = weight;
109 }
110 
111 void SparseBundleCU::SetPointData(size_t npoint, Point3D* pts) {
112  _num_point = (int)npoint;
113  _point_data = (float*)pts;
114 }
115 
116 void SparseBundleCU::SetProjection(size_t nproj, const Point2D* imgpts,
117  const int* point_idx, const int* cam_idx) {
118  _num_imgpt = (int)nproj;
119  _imgpt_data = (float*)imgpts;
120  _camera_idx = cam_idx;
121  _point_idx = point_idx;
122  _imgpt_datax.resize(0);
123 }
124 
126  return float(_projection_sse /
128 }
129 
131  if (ValidateInputData() != STATUS_SUCCESS) return;
132 
133  //
134 
136  TimerBA timer(this, TIMER_OVERALL);
137 
138  NormalizeData();
139  if (InitializeBundle() != STATUS_SUCCESS) {
140  // failed to allocate gpu storage
141  } else if (__profile_pba) {
142  // profiling some stuff
143  RunProfileSteps();
144  } else {
145  // real optimization
149  }
150  DenormalizeData();
151 }
152 
157  if (__num_lm_success > 0)
161  return __num_lm_success;
162 }
163 
165  bool previous_allocated = __memory_usage > 0;
166 
167  bool success = TransferDataToGPU() && InitializeStorageForCG();
168  if (!success && previous_allocated) {
169  if (__verbose_level) std::cout << "WARNING: try clean allocation\n";
172  success = TransferDataToGPU() && InitializeStorageForCG();
173  }
174 
175  if (!success && __jc_store_original) {
176  if (__verbose_level) std::cout << "WARNING: try not storing original JC\n";
177  __jc_store_original = false;
180  success = TransferDataToGPU() && InitializeStorageForCG();
181  }
182  if (!success && __jc_store_transpose) {
183  if (__verbose_level) std::cout << "WARNING: try not storing transpose JC\n";
184  __jc_store_transpose = false;
187  success = TransferDataToGPU() && InitializeStorageForCG();
188  }
189  if (!success && !__no_jacobian_store) {
190  if (__verbose_level) std::cout << "WARNING: switch to memory saving mode\n";
191  __no_jacobian_store = true;
194  success = TransferDataToGPU() && InitializeStorageForCG();
195  }
196  return success;
197 }
198 
200  if (_camera_data == NULL) return STATUS_CAMERA_MISSING;
201  if (_point_data == NULL) return STATUS_POINT_MISSING;
203  if (_camera_idx == NULL || _point_idx == NULL)
205  return STATUS_SUCCESS;
206 }
207 
209  std::cout << "Warm up device with storage allocation...\n";
213 }
214 
222  return STATUS_SUCCESS;
223 }
224 
226  return _num_camera * 8 + 4 * _num_point;
227 }
228 
230  if (CheckRequiredMem(0)) return true;
231  if (__jc_store_original) {
232  if (__verbose_level) std::cout << "NOTE: not storing original JC\n";
233  __jc_store_original = false;
234  if (CheckRequiredMem(1)) return true;
235  }
236  if (__jc_store_transpose) {
237  if (__verbose_level) std::cout << "NOTE: not storing camera Jacobian\n";
238  __jc_store_transpose = false;
239  if (CheckRequiredMem(1)) return true;
240  }
241  if (!__no_jacobian_store) {
242  if (__verbose_level) std::cout << "NOTE: not storing any Jacobian\n";
243  __no_jacobian_store = true;
244  if (CheckRequiredMem(1)) return true;
245  }
246  return false;
247 }
248 
250  int m = _num_camera, n = _num_point, k = _num_imgpt;
251 #ifdef PBA_CUDA_ALLOCATE_MORE
252  if (!fresh) {
253  int m0 = _cuCameraData.GetReservedWidth();
254  m = std::max(m, m0);
255  int n0 = _cuPointData.GetReservedWidth();
256  n = std::max(n, n0);
258  k = std::max(k, k0);
259  }
260 #endif
261 
262  int p = 8 * m + 4 * n, q = _num_imgpt_q;
263  size_t szn, total = GetCudaMemoryCap();
264  size_t sz0 = 800 * 600 * 2 * 4 * sizeof(float); //
265  size_t szq = q > 0 ? (sizeof(float) * (q + m) * 4) : 0;
266  size_t sz = sizeof(float) * (258 + 9 * n + 33 * m + 7 * k) + sz0;
267 
269  sz += p * 6 * sizeof(float);
270  sz += ((__use_radial_distortion ? 64 : 56) * m + 12 * n) * sizeof(float);
271  sz += (2 * (k + q) * sizeof(float));
272  if (sz > total) return false;
273 
275  szn = (__no_jacobian_store ? 0 : (sizeof(float) * 8 * k));
276  if (sz + szn > total)
277  __no_jacobian_store = false;
278  else
279  sz += szn;
281  szn = ((!__no_jacobian_store && __jc_store_transpose) ? 16 * k * sizeof(float)
282  : 0);
283  if (sz + szn > total)
284  __jc_store_transpose = false;
285  else
286  sz += szn;
288  szn = ((!__no_jacobian_store && __jc_store_original) ? 16 * k * sizeof(float)
289  : 0);
290  if (sz + szn > total)
291  __jc_store_original = false;
292  else
293  sz += szn;
296  ? k * sizeof(int)
297  : 0);
298  if (sz + szn > total) {
299  __jc_store_transpose = false;
300  sz -= (16 * k * sizeof(float));
301  } else
302  sz += szn;
303 
304  return sz <= total;
305 }
306 
307 void SparseBundleCU::ReserveStorage(size_t ncam, size_t npt, size_t nproj) {
308  if (ncam <= 1 || npt <= 1 || nproj <= 1) {
310  // Reset the memory strategy to the default.
311  __jc_store_transpose = true;
312  __jc_store_original = true;
313  __no_jacobian_store = false;
314  } else {
315  const int* camidx = _camera_idx;
316  const int* ptidx = _point_idx;
317  int ncam_ = _num_camera;
318  int npt_ = _num_point;
319  int nproj_ = _num_imgpt;
320 
321 #ifdef PBA_CUDA_ALLOCATE_MORE
322  size_t ncam_reserved = _cuCameraData.GetReservedWidth();
323  size_t npt_reserved = _cuPointData.GetReservedWidth();
324  size_t nproj_reserved = _cuMeasurements.GetReservedWidth();
325  ncam = std::max(ncam, ncam_reserved);
326  npt = std::max(npt, npt_reserved);
327  nproj = std::max(nproj, nproj_reserved);
328 #endif
329 
330  _camera_idx = NULL;
331  _point_idx = NULL;
332  _num_camera = (int)ncam;
333  _num_point = (int)npt;
334  _num_imgpt = (int)nproj;
335 
336  if (__verbose_level)
337  std::cout << "Reserving storage for ncam = " << ncam << "; npt = " << npt
338  << "; nproj = " << nproj << '\n';
340 
341  _num_camera = ncam_;
342  _num_point = npt_;
343  _num_imgpt = nproj_;
344  _camera_idx = camidx;
345  _point_idx = ptidx;
346  }
347 }
348 
349 static size_t upgrade_dimension(size_t sz) {
350  size_t x = 1;
351  while (x < sz) x <<= 1;
352  return x;
353 }
354 
356  if (_cuCameraData.data() == NULL || _cuPointData.data() == NULL ||
358  return;
361 }
362 
363 #define REPORT_ALLOCATION(NAME) \
364  if (__verbose_allocation && NAME.GetDataSize() > 1024) \
365  std::cout << (NAME.GetDataSize() > 1024 * 1024 \
366  ? NAME.GetDataSize() / 1024 / 1024 \
367  : NAME.GetDataSize() / 1024) \
368  << (NAME.GetDataSize() > 1024 * 1024 ? "MB" : "KB") \
369  << "\t allocated for " #NAME "\n";
370 
371 #define ASSERT_ALLOCATION(NAME) \
372  if (!success) { \
373  std::cerr << "WARNING: failed to allocate " \
374  << (__verbose_allocation ? #NAME "; size = " : "") \
375  << (total_sz / 1024 / 1024) << "MB + " \
376  << (NAME.GetRequiredSize() / 1024 / 1024) << "MB\n"; \
377  return false; \
378  } else { \
379  total_sz += NAME.GetDataSize(); \
380  REPORT_ALLOCATION(NAME); \
381  }
382 
383 #define CHECK_ALLOCATION(NAME) \
384  if (NAME.GetDataSize() == 0 && NAME.GetRequiredSize() > 0) { \
385  ClearPreviousError(); \
386  std::cerr << "WARNING: unable to allocate " #NAME ": " \
387  << (NAME.GetRequiredSize() / 1024 / 1024) << "MB\n"; \
388  } else { \
389  total_sz += NAME.GetDataSize(); \
390  REPORT_ALLOCATION(NAME); \
391  }
392 
393 #define ALLOCATE_REQUIRED_DATA(NAME, num, channels) \
394  { \
395  success &= NAME.InitTexture(num, 1, channels); \
396  ASSERT_ALLOCATION(NAME); \
397  }
398 
399 #define ALLOCATE_OPTIONAL_DATA(NAME, num, channels, option) \
400  if (option) { \
401  option = NAME.InitTexture(num, 1, channels); \
402  CHECK_ALLOCATION(NAME); \
403  } else { \
404  NAME.InitTexture(0, 0, 0); \
405  }
406 
408  // given m camera, npoint, k measurements.. the number of float is
409  bool success = true;
410  size_t total_sz = 0;
411 
413  vector<int> qmap, qlist;
414  vector<float> qmapw, qlistw;
415  ProcessIndexCameraQ(qmap, qlist);
416 
418  ALLOCATE_REQUIRED_DATA(_cuBufferData, 256, 1); // 256
422 
431 
432  //
436  2);
438  2);
439 
440  if (__no_jacobian_store) {
445  } else {
448  __jc_store_transpose); // 16k
450  __jc_store_original); // 16k
451 
454  __jc_store_transpose); // k
456  } else {
458  }
459  }
460 
462  if (_camera_idx && _point_idx) {
466  vector<int> cpi(_num_camera + 1), cpidx(_num_imgpt);
467  vector<int> cpnum(_num_camera, 0);
468  cpi[0] = 0;
469  for (int i = 0; i < _num_imgpt; ++i) cpnum[_camera_idx[i]]++;
470  for (int i = 1; i <= _num_camera; ++i) cpi[i] = cpi[i - 1] + cpnum[i - 1];
471  vector<int> cptidx = cpi;
472  for (int i = 0; i < _num_imgpt; ++i) cpidx[cptidx[_camera_idx[i]]++] = i;
473  if (_num_imgpt_q > 0) ProcessWeightCameraQ(cpnum, qmap, qmapw, qlistw);
475 
479  : _imgpt_data);
485  vector<int> ridx(_num_imgpt);
486  for (int i = 0; i < _num_imgpt; ++i) ridx[cpidx[i]] = i;
488  }
489  if (_num_imgpt_q > 0) {
490  _cuCameraQMap.CopyFromHost(&qmap[0]);
491  _cuCameraQMapW.CopyFromHost(&qmapw[0]);
492  _cuCameraQList.CopyFromHost(&qlist[0]);
493  _cuCameraQListW.CopyFromHost(&qlistw[0]);
494  }
496 
500  vector<int> ppi(_num_point + 1);
501  for (int i = 0, last_point = -1; i < _num_imgpt; ++i) {
502  int pt = _point_idx[i];
503  while (last_point < pt) ppi[++last_point] = i;
504  }
505  ppi[_num_point] = _num_imgpt;
506 
508  vector<int> projection_map(_num_imgpt * 2);
509  for (int i = 0; i < _num_imgpt; ++i) {
510  int* imp = &projection_map[i * 2];
511  imp[0] = _camera_idx[i] * 2;
512  imp[1] = _point_idx[i];
513  }
515 
519  _cuProjectionMap.CopyFromHost(&projection_map[0]);
521  }
522 
523  __memory_usage = total_sz;
524  if (__verbose_level > 1)
525  std::cout << "Memory for Motion/Structure/Jacobian:\t"
526  << (total_sz / 1024 / 1024) << "MB\n";
527  return success;
528 }
529 
530 bool SparseBundleCU::ProcessIndexCameraQ(vector<int>& qmap,
531  vector<int>& qlist) {
532  // reset q-data
533  qmap.resize(0);
534  qlist.resize(0);
535  _num_imgpt_q = 0;
536 
537  // verify input
538  if (_camera_idx == NULL) return true;
539  if (_point_idx == NULL) return true;
540  if (_focal_mask == NULL) return true;
541  if (_num_camera == 0) return true;
542  if (_weight_q <= 0) return true;
543 
545 
546  int error = 0;
547  vector<int> temp(_num_camera * 2, -1);
548 
549  for (int i = 0; i < _num_camera; ++i) {
550  int iq = _focal_mask[i];
551  if (iq > i) {
552  error = 1;
553  break;
554  }
555  if (iq < 0) continue;
556  if (iq == i) continue;
557  int ip = temp[2 * iq];
558  // float ratio = _camera_data[i].f / _camera_data[iq].f;
559  // if(ratio < 0.01 || ratio > 100)
560  //{
561  // std::cout << "Warning: constaraints on largely different camreas\n";
562  // continue;
563  //}else
564  if (_focal_mask[iq] != iq) {
565  error = 1;
566  break;
567  } else if (ip == -1) {
568  temp[2 * iq] = i;
569  temp[2 * iq + 1] = i;
570  temp[2 * i] = iq;
571  temp[2 * i + 1] = iq;
572  } else {
573  // maintain double-linked list
574  temp[2 * i] = ip;
575  temp[2 * i + 1] = iq;
576  temp[2 * ip + 1] = i;
577  temp[2 * iq] = i;
578  }
579  }
580 
581  if (error) {
582  std::cout << "Error: incorrect constraints\n";
583  _focal_mask = NULL;
584  return false;
585  }
586 
587  qlist.resize(_num_camera * 2, -1);
588  for (int i = 0; i < _num_camera; ++i) {
589  int inext = temp[2 * i + 1];
590  if (inext == -1) continue;
591  qlist[2 * i] = _num_imgpt + _num_imgpt_q;
592  qlist[2 * inext + 1] = _num_imgpt + _num_imgpt_q;
593  qmap.push_back(i);
594  qmap.push_back(inext);
595  _num_imgpt_q++;
596  }
597  return true;
598 }
599 
600 void SparseBundleCU::ProcessWeightCameraQ(vector<int>& cpnum, vector<int>& qmap,
601  vector<float>& qmapw,
602  vector<float>& qlistw) {
603  // set average focal length and average radial distortion
604  vector<float> qpnum(_num_camera, 0), qcnum(_num_camera, 0);
605  vector<float> fs(_num_camera, 0), rs(_num_camera, 0);
606 
607  for (int i = 0; i < _num_camera; ++i) {
608  int qi = _focal_mask[i];
609  if (qi == -1) continue;
610  // float ratio = _camera_data[i].f / _camera_data[qi].f;
611  // if(ratio < 0.01 || ratio > 100) continue;
612  fs[qi] += _camera_data[i].f;
613  rs[qi] += _camera_data[i].radial;
614  qpnum[qi] += cpnum[i];
615  qcnum[qi] += 1.0f;
616  }
617 
618  // this seems not really matter..they will converge anyway
619  for (int i = 0; i < _num_camera; ++i) {
620  int qi = _focal_mask[i];
621  if (qi == -1) continue;
622  // float ratio = _camera_data[i].f / _camera_data[qi].f;
623  // if(ratio < 0.01 || ratio > 100) continue;
624  _camera_data[i].f = fs[qi] / qcnum[qi];
625  _camera_data[i].radial = rs[qi] / qcnum[qi];
626  }
627 
628  qmapw.resize(_num_imgpt_q * 2, 0);
629  qlistw.resize(_num_camera * 2, 0);
630  for (int i = 0; i < _num_imgpt_q; ++i) {
631  int cidx = qmap[i * 2], qi = _focal_mask[cidx];
632  float wi = sqrt(qpnum[qi] / qcnum[qi]) * _weight_q;
633  float wr = (__use_radial_distortion ? wi * _camera_data[qi].f : 0.0);
634  qmapw[i * 2] = wi;
635  qmapw[i * 2 + 1] = wr;
636  qlistw[cidx * 2] = wi;
637  qlistw[cidx * 2 + 1] = wr;
638  }
639 }
640 
672 }
673 
675  int incompatible_radial_distortion = 0;
676  if (__focal_normalize) {
677  if (__focal_scaling == 1.0f) {
678  //------------------------------------------------------------------
680  vector<float> focals(_num_camera);
681  for (int i = 0; i < _num_camera; ++i) focals[i] = _camera_data[i].f;
682  std::nth_element(focals.begin(), focals.begin() + _num_camera / 2,
683  focals.end());
684  float median_focal_length = focals[_num_camera / 2];
685  __focal_scaling = __data_normalize_median / median_focal_length;
686  float radial_factor = median_focal_length * median_focal_length * 4.0f;
687 
689  _imgpt_datax.resize(_num_imgpt * 2);
690  for (int i = 0; i < _num_imgpt * 2; ++i)
692  for (int i = 0; i < _num_camera; ++i) {
695  } else if (__reset_initial_distortion) {
696  _camera_data[i].radial = 0;
697  } else if (_camera_data[i].distortion_type != __use_radial_distortion) {
698  incompatible_radial_distortion++;
699  _camera_data[i].radial = 0;
700  } else if (__use_radial_distortion == -1) {
701  _camera_data[i].radial *= radial_factor;
702  }
703  }
704  if (__verbose_level > 2)
705  std::cout << "Focal length normalized by " << __focal_scaling << '\n';
707  }
708  } else {
710  for (int i = 0; i < _num_camera; ++i) {
712  _camera_data[i].radial = 0;
713  } else if (_camera_data[i].distortion_type != __use_radial_distortion) {
714  _camera_data[i].radial = 0;
715  incompatible_radial_distortion++;
716  }
717  }
719  }
720  _imgpt_datax.resize(0);
721  }
722 
723  if (incompatible_radial_distortion) {
724  std::cout << "WARNING: incompatible radial distortion input; reset to 0;\n";
725  }
726 }
727 
729  if (__depth_scaling == 1.0f) {
730  const float dist_bound = 1.0f;
731  vector<float> oz(_num_imgpt);
732  vector<float> cpdist1(_num_camera, dist_bound);
733  vector<float> cpdist2(_num_camera, -dist_bound);
734  vector<int> camnpj(_num_camera, 0), cambpj(_num_camera, 0);
735  int bad_point_count = 0;
736  for (int i = 0; i < _num_imgpt; ++i) {
737  int cmidx = _camera_idx[i];
738  CameraT* cam = _camera_data + cmidx;
739  float* rz = cam->m[2];
740  float* x = _point_data + 4 * _point_idx[i];
741  oz[i] = (rz[0] * x[0] + rz[1] * x[1] + rz[2] * x[2] + cam->t[2]);
742 
744  // points behind camera may causes big problem
745  float ozr = oz[i] / cam->t[2];
746  if (fabs(ozr) < __depth_check_epsilon) {
747  bad_point_count++;
748  float px = cam->f * (cam->m[0][0] * x[0] + cam->m[0][1] * x[1] +
749  cam->m[0][2] * x[2] + cam->t[0]);
750  float py = cam->f * (cam->m[1][0] * x[0] + cam->m[1][1] * x[1] +
751  cam->m[1][2] * x[2] + cam->t[1]);
752  float mx = _imgpt_data[i * 2], my = _imgpt_data[2 * i + 1];
753  bool checkx = fabs(mx) > fabs(my);
754  if ((checkx && px * oz[i] * mx < 0 && fabs(mx) > 64) ||
755  (!checkx && py * oz[i] * my < 0 && fabs(my) > 64)) {
756  if (__verbose_level > 3)
757  std::cout << "Warning: proj of #" << cmidx
758  << " on the wrong side, oz = " << oz[i] << " ("
759  << (px / oz[i]) << ',' << (py / oz[i]) << ") (" << mx
760  << ',' << my << ")\n";
762  if (oz[i] > 0)
763  cpdist2[cmidx] = 0;
764  else
765  cpdist1[cmidx] = 0;
766  }
767  if (oz[i] >= 0)
768  cpdist1[cmidx] = std::min(cpdist1[cmidx], oz[i]);
769  else
770  cpdist2[cmidx] = std::max(cpdist2[cmidx], oz[i]);
771  }
772  if (oz[i] < 0) {
774  cambpj[cmidx]++;
775  }
776  camnpj[cmidx]++;
777  }
778  if (bad_point_count > 0 && __depth_degeneracy_fix) {
780  std::cout << "Enable data normalization on degeneracy\n";
781  __focal_normalize = true;
782  __depth_normalize = true;
783  }
784  if (__depth_normalize) {
785  std::nth_element(oz.begin(), oz.begin() + _num_imgpt / 2, oz.end());
786  float oz_median = oz[_num_imgpt / 2];
787  float shift_min = std::min(oz_median * 0.001f, 1.0f);
788  float dist_threshold = shift_min * 0.1f;
789  __depth_scaling = (1.0f / oz_median) / __data_normalize_median;
790  if (__verbose_level > 2)
791  std::cout << "Depth normalized by " << __depth_scaling << " ("
792  << oz_median << ")\n";
793 
794  for (int i = 0; i < _num_camera; ++i) {
795  // move the camera a little bit?
796  if (!__depth_degeneracy_fix) {
797  } else if ((cpdist1[i] < dist_threshold ||
798  cpdist2[i] > -dist_threshold)) {
799  float shift = shift_min; //(cpdist1[i] <= -cpdist2[i] ? shift_min :
800  //-shift_min);
801  // if(cpdist1[i] < dist_bound && cpdist2[i] > - dist_bound) shift = -
802  // 0.5f * (cpdist1[i] + cpdist2[i]);
803  bool boths =
804  cpdist1[i] < dist_threshold && cpdist2[i] > -dist_threshold;
805  _camera_data[i].t[2] += shift;
806  if (__verbose_level > 3)
807  std::cout << "Adjust C" << std::setw(5) << i << " by "
808  << std::setw(12) << shift << " [B" << std::setw(2)
809  << cambpj[i] << "/" << std::setw(5) << camnpj[i] << "] ["
810  << (boths ? 'X' : ' ') << "][" << cpdist1[i] << ", "
811  << cpdist2[i] << "]\n";
813  }
814  _camera_data[i].t[0] *= __depth_scaling;
815  _camera_data[i].t[1] *= __depth_scaling;
816  _camera_data[i].t[2] *= __depth_scaling;
817  }
818  for (int i = 0; i < _num_point; ++i) {
820  _point_data[4 * i + 0] *= __depth_scaling;
821  _point_data[4 * i + 1] *= __depth_scaling;
822  _point_data[4 * i + 2] *= __depth_scaling;
823  }
824  }
825  if (__num_point_behind > 0)
826  std::cout << "WARNING: " << __num_point_behind
827  << " points are behind cameras.\n";
828  if (__num_camera_modified > 0)
829  std::cout << "WARNING: " << __num_camera_modified
830  << " camera moved to avoid degeneracy.\n";
831  }
832 }
833 
836  NormalizeDataD();
837  NormalizeDataF();
838 }
839 
841  if (__focal_normalize && __focal_scaling != 1.0f) {
842  float squared_focal_factor = (__focal_scaling * __focal_scaling);
843  for (int i = 0; i < _num_camera; ++i) {
845  if (__use_radial_distortion == -1)
846  _camera_data[i].radial *= squared_focal_factor;
848  }
849  _projection_sse /= squared_focal_factor;
850  __focal_scaling = 1.0f;
851  _imgpt_datax.resize(0);
852  } else if (__use_radial_distortion) {
853  for (int i = 0; i < _num_camera; ++i)
854  _camera_data[i].distortion_type = __use_radial_distortion;
855  }
856 
857  if (__depth_normalize && __depth_scaling != 1.0f) {
858  for (int i = 0; i < _num_camera; ++i) {
859  _camera_data[i].t[0] /= __depth_scaling;
860  _camera_data[i].t[1] /= __depth_scaling;
861  _camera_data[i].t[2] /= __depth_scaling;
862  }
863  for (int i = 0; i < _num_point; ++i) {
864  _point_data[4 * i + 0] /= __depth_scaling;
865  _point_data[4 * i + 1] /= __depth_scaling;
866  _point_data[4 * i + 2] /= __depth_scaling;
867  }
868  __depth_scaling = 1.0f;
869  }
870 }
871 
876 }
877 
879  CuTexImage& proj) {
884  if (_num_imgpt_q > 0)
886  return (float)ComputeVectorNorm(proj, _cuBufferData);
887 }
888 
890  CuTexImage& proj) {
895  if (_num_imgpt_q > 0)
897  return (float)ComputeVectorNorm(proj, _cuBufferData);
898 }
899 
901  double e1 = 0, e2 = 0;
902  for (int i = 0; i < _num_imgpt; ++i) {
903  float* c = (float*)(_camera_data + _camera_idx[i]);
904  float* p = _point_data + 4 * _point_idx[i];
905  const float* m = _imgpt_datax.size() > 0 ? (&_imgpt_datax[i * 2])
906  : (_imgpt_data + 2 * i);
907  float* r = c + 4;
908  float* t = c + 1;
909  float dx1, dy1;
911  float z = r[6] * p[0] + r[7] * p[1] + r[8] * p[2] + t[2];
912  float xx = (r[0] * p[0] + r[1] * p[1] + r[2] * p[2] + t[0]);
913  float yy = (r[3] * p[0] + r[4] * p[1] + r[5] * p[2] + t[1]);
914  float x = xx / z;
915  float y = yy / z;
916  if (__use_radial_distortion == -1) {
917  float rn = (m[0] * m[0] + m[1] * m[1]) * c[13] + 1.0f;
918  dx1 = c[0] * x - m[0] * rn;
919  dy1 = c[0] * y - m[1] * rn;
920  e1 += (dx1 * dx1 + dy1 * dy1);
921  e2 += (dx1 * dx1 + dy1 * dy1) / (rn * rn);
922  } else if (__use_radial_distortion) {
923  float rn = (x * x + y * y) * c[13] + 1.0f;
924  dx1 = c[0] * x * rn - m[0];
925  dy1 = c[0] * y * rn - m[1];
926  e1 += (dx1 * dx1 + dy1 * dy1) / (rn * rn);
927  e2 += (dx1 * dx1 + dy1 * dy1);
928  } else {
929  dx1 = c[0] * x - m[0];
930  dy1 = c[0] * y - m[1];
931  e1 += (dx1 * dx1 + dy1 * dy1);
932  e2 += (dx1 * dx1 + dy1 * dy1);
933  }
934  if (!isfinite(dx1) || !isfinite(dy1))
935  std::cout << "x = " << xx << " y = " << yy << " z = " << z << '\n'
936  << "t0 = " << t[0] << " t1 = " << t[1] << " t2 = " << t[2]
937  << '\n' << "p0 = " << p[0] << " p1 = " << p[1]
938  << " p2 = " << p[2] << '\n';
939  }
942  std::cout << "DEBUG: mean squared error = " << e1
943  << " in undistorted domain;\n";
944  std::cout << "DEBUG: mean squared error = " << e2
945  << " in distorted domain.\n";
946 }
947 
949  bool success = true;
950  size_t total_sz = 0;
951  int plen = GetParameterLength(); // q = 8m + 4n
952 
960 
962  unsigned int cblock_len = (__use_radial_distortion ? 64 : 56);
964  1); // 64m + 12n
965  if (__accurate_gain_ratio) {
967  } else {
969  }
971 
973  __memory_usage += total_sz;
974  if (__verbose_level > 1)
975  std::cout << "Memory for Conjugate Gradient Solver:\t"
976  << (total_sz / 1024 / 1024) << "MB\n";
977  return success;
978 }
979 
981  if (!_cuVectorSJ.IsValid()) return;
982 
985  CuTexImage null;
986  null.SwapData(_cuVectorSJ);
988  null.SwapData(_cuVectorSJ);
991  } else {
992  CuTexImage null;
993  null.SwapData(_cuVectorSJ);
995  ComputeBlockPC(0, true);
996  null.SwapData(_cuVectorSJ);
999  }
1000 }
1001 
1003  if (__no_jacobian_store) return;
1006  return;
1008 
1014  if (shuffle && __jc_store_transpose && _cuJacobianCameraT.IsValid())
1017  } else {
1022  }
1024 }
1025 
1028  if (mode == 0) mode = __bundle_current_mode;
1034  __use_radial_distortion, mode);
1035 
1037  if (!_cuVectorSJ.IsValid()) {
1038  } else if (mode == 2) {
1039  if (!_cuJacobianPoint.IsValid())
1040  ComputeVXY(JtE, _cuVectorSJ, JtE, _num_point * 4, _num_camera * 8);
1041  } else if (mode == 1)
1042  ComputeVXY(JtE, _cuVectorSJ, JtE, _num_camera * 8);
1043  else
1044  ComputeVXY(JtE, _cuVectorSJ, JtE,
1045  _cuJacobianPoint.IsValid() ? _num_camera * 8 : 0);
1046 
1047  } else if (__jc_store_transpose) {
1050  _cuPointMeasurementMap, JtE, true, mode);
1051  } else {
1054  _cuPointMeasurementMap, JtE, false, mode);
1055  }
1056 
1057  if (mode != 2 && _num_imgpt_q > 0)
1059  JtE);
1060 }
1061 
1062 void SparseBundleCU::SaveBundleRecord(int iter, float res, float damping,
1063  float& g_norm, float& g_inf) {
1064  // do not really compute if parameter not specified...
1065  // for large dataset, it never converges..
1066  g_inf =
1068  g_norm = __save_gradient_norm
1070  : g_inf;
1071  ConfigBA::SaveBundleRecord(iter, res, damping, g_norm, g_inf);
1072 }
1073 
1076  if (__no_jacobian_store || (__multiply_jx_usenoj && mode != 2) ||
1078  if (_cuVectorSJ.IsValid()) {
1079  if (mode == 0)
1081  else if (mode == 1)
1083  else if (mode == 2)
1085  _num_camera * 8);
1089  } else {
1092  __use_radial_distortion, mode);
1093  }
1094  } else {
1096  _cuJacobianPoint, _cuProjectionMap, JX, mode);
1097  }
1098 
1099  if (_num_imgpt_q > 0 && mode != 2) {
1101  _num_imgpt);
1102  }
1103 }
1104 
1105 void SparseBundleCU::ComputeBlockPC(float lambda, bool dampd) {
1107 
1108  bool use_diagonal_q = _cuCameraQListW.IsValid() && __bundle_current_mode != 2;
1109  if (use_diagonal_q)
1111 
1114  lambda, dampd, _cuCameraData, _cuPointData, _cuMeasurements,
1118  use_diagonal_q, __bundle_current_mode);
1119  } else if (__jc_store_transpose) {
1124  use_diagonal_q, __bundle_current_mode);
1125  } else {
1126  ComputeDiagonalBlock(lambda, dampd, _cuJacobianCamera,
1130  false, use_diagonal_q, __bundle_current_mode);
1131  }
1132 }
1133 
1137  __use_radial_distortion, mode);
1138 }
1139 
1142  if (__no_jacobian_store) return;
1143  if (!__jc_store_transpose && !__jc_store_original) return;
1144 
1146  bool use_diagonal_q = _cuCameraQListW.IsValid();
1147  if (use_diagonal_q) {
1148  CuTexImage null;
1149  ComputeDiagonalQ(_cuCameraQListW, null, JJ);
1150  }
1151  if (__jc_store_transpose) {
1154  _cuCameraMeasurementList, JJ, JJI, true,
1155  __use_radial_distortion, use_diagonal_q);
1156  } else {
1159  _cuCameraMeasurementList, JJ, JJI, false,
1160  __use_radial_distortion, use_diagonal_q);
1161  }
1162 }
1163 
1165  //----------------------------------------------------------
1166  //(Jt * J + lambda * diag(Jt * J)) X = Jt * e
1167  //-------------------------------------------------------------
1169  __recent_cg_status = ' ';
1170 
1171  // diagonal for jacobian preconditioning...
1172  int plen = GetParameterLength();
1173  CuTexImage null;
1174  CuTexImage& VectorDP =
1175  __lm_use_diagonal_damp ? _cuVectorJJ : null; // diagonal
1177 
1179  // B = [BC 0 ; 0 BP]
1180  // m = [mc 0; 0 mp];
1181  // A x= BC * x - JcT * Jp * mp * JpT * Jc * x
1182  // = JcT * Jc x + lambda * D * x + ........
1184 
1185  CuTexImage r;
1187  CuTexImage p;
1189  CuTexImage z;
1191  CuTexImage x;
1193  CuTexImage d;
1194  d.SetTexture(VectorDP.data(), 8 * _num_camera);
1195 
1196  CuTexImage& u = _cuVectorRK;
1197  CuTexImage& v = _cuVectorPK;
1198  CuTexImage up;
1199  up.SetTexture(u.data() + 8 * _num_camera, 4 * _num_point);
1200  CuTexImage vp;
1201  vp.SetTexture(v.data() + 8 * _num_camera, 4 * _num_point);
1202  CuTexImage uc;
1203  uc.SetTexture(z.data(), 8 * _num_camera);
1204 
1205  CuTexImage& e = _cuVectorJX;
1206  CuTexImage& e2 = _cuImageProj;
1207 
1208  ApplyBlockPC(_cuVectorJtE, u, 2);
1209  ComputeJX(u, e, 2);
1210  ComputeJtE(e, uc, 1);
1211  ComputeSAXPY(-1.0f, uc, _cuVectorJtE, r); // r
1212  ApplyBlockPC(r, p, 1); // z = p = M r
1213 
1214  float_t rtz0 = (float_t)ComputeVectorDot(r, p, _cuBufferData); // r(0)' *
1215  // z(0)
1216  ComputeJX(p, e, 1); // Jc * x
1217  ComputeJtE(e, u, 2); // JpT * jc * x
1218  ApplyBlockPC(u, v, 2);
1219  float_t qtq0 = (float_t)ComputeVectorNorm(e, _cuBufferData); // q(0)' * q(0)
1220  float_t pdp0 =
1221  (float_t)ComputeVectorNormW(p, d, _cuBufferData); // p(0)' * DDD * p(0)
1223  float_t alpha0 = rtz0 / (qtq0 + lambda * pdp0 - uv0);
1224 
1226  std::cout << " --0,\t alpha = " << alpha0
1227  << ", t = " << BundleTimerGetNow(TIMER_CG_ITERATION) << "\n";
1228  if (!isfinite(alpha0)) {
1229  return 0;
1230  }
1231  if (alpha0 == 0) {
1232  __recent_cg_status = 'I';
1233  return 1;
1234  }
1235 
1237  ComputeSAX((float)alpha0, p, x); // x(k+1) = x(k) + a(k) * p(k)
1238  ComputeJX(v, e2, 2); // //Jp * mp * JpT * JcT * p
1239  ComputeSAXPY(-1.0f, e2, e, e);
1240  ComputeJtE(e, uc, 1); // JcT * ....
1241  ComputeSXYPZ(lambda, d, p, uc, uc);
1242  ComputeSAXPY((float)-alpha0, uc, r, r); // r(k + 1) = r(k) - a(k) * A * pk
1243 
1245  float_t rtzk = rtz0, rtz_min = rtz0, betak;
1246  int iteration = 1;
1248 
1249  while (true) {
1250  ApplyBlockPC(r, z, 1);
1251 
1253  float_t rtzp = rtzk;
1254  rtzk = (float_t)ComputeVectorDot(
1255  r, z, _cuBufferData); //[r(k + 1) = M^(-1) * z(k + 1)] * z(k+1)
1256  float_t rtz_ratio = sqrt(fabs(rtzk / rtz0));
1257 
1258  if (rtz_ratio < __cg_norm_threshold) {
1259  if (__recent_cg_status == ' ')
1261  ? '0' + iteration
1262  : 'N';
1263  if (iteration >= __cg_min_iteration) break;
1264  }
1266  betak = rtzk / rtzp; // beta
1267  rtz_min = std::min(rtz_min, rtzk);
1268 
1269  ComputeSAXPY((float)betak, p, z, p); // p(k) = z(k) + b(k) * p(k - 1)
1270  ComputeJX(p, e, 1); // Jc * p
1271  ComputeJtE(e, u, 2); // JpT * jc * p
1272  ApplyBlockPC(u, v, 2);
1274 
1275  float_t qtqk = (float_t)ComputeVectorNorm(e, _cuBufferData); // q(k)' q(k)
1276  float_t pdpk =
1277  (float_t)ComputeVectorNormW(p, d, _cuBufferData); // p(k)' * DDD * p(k)
1279  float_t alphak = rtzk / (qtqk + lambda * pdpk - uvk);
1280 
1283  std::cout << " --" << iteration << ",\t alpha= " << alphak
1284  << ", rtzk/rtz0 = " << rtz_ratio
1285  << ", t = " << BundleTimerGetNow(TIMER_CG_ITERATION) << "\n";
1286 
1288  if (!isfinite(alphak) || rtz_ratio > __cg_norm_guard) {
1289  __recent_cg_status = 'X';
1290  break;
1291  } // something doesn't converge..
1292 
1294  ComputeSAXPY((float)alphak, p, x, x); // x(k+1) = x(k) + a(k) * p(k)
1295 
1297  ++iteration;
1299  if (iteration >= std::min(__cg_max_iteration, plen)) break;
1300 
1301  ComputeJX(v, e2, 2); // //Jp * mp * JpT * JcT * p
1302  ComputeSAXPY(-1.0f, e2, e, e);
1303  ComputeJtE(e, uc, 1); // JcT * ....
1304  ComputeSXYPZ(lambda, d, p, uc, uc);
1305  ComputeSAXPY((float)-alphak, uc, r, r); // r(k + 1) = r(k) - a(k) * A * pk
1306  }
1307 
1308  // if(__recent_cg_status == 'X') return iteration;
1309 
1310  ComputeJX(x, e, 1);
1311  ComputeJtE(e, u, 2);
1312  CuTexImage jte_p;
1313  jte_p.SetTexture(_cuVectorJtE.data() + 8 * _num_camera, _num_point * 4);
1314  ComputeSAXPY(-1.0f, up, jte_p, vp);
1315  ApplyBlockPC(v, _cuVectorXK, 2);
1316  return iteration;
1317 }
1319  //----------------------------------------------------------
1320  //(Jt * J + lambda * diag(Jt * J)) X = Jt * e
1321  //-------------------------------------------------------------
1323  __recent_cg_status = ' ';
1324 
1325  // diagonal for jacobian preconditioning...
1326  int plen = GetParameterLength();
1327  CuTexImage null;
1328  CuTexImage& VectorDP =
1329  __lm_use_diagonal_damp ? _cuVectorJJ : null; // diagonal
1330  CuTexImage& VectorQK = _cuVectorZK; // temporary
1332 
1335  _cuVectorPK); // z(0) = p(0) = M * r(0)//r(0) = Jt * e
1336  ComputeJX(_cuVectorPK, _cuVectorJX); // q(0) = J * p(0)
1337 
1340  _cuBufferData); // r(0)' * z(0)
1341  float_t qtq0 =
1344  _cuVectorPK, VectorDP, _cuBufferData); // p(0)' * DDD * p(0)
1345  float_t alpha0 = rtz0 / (qtq0 + lambda * ptdp0);
1346 
1348  std::cout << " --0,\t alpha = " << alpha0
1349  << ", t = " << BundleTimerGetNow(TIMER_CG_ITERATION) << "\n";
1350  if (!isfinite(alpha0)) {
1351  return 0;
1352  }
1353  if (alpha0 == 0) {
1354  __recent_cg_status = 'I';
1355  return 1;
1356  }
1357 
1359  ComputeSAX((float)alpha0, _cuVectorPK,
1360  _cuVectorXK); // x(k+1) = x(k) + a(k) * p(k)
1361  ComputeJtE(_cuVectorJX, VectorQK); // Jt * (J * p0)
1362 
1363  ComputeSXYPZ(lambda, VectorDP, _cuVectorPK, VectorQK,
1364  VectorQK); // Jt * J * p0 + lambda * DDD * p0
1365  ComputeSAXPY(
1366  (float)-alpha0, VectorQK, _cuVectorJtE,
1367  _cuVectorRK); // r(k+1) = r(k) - a(k) * (Jt * q(k) + DDD * p(k)) ;
1368 
1369  float_t rtzk = rtz0, rtz_min = rtz0, betak;
1370  int iteration = 1;
1372 
1373  while (true) {
1375 
1377  float_t rtzp = rtzk;
1378  rtzk = (float_t)ComputeVectorDot(
1380  _cuBufferData); //[r(k + 1) = M^(-1) * z(k + 1)] * z(k+1)
1381  float_t rtz_ratio = sqrt(fabs(rtzk / rtz0));
1382  if (rtz_ratio < __cg_norm_threshold) {
1383  if (__recent_cg_status == ' ')
1385  ? '0' + iteration
1386  : 'N';
1387  if (iteration >= __cg_min_iteration) break;
1388  }
1389 
1391  betak = rtzk / rtzp; // beta
1392  rtz_min = std::min(rtz_min, rtzk);
1393 
1394  ComputeSAXPY((float)betak, _cuVectorPK, _cuVectorZK,
1395  _cuVectorPK); // p(k) = z(k) + b(k) * p(k - 1)
1396  ComputeJX(_cuVectorPK, _cuVectorJX); // q(k) = J * p(k)
1398 
1399  float_t qtqk =
1402  _cuVectorPK, VectorDP, _cuBufferData); // p(k)' * DDD * p(k)
1403  float_t alphak = rtzk / (qtqk + lambda * ptdpk);
1404 
1407  std::cout << " --" << iteration << ",\t alpha= " << alphak
1408  << ", rtzk/rtz0 = " << rtz_ratio
1409  << ", t = " << BundleTimerGetNow(TIMER_CG_ITERATION) << "\n";
1410 
1412  if (!isfinite(alphak) || rtz_ratio > __cg_norm_guard) {
1413  __recent_cg_status = 'X';
1414  break;
1415  } // something doesn't converge..
1416 
1418  ComputeSAXPY((float)alphak, _cuVectorPK, _cuVectorXK,
1419  _cuVectorXK); // x(k+1) = x(k) + a(k) * p(k)
1420 
1422  ++iteration;
1424  if (iteration >= std::min(__cg_max_iteration, plen)) break;
1425 
1426  // if(iteration == 2 && rtz_ratio < __cg_norm_threshold)
1427  if (__cg_recalculate_freq > 0 && iteration % __cg_recalculate_freq == 0) {
1430  ComputeJtE(_cuVectorJX, VectorQK);
1431  ComputeSXYPZ(lambda, VectorDP, _cuVectorXK, VectorQK, VectorQK);
1432  ComputeSAXPY(-1.0f, VectorQK, _cuVectorJtE, _cuVectorRK);
1433  } else {
1434  ComputeJtE(_cuVectorJX, VectorQK);
1435  ComputeSXYPZ(lambda, VectorDP, _cuVectorPK, VectorQK, VectorQK); //
1436  ComputeSAXPY(
1437  (float)-alphak, VectorQK, _cuVectorRK,
1438  _cuVectorRK); // r(k+1) = r(k) - a(k) * (Jt * q(k) + DDD * p(k)) ;
1439  }
1440  }
1441  return iteration;
1442 }
1443 
1448  return 1;
1452  return 1;
1453  } else {
1456  : SolveNormalEquationPCGB(lambda);
1457  }
1458 }
1459 
1464  if (reduced)
1466  else
1472 }
1473 
1475  CuTexImage& cuImageTempProj) {
1483  return EvaluateProjection(_cuCameraDataEX, _cuPointData, cuImageTempProj);
1490  return EvaluateProjection(_cuCameraData, _cuPointDataEX, cuImageTempProj);
1491  } else {
1496  return EvaluateProjection(_cuCameraDataEX, _cuPointDataEX, cuImageTempProj);
1497  }
1498 }
1499 
1500 float SparseBundleCU::SaveUpdatedSystem(float residual_reduction,
1501  float dx_sqnorm, float damping) {
1502  float expected_reduction;
1504  CuTexImage xk;
1506  CuTexImage jte;
1508  float dxtg = (float)ComputeVectorDot(xk, jte, _cuBufferData);
1509  if (__lm_use_diagonal_damp) {
1510  CuTexImage jj;
1512  float dq = (float)ComputeVectorNormW(xk, jj, _cuBufferData);
1513  expected_reduction = damping * dq + dxtg;
1514  } else {
1515  expected_reduction = damping * dx_sqnorm + dxtg;
1516  }
1519  CuTexImage xk;
1521  CuTexImage jte;
1523  float dxtg = (float)ComputeVectorDot(xk, jte, _cuBufferData);
1524  if (__lm_use_diagonal_damp) {
1525  CuTexImage jj;
1527  float dq = (float)ComputeVectorNormW(xk, jj, _cuBufferData);
1528  expected_reduction = damping * dq + dxtg;
1529  } else {
1530  expected_reduction = damping * dx_sqnorm + dxtg;
1531  }
1533  } else {
1534  float dxtg =
1536 
1537  if (__accurate_gain_ratio) {
1539  float njx = (float)ComputeVectorNorm(_cuVectorJX, _cuBufferData);
1540  expected_reduction = 2.0f * dxtg - njx;
1541  // could the expected reduction be negative??? not sure
1542  if (expected_reduction <= 0)
1543  expected_reduction = 0.001f * residual_reduction;
1544  } else if (__lm_use_diagonal_damp) {
1545  float dq =
1547  expected_reduction = damping * dq + dxtg;
1548  } else {
1549  expected_reduction = damping * dx_sqnorm + dxtg;
1550  }
1551 
1555 
1556  //_cuCameraData.CopyToHost(_camera_data);
1557  //_cuPointData.CopyToHost(_point_data);
1558  // DebugProjections();
1559  }
1561  return float(residual_reduction / expected_reduction);
1562 }
1563 
1568  }
1569 }
1570 
1573  CuTexImage temp;
1574  temp.SetTexture(_cuVectorXK.data(), 8 * _num_camera);
1575  return ComputeVectorNorm(temp, _cuBufferData);
1576 
1578  CuTexImage temp;
1579  temp.SetTexture(_cuVectorXK.data() + 8 * _num_camera, 4 * _num_point);
1580  return ComputeVectorNorm(temp, _cuBufferData);
1581  } else {
1583  }
1584 }
1585 
1589 
1591  float mse_convert_ratio =
1593  float error_display_ratio = __verbose_sse ? _num_imgpt : 1.0f;
1594  const int edwidth = __verbose_sse ? 12 : 8;
1595  _projection_sse =
1597  __initial_mse = __final_mse = _projection_sse * mse_convert_ratio;
1598 
1599  // compute jacobian diagonals for normalization
1601 
1602  // evalaute jacobian
1606  if (__verbose_level)
1607  std::cout << "Initial " << (__verbose_sse ? "sumed" : "mean")
1608  << " squared error = " << __initial_mse * error_display_ratio
1609  << "\n----------------------------------------------\n";
1610 
1612  CuTexImage& cuImageTempProj = _cuVectorJX;
1613  // CuTexImage& cuVectorTempJX = _cuVectorJX;
1615 
1617  float damping_adjust = 2.0f, damping = __lm_initial_damp, g_norm, g_inf;
1618  SaveBundleRecord(0, _projection_sse * mse_convert_ratio, damping, g_norm,
1619  g_inf);
1620 
1622  std::cout << std::left;
1623  for (int i = 0; i < __lm_max_iteration && !__abort_flag;
1624  __current_iteration = (++i)) {
1626  int num_cg_iteration = SolveNormalEquation(damping);
1627 
1628  // there must be NaN somewhere
1629  if (num_cg_iteration == 0) {
1630  if (__verbose_level)
1631  std::cout << "#" << std::setw(3) << i << " quit on numeric errors\n";
1632  __pba_return_code = 'E';
1633  break;
1634  }
1635 
1636  // there must be infinity somewhere
1637  if (__recent_cg_status == 'I') {
1638  std::cout << "#" << std::setw(3) << i << " 0 I e=" << std::setw(edwidth)
1639  << "------- "
1640  << " u=" << std::setprecision(3) << std::setw(9) << damping
1641  << '\n' << std::setprecision(6);
1643  damping = damping * damping_adjust;
1644  damping_adjust = 2.0f * damping_adjust;
1645  --i;
1646  continue;
1647  }
1648 
1651 
1653  float dx_sqnorm = EvaluateDeltaNorm(), dx_norm = sqrt(dx_sqnorm);
1654 
1655  // In this library, we check absolute difference instead of realtive
1656  // difference
1657  if (dx_norm <= __lm_delta_threshold) {
1658  // damping factor must be way too big...or it converges
1659  if (__verbose_level > 1)
1660  std::cout << "#" << std::setw(3) << i << " " << std::setw(3)
1661  << num_cg_iteration << char(__recent_cg_status)
1662  << " quit on too small change (" << dx_norm << " < "
1663  << __lm_delta_threshold << ")\n";
1664  __pba_return_code = 'S';
1665  break;
1666  }
1668  // update structure and motion, check reprojection error
1669  float new_residual = UpdateCameraPoint(cuVectorDX, cuImageTempProj);
1670  float average_residual = new_residual * mse_convert_ratio;
1671  float residual_reduction = _projection_sse - new_residual;
1672 
1673  // do we find a better solution?
1674  if (isfinite(new_residual) && residual_reduction > 0) {
1676  float relative_reduction = 1.0f - (new_residual / _projection_sse);
1677 
1679  __num_lm_success++; // increase counter
1680  _projection_sse = new_residual; // save the new residual
1681  _cuImageProj.SwapData(cuImageTempProj); // save the new projection
1682 
1684  float gain_ratio =
1685  SaveUpdatedSystem(residual_reduction, dx_sqnorm, damping);
1686 
1688  SaveBundleRecord(i + 1, _projection_sse * mse_convert_ratio, damping,
1689  g_norm, g_inf);
1690 
1692  if (__verbose_level > 1)
1693  std::cout << "#" << std::setw(3) << i << " " << std::setw(3)
1694  << num_cg_iteration << char(__recent_cg_status)
1695  << " e=" << std::setw(edwidth)
1696  << average_residual * error_display_ratio
1697  << " u=" << std::setprecision(3) << std::setw(9) << damping
1698  << " r=" << std::setw(6)
1699  << floor(gain_ratio * 1000.f) * 0.001f
1700  << " g=" << std::setw(g_norm > 0 ? 9 : 1) << g_norm << " "
1701  << std::setw(9) << relative_reduction << ' ' << std::setw(9)
1702  << dx_norm << " t=" << int(BundleTimerGetNow()) << "\n"
1703  << std::setprecision(6);
1704 
1706  if (!IsTimeBudgetAvailable()) {
1707  if (__verbose_level > 1)
1708  std::cout << "#" << std::setw(3) << i << " used up time budget.\n";
1709  __pba_return_code = 'T';
1710  break;
1711  } else if (__lm_check_gradient && g_inf < __lm_gradient_threshold) {
1712  if (__verbose_level > 1)
1713  std::cout << "#" << std::setw(3) << i
1714  << " converged with small gradient\n";
1715  __pba_return_code = 'G';
1716  break;
1717  } else if (average_residual * error_display_ratio <= __lm_mse_threshold) {
1718  if (__verbose_level > 1)
1719  std::cout << "#" << std::setw(3) << i << " satisfies MSE threshold\n";
1720  __pba_return_code = 'M';
1721  break;
1722  } else {
1724  float temp = gain_ratio * 2.0f - 1.0f;
1725  float adaptive_adjust = 1.0f - temp * temp * temp; // powf(, 3.0f); //
1726  float auto_adjust = std::max(1.0f / 3.0f, adaptive_adjust);
1727 
1729  damping = damping * auto_adjust;
1730  damping_adjust = 2.0f;
1731  if (damping < __lm_minimum_damp)
1732  damping = __lm_minimum_damp;
1733  else if (__lm_damping_auto_switch == 0 && damping > __lm_maximum_damp &&
1735  damping = __lm_maximum_damp;
1736 
1739  }
1740  } else {
1741  if (__verbose_level > 1)
1742  std::cout << "#" << std::setw(3) << i << " " << std::setw(3)
1743  << num_cg_iteration << char(__recent_cg_status)
1744  << " e=" << std::setw(edwidth) << std::left
1745  << average_residual * error_display_ratio
1746  << " u=" << std::setprecision(3) << std::setw(9) << damping
1747  << " r=----- " << (__lm_check_gradient || __save_gradient_norm
1748  ? " g=---------"
1749  : " g=0")
1750  << " --------- " << std::setw(9) << dx_norm
1751  << " t=" << int(BundleTimerGetNow()) << "\n"
1752  << std::setprecision(6);
1753 
1755  damping > __lm_damping_auto_switch) {
1756  __lm_use_diagonal_damp = false;
1757  damping = __lm_damping_auto_switch;
1758  damping_adjust = 2.0f;
1759  if (__verbose_level > 1)
1760  std::cout << "NOTE: switch to damping with an identity matix\n";
1761  } else {
1763  damping = damping * damping_adjust;
1764  damping_adjust = 2.0f * damping_adjust;
1765  }
1766  }
1767 
1768  if (__verbose_level == 1) std::cout << '.';
1769  }
1770 
1771  __final_mse = float(_projection_sse * mse_convert_ratio);
1772  __final_mse_x =
1775  mse_convert_ratio
1776  : __final_mse;
1777 }
1778 
1779 #define PROFILE_(A, B) \
1780  BundleTimerStart(TIMER_PROFILE_STEP); \
1781  for (int i = 0; i < repeat; ++i) { \
1782  B; \
1783  FinishWorkCUDA(); \
1784  } \
1785  BundleTimerSwitch(TIMER_PROFILE_STEP); \
1786  std::cout << std::setw(24) << A << ": " \
1787  << (BundleTimerGet(TIMER_PROFILE_STEP) / repeat) << "\n";
1788 
1789 #define PROFILE(A, B) PROFILE_(#A, A B)
1790 #define PROXILE(A, B) PROFILE_(A, B)
1791 
1793  const int repeat = __profile_pba;
1794  std::cout << "---------------------------------\n"
1795  "| Run profiling steps ("
1796  << repeat << ") |\n"
1797  "---------------------------------\n"
1798  << std::left;
1799  ;
1800 
1802  PROXILE("Upload Measurements",
1804  _imgpt_datax.size() > 0 ? &_imgpt_datax[0] : _imgpt_data));
1805  PROXILE("Upload Point Data", _cuPointData.CopyToHost(_point_data));
1806  std::cout << "---------------------------------\n";
1807 
1814  FinishWorkCUDA();
1815 
1816  do {
1819  break;
1820  __lm_initial_damp *= 2.0f;
1821  } while (__lm_initial_damp < 1024.0f);
1822  std::cout << "damping set to " << __lm_initial_damp << " for profiling\n"
1823  << "---------------------------------\n";
1824 
1825  {
1826  int repeat = 10, cgmin = __cg_min_iteration, cgmax = __cg_max_iteration;
1828  __num_cg_iteration = 0;
1830  if (__num_cg_iteration != 100)
1831  std::cout << __num_cg_iteration << " cg iterations in all\n";
1832 
1834  __num_cg_iteration = 0;
1836  if (__num_cg_iteration != 100)
1837  std::cout << __num_cg_iteration << " cg iterations in all\n";
1838  std::cout << "---------------------------------\n";
1840  __num_cg_iteration = 0;
1841  PROXILE("Single iteration LMX", RunTestIterationLM(true));
1842  if (__num_cg_iteration != 100)
1843  std::cout << __num_cg_iteration << " cg iterations in all\n";
1845  __num_cg_iteration = 0;
1846  PROXILE("Single iteration LMB", RunTestIterationLM(false));
1847  if (__num_cg_iteration != 100)
1848  std::cout << __num_cg_iteration << " cg iterations in all\n";
1849  std::cout << "---------------------------------\n";
1850  __cg_max_iteration = cgmax;
1851  __cg_min_iteration = cgmin;
1852  }
1861  std::cout << "---------------------------------\n";
1864  std::cout << "---------------------------------\n";
1865 
1866  __multiply_jx_usenoj = false;
1871  if (!__no_jacobian_store) {
1872  if (__jc_store_original) {
1874  PROFILE(EvaluateJacobians, (false));
1875 
1876  if (__jc_store_transpose) {
1877  PROFILE(
1882  PROFILE(ComputeBlockPC, (0.001f, true));
1883 
1884  std::cout << "---------------------------------\n"
1885  "| Not storing original JC | \n"
1886  "---------------------------------\n";
1887  __jc_store_original = false;
1889  __jc_store_original = true;
1890  }
1892 
1893  std::cout << "---------------------------------\n"
1894  "| Not storing transpose JC | \n"
1895  "---------------------------------\n";
1896  __jc_store_transpose = false;
1899  PROFILE(ComputeBlockPC, (0.001f, true));
1900 
1902 
1903  } else if (__jc_store_transpose) {
1906  PROFILE(ComputeBlockPC, (0.001f, true));
1907  std::cout << "---------------------------------\n"
1908  "| Not storing original JC | \n"
1909  "---------------------------------\n";
1911  }
1912  }
1913 
1914  if (!__no_jacobian_store) {
1915  std::cout << "---------------------------------\n"
1916  "| Not storing Camera Jacobians | \n"
1917  "---------------------------------\n";
1918  __jc_store_transpose = false;
1919  __jc_store_original = false;
1924  PROFILE(ComputeBlockPC, (0.001f, true));
1925  }
1926 
1928 
1929  std::cout << "---------------------------------\n"
1930  "| Not storing any jacobians |\n"
1931  "---------------------------------\n";
1932  __no_jacobian_store = true;
1936  PROFILE(ComputeBlockPC, (0.001f, true));
1937 
1938  std::cout << "---------------------------------\n";
1939 }
1940 
1945  // DEBUG_FUNCN(_cuVectorXK, SolveNormalEquationPCGB, (0.001f), 100);
1948 }
1949 
1951  ofstream out1("../../matlab/cg_j.txt");
1952  ofstream out2("../../matlab/cg_b.txt");
1953  ofstream out3("../../matlab/cg_x.txt");
1954 
1955  out1 << std::setprecision(20);
1956  out2 << std::setprecision(20);
1957  out3 << std::setprecision(20);
1958 
1959  int plen = GetParameterLength();
1960  vector<float> jc(16 * _num_imgpt);
1961  vector<float> jp(8 * _num_imgpt);
1962  vector<float> ee(2 * _num_imgpt);
1963  vector<float> dx(plen);
1964 
1965  _cuJacobianCamera.CopyToHost(&jc[0]);
1966  _cuJacobianPoint.CopyToHost(&jp[0]);
1967  _cuImageProj.CopyToHost(&ee[0]);
1968  _cuVectorXK.CopyToHost(&dx[0]);
1969 
1970  for (int i = 0; i < _num_imgpt; ++i) {
1971  out2 << ee[i * 2] << ' ' << ee[i * 2 + 1] << ' ';
1972  int cidx = _camera_idx[i], pidx = _point_idx[i];
1973  float *cp = &jc[i * 16], *pp = &jp[i * 8];
1974  int cmin = cidx * 8, pmin = 8 * _num_camera + pidx * 4;
1975  for (int j = 0; j < 8; ++j)
1976  out1 << (i * 2 + 1) << ' ' << (cmin + j + 1) << ' ' << cp[j] << '\n';
1977  for (int j = 0; j < 8; ++j)
1978  out1 << (i * 2 + 2) << ' ' << (cmin + j + 1) << ' ' << cp[j + 8] << '\n';
1979  for (int j = 0; j < 4; ++j)
1980  out1 << (i * 2 + 1) << ' ' << (pmin + j + 1) << ' ' << pp[j] << '\n';
1981  for (int j = 0; j < 4; ++j)
1982  out1 << (i * 2 + 2) << ' ' << (pmin + j + 1) << ' ' << pp[j + 4] << '\n';
1983  }
1984 
1985  for (size_t i = 0; i < dx.size(); ++i) out3 << dx[i] << ' ';
1986 
1987  std::cout << "lambda = " << std::setprecision(20) << lambda << '\n';
1988 }
1989 
1990 } // namespace pba
void * X
Definition: SmallVector.cpp:45
#define PROXILE(A, B)
#define ALLOCATE_REQUIRED_DATA(NAME, num, channels)
#define DEBUG_FUNCN(v, func, input, N)
#define PROFILE(A, B)
#define ALLOCATE_OPTIONAL_DATA(NAME, num, channels, option)
#define NULL
bool __verbose_sse
Definition: ConfigBA.h:131
int __selected_device
Definition: ConfigBA.h:152
bool __fixed_intrinsics
Definition: ConfigBA.h:113
int __pba_return_code
Definition: ConfigBA.h:172
float __depth_scaling
Definition: ConfigBA.h:162
float __final_mse_x
Definition: ConfigBA.h:160
bool __jacobian_normalize
Definition: ConfigBA.h:139
float __focal_scaling
Definition: ConfigBA.h:161
bool __verbose_cg_iteration
Definition: ConfigBA.h:124
bool __jc_store_transpose
Definition: ConfigBA.h:134
void PrintBundleStatistics()
Definition: ConfigBA.cpp:243
float __cg_norm_guard
Definition: ConfigBA.h:103
int __current_device
Definition: ConfigBA.h:163
int __num_camera_modified
Definition: ConfigBA.h:170
void BundleTimerSwap(int timer1, int timer2)
Definition: ConfigBA.cpp:311
bool __lm_use_diagonal_damp
Definition: ConfigBA.h:110
bool __abort_flag
Definition: ConfigBA.h:122
bool __depth_degeneracy_fix
Definition: ConfigBA.h:143
int __current_iteration
Definition: ConfigBA.h:164
int __verbose_level
Definition: ConfigBA.h:121
bool __lm_check_gradient
Definition: ConfigBA.h:108
int __profile_pba
Definition: ConfigBA.h:174
float __lm_delta_threshold
Definition: ConfigBA.h:93
bool __depth_normalize
Definition: ConfigBA.h:142
int __num_cg_iteration
Definition: ConfigBA.h:165
int __num_lm_success
Definition: ConfigBA.h:166
float __final_mse
Definition: ConfigBA.h:159
int __cg_min_iteration
Definition: ConfigBA.h:87
int __num_jacobian_eval
Definition: ConfigBA.h:169
float __depth_check_epsilon
Definition: ConfigBA.h:145
size_t __memory_usage
Definition: ConfigBA.h:178
float __cg_norm_threshold
Definition: ConfigBA.h:101
float __lm_initial_damp
Definition: ConfigBA.h:98
void SaveBundleStatistics(int ncam, int npt, int nproj)
Definition: ConfigBA.cpp:161
int __num_lm_iteration
Definition: ConfigBA.h:167
int __use_radial_distortion
Definition: ConfigBA.h:115
bool __cg_schur_complement
Definition: ConfigBA.h:105
bool __accurate_gain_ratio
Definition: ConfigBA.h:89
bool __jc_store_original
Definition: ConfigBA.h:136
float __lm_mse_threshold
Definition: ConfigBA.h:96
void SaveBundleRecord(int iter, float res, float damping, float gn, float gi)
Definition: ConfigBA.cpp:329
@ TIMER_PREPROCESSING
Definition: ConfigBA.h:34
@ TIMER_FUNCTION_BC
Definition: ConfigBA.h:43
@ TIMER_FUNCTION_MP
Definition: ConfigBA.h:44
@ TIMER_GPU_DOWNLOAD
Definition: ConfigBA.h:35
@ TIMER_FUNCTION_JTE
Definition: ConfigBA.h:42
@ TIMER_FUNCTION_PJ
Definition: ConfigBA.h:39
@ TIMER_CG_ITERATION
Definition: ConfigBA.h:36
@ TIMER_FUNCTION_JJ
Definition: ConfigBA.h:38
@ TIMER_FUNCTION_JX
Definition: ConfigBA.h:41
@ TIMER_FUNCTION_DD
Definition: ConfigBA.h:40
@ TIMER_GPU_ALLOCATION
Definition: ConfigBA.h:32
@ TIMER_FUNCTION_UP
Definition: ConfigBA.h:45
@ TIMER_OPTIMIZATION
Definition: ConfigBA.h:31
@ TIMER_GPU_UPLOAD
Definition: ConfigBA.h:33
int __num_point_behind
Definition: ConfigBA.h:171
float __data_normalize_median
Definition: ConfigBA.h:144
int __lm_max_iteration
Definition: ConfigBA.h:85
float __lm_damping_auto_switch
Definition: ConfigBA.h:109
float __lm_gradient_threshold
Definition: ConfigBA.h:95
float __lm_maximum_damp
Definition: ConfigBA.h:100
bool __focal_normalize
Definition: ConfigBA.h:141
bool __reset_initial_distortion
Definition: ConfigBA.h:117
int __cg_max_iteration
Definition: ConfigBA.h:86
int __cg_recalculate_freq
Definition: ConfigBA.h:88
int __num_projection_eval
Definition: ConfigBA.h:168
bool __multiply_jx_usenoj
Definition: ConfigBA.h:149
float BundleTimerGetNow(int timer=TIMER_OPTIMIZATION)
Definition: ConfigBA.cpp:320
void ResetTemporarySetting()
Definition: ConfigBA.cpp:151
float __lm_minimum_damp
Definition: ConfigBA.h:99
int __bundle_current_mode
Definition: ConfigBA.h:156
void ResetBundleStatistics()
Definition: ConfigBA.cpp:123
bool __save_gradient_norm
Definition: ConfigBA.h:128
bool __no_jacobian_store
Definition: ConfigBA.h:135
float __initial_mse
Definition: ConfigBA.h:158
bool IsTimeBudgetAvailable()
Definition: ConfigBA.cpp:324
bool __warmup_device
Definition: ConfigBA.h:177
int __recent_cg_status
Definition: ConfigBA.h:173
void CopyFromHost(const void *buf)
Definition: CuTexImage.cpp:109
size_t GetReservedWidth()
Definition: CuTexImage.h:60
void SwapData(CuTexImage &src)
Definition: CuTexImage.cpp:55
void ReleaseData()
Definition: CuTexImage.cpp:49
void CopyToHost(void *buf)
Definition: CuTexImage.cpp:122
float * data()
Definition: CuTexImage.h:53
bool InitTexture(unsigned int width, unsigned int height, unsigned int nchannel=1)
Definition: CuTexImage.cpp:80
bool IsValid()
Definition: CuTexImage.h:54
void SetTexture(void *data, unsigned int width, unsigned int nchannel=1)
Definition: CuTexImage.cpp:98
@ STATUS_ALLOCATION_FAIL
Definition: pba.h:62
@ STATUS_CAMERA_MISSING
Definition: pba.h:58
@ STATUS_POINT_MISSING
Definition: pba.h:59
@ STATUS_SUCCESS
Definition: pba.h:57
@ STATUS_MEASURMENT_MISSING
Definition: pba.h:61
@ STATUS_PROJECTION_MISSING
Definition: pba.h:60
@ BUNDLE_ONLY_STRUCTURE
Definition: pba.h:79
@ BUNDLE_ONLY_MOTION
Definition: pba.h:78
CuTexImage _cuCameraMeasurementListT
CuTexImage _cuCameraData
CuTexImage _cuCameraQListW
float EvaluateProjection(CuTexImage &cam, CuTexImage &point, CuTexImage &proj)
CuTexImage _cuVectorRK
bool CheckRequiredMem(int fresh=1)
void ComputeJtE(CuTexImage &E, CuTexImage &JtE, int mode=0)
CuTexImage _cuCameraMeasurementMap
virtual void SetCameraData(size_t ncam, CameraT *cams)
CuTexImage _cuVectorPK
void EvaluateJacobians(bool shuffle=true)
virtual float GetMeanSquaredError()
CuTexImage _cuVectorJtE
LM normal equation.
const int * _camera_idx
virtual void SetPointData(size_t npoint, Point3D *pts)
virtual void SetFocalMask(const int *fmask, float weight)
CuTexImage _cuCameraDataEX
void SaveBundleRecord(int iter, float res, float damping, float &g_norm, float &g_inf)
virtual int RunBundleAdjustment()
int SolveNormalEquation(float lambda)
CuTexImage _cuVectorSJ
CuTexImage _cuCameraQMap
float SaveUpdatedSystem(float residual_reduction, float dx_sqnorm, float damping)
void ComputeBlockPC(float lambda, bool dampd=true)
CuTexImage _cuImageProj
float UpdateCameraPoint(CuTexImage &dx, CuTexImage &cuImageTempProj)
CuTexImage _cuVectorXK
const float * _imgpt_data
CuTexImage _cuJacobianPoint
float EvaluateProjectionX(CuTexImage &cam, CuTexImage &point, CuTexImage &proj)
void ApplyBlockPC(CuTexImage &v, CuTexImage &pv, int mode=0)
CuTexImage _cuJacobianCamera
void RunTestIterationLM(bool reduced)
SparseBundleCU(int device)
std::vector< float > _imgpt_datax
const int * _focal_mask
void ComputeJX(CuTexImage &X, CuTexImage &JX, int mode=0)
const int * _point_idx
CuTexImage _cuCameraMeasurementList
void ProcessWeightCameraQ(std::vector< int > &cpnum, std::vector< int > &qmap, std::vector< float > &qmapw, std::vector< float > &qlistw)
void ComputeDiagonal(CuTexImage &JJ, CuTexImage &JJI)
virtual void SetProjection(size_t nproj, const Point2D *imgpts, const int *point_idx, const int *cam_idx)
bool ProcessIndexCameraQ(std::vector< int > &qmap, std::vector< int > &qlist)
CuTexImage _cuVectorJJ
CuTexImage _cuPointData
CuTexImage _cuCameraQMapW
void SaveNormalEquation(float lambda)
CuTexImage _cuVectorJX
CuTexImage _cuPointMeasurementMap
int SolveNormalEquationPCGX(float lambda)
CuTexImage _cuJacobianCameraT
CuTexImage _cuCameraQList
CuTexImage _cuMeasurements
CuTexImage _cuProjectionMap
int SolveNormalEquationPCGB(float lambda)
void ReserveStorage(size_t ncam, size_t npt, size_t nproj)
CuTexImage _cuBufferData
CuTexImage _cuVectorZK
CuTexImage _cuPointDataEX
__host__ __device__ float2 fabs(float2 v)
Definition: cutil_math.h:1254
int min(int a, int b)
Definition: cutil_math.h:53
int max(int a, int b)
Definition: cutil_math.h:48
static void error(char *msg)
Definition: lsd.c:159
MiniVec< float, N > floor(const MiniVec< float, N > &a)
Definition: MiniVec.h:75
void ComputeSXYPZ(float a, CuTexImage &texX, CuTexImage &texY, CuTexImage &texZ, CuTexImage &result)
void MultiplyBlockConditioner(int ncam, int npoint, CuTexImage &blocks, CuTexImage &vector, CuTexImage &result, int radial, int mode=0)
void ComputeDiagonalBlock_(float lambda, bool dampd, CuTexImage &camera, CuTexImage &point, CuTexImage &meas, CuTexImage &cmap, CuTexImage &cmlist, CuTexImage &pmap, CuTexImage &jmap, CuTexImage &jp, CuTexImage &sj, CuTexImage &diag, CuTexImage &blocks, bool intrinsic_fixed, int radial_distortion, bool add_existing_diagc, int mode=0)
void ComputeSQRT(CuTexImage &tex)
void ComputeJQtEC(CuTexImage &pe, CuTexImage &qlist, CuTexImage &wq, CuTexImage &sj, CuTexImage &result)
void ComputeJX_(CuTexImage &x, CuTexImage &jx, CuTexImage &camera, CuTexImage &point, CuTexImage &meas, CuTexImage &pjmap, bool intrinsic_fixed, int radial_distortion, int mode=0)
void ComputeSAX(float a, CuTexImage &texX, CuTexImage &result)
void ComputeVXY(CuTexImage &texX, CuTexImage &texY, CuTexImage &result, unsigned int part=0, unsigned int skip=0)
double ComputeVectorNormW(CuTexImage &vector, CuTexImage &weight, CuTexImage &buf)
void ComputeJX(int point_offset, CuTexImage &x, CuTexImage &jc, CuTexImage &jp, CuTexImage &jmap, CuTexImage &result, int mode=0)
void ComputeProjectionQ(CuTexImage &camera, CuTexImage &qmap, CuTexImage &qw, CuTexImage &proj, int offset)
void ClearPreviousError()
void ComputeDiagonalBlock(float lambda, bool dampd, CuTexImage &jc, CuTexImage &cmap, CuTexImage &jp, CuTexImage &pmap, CuTexImage &cmlist, CuTexImage &diag, CuTexImage &blocks, int radial_distortion, bool jc_transpose, bool add_existing_diagc, int mode=0)
void ComputeRSQRT(CuTexImage &tex)
size_t GetCudaMemoryCap()
void ComputeProjectionX(CuTexImage &camera, CuTexImage &point, CuTexImage &meas, CuTexImage &proj_map, CuTexImage &proj, int radial)
void FinishWorkCUDA()
void ComputeSAXPY(float a, CuTexImage &texX, CuTexImage &texY, CuTexImage &result)
int SetCudaDevice(int device)
void ClearTextureObjectCache()
void ComputeDiagonalQ(CuTexImage &qlistw, CuTexImage &sj, CuTexImage &diag)
void ComputeJtE_(CuTexImage &e, CuTexImage &jte, CuTexImage &camera, CuTexImage &point, CuTexImage &meas, CuTexImage &cmap, CuTexImage &cmlist, CuTexImage &pmap, CuTexImage &jmap, CuTexImage &jp, bool intrinsic_fixed, int radial_distortion, int mode=0)
double ComputeVectorDot(CuTexImage &vector1, CuTexImage &vector2, CuTexImage &buf)
void ComputeProjection(CuTexImage &camera, CuTexImage &point, CuTexImage &meas, CuTexImage &proj_map, CuTexImage &proj, int radial)
float ComputeVectorMax(CuTexImage &vector, CuTexImage &buf)
void ComputeJQX(CuTexImage &x, CuTexImage &qmap, CuTexImage &wq, CuTexImage &sj, CuTexImage &jx, int offset)
void ComputeJacobian(CuTexImage &camera, CuTexImage &point, CuTexImage &jc, CuTexImage &jp, CuTexImage &proj_map, CuTexImage &sj, CuTexImage &meas, CuTexImage &cmlist, bool intrinsic_fixed, int radial_distortion, bool shuffle)
void ComputeDiagonal(CuTexImage &jc, CuTexImage &cmap, CuTexImage &jp, CuTexImage &pmap, CuTexImage &cmlist, CuTexImage &jtjd, CuTexImage &jtjdi, bool jc_transpose, int radial, bool add_existing_diagc)
void UpdateCameraPoint(int ncam, CuTexImage &camera, CuTexImage &point, CuTexImage &delta, CuTexImage &new_camera, CuTexImage &new_point, int mode=0)
void ComputeJtE(CuTexImage &pe, CuTexImage &jc, CuTexImage &cmap, CuTexImage &cmlist, CuTexImage &jp, CuTexImage &pmap, CuTexImage &jte, bool jc_transpose, int mode=0)
double ComputeVectorNorm(CuTexImage &vector, CuTexImage &buf)
bool ShuffleCameraJacobian(CuTexImage &jc, CuTexImage &map, CuTexImage &result)
void ResetCurrentDevice()
Definition: ConfigBA.cpp:44
float float_t
static size_t upgrade_dimension(size_t sz)
double timer
Definition: struct.h:215
float_t m[3][3]
Definition: DataInterface.h:53
float_t t[3]
Definition: DataInterface.h:52
float_t radial
Definition: DataInterface.h:54
Definition: lsd.c:149