21 #ifndef otbImageToSIFTKeyPointSetFilter_hxx
22 #define otbImageToSIFTKeyPointSetFilter_hxx
26 #include "itkMatrix.h"
27 #include "itkProcessObject.h"
35 template <
class TInputImage,
class TOutputPo
intSet>
44 m_RatioEdgeThreshold(0),
45 m_GradientMagnitudeThreshold(0.2),
48 m_SigmaFactorOrientation(3),
49 m_SigmaFactorDescriptor(1.5),
53 m_ValidatedKeyPoints(0),
54 m_DifferentSamplePoints(0),
55 m_DiscardedKeyPoints(0),
56 m_ChangeSamplePointsMax(2),
58 m_HistogramGaussianWeights({
59 2.3771112282795414e-07, 3.8860734758633732e-07, 6.2655544995978937e-07, 9.9631120821413786e-07, 1.5624909838697011e-06, 2.4167238265599128e-06,
60 3.6865788528530121e-06, 5.5463469229192623e-06, 8.2295774080263437e-06, 1.2043009749602365e-05, 1.738119136656513e-05, 2.4740646513897326e-05,
61 3.4731980398846277e-05, 4.808781565748272e-05, 6.5664032975164266e-05, 8.8431512984476723e-05, 0.00011745555408931643, 0.00015386047198026335,
62 0.00019877765486783745, 0.00025327659834301937, 0.00031828015928190065, 0.00039446735551235698, 0.00048216931692246382, 0.00058126620279441276,
63 0.00069109471776775144, 0.00081037694122312908, 0.00093718121775182789, 0.0010689246133776746, 0.0012024238404411182, 0.0013339976954896103,
64 0.0014596192424447215, 0.0015751106965100009, 0.0016763688464699555, 0.0017596045720966803, 0.0018215772013714365, 0.0018598035923515156,
65 0.0018727231637146351, 0.0018598035923515156, 0.0018215772013714365, 0.0017596045720966803, 0.0016763688464699555, 0.0015751106965100009,
66 0.0014596192424447215, 0.0013339976954896103, 0.0012024238404411182, 0.0010689246133776746, 0.00093718121775182789, 0.00081037694122312908,
67 0.00069109471776775144, 0.00058126620279441276, 0.00048216931692246382, 0.00039446735551235698, 0.00031828015928190065, 0.00025327659834301937,
68 0.00019877765486783745, 0.00015386047198026335, 0.00011745555408931643, 8.8431512984476723e-05, 6.5664032975164266e-05, 4.808781565748272e-05,
69 3.4731980398846277e-05, 2.4740646513897326e-05, 1.738119136656513e-05, 1.2043009749602365e-05, 8.2295774080263437e-06, 5.5463469229192623e-06,
70 3.6865788528530121e-06, 2.4167238265599128e-06, 1.5624909838697011e-06, 9.9631120821413786e-07, 6.2655544995978937e-07, 3.8860734758633732e-07,
71 2.3771112282795414e-07})
91 template <
class TInputImage,
class TOutputPo
intSet>
95 InitializeInputImage();
98 typename InputImageType::PointType point;
99 typename InputImageType::IndexType index;
103 m_ExpandFilter->GetOutput()->TransformIndexToPhysicalPoint(index, point);
106 unsigned int lOctave = 0;
107 m_Sigmak = std::pow(2,
static_cast<double>(1 / (
double)(m_ScalesNumber + 1)));
108 m_RatioEdgeThreshold = (m_EdgeThreshold + 1) * (m_EdgeThreshold + 1) / m_EdgeThreshold;
110 for (lOctave = 0; lOctave != m_OctavesNumber; lOctave++)
112 m_DifferentSamplePoints = 0;
113 m_DiscardedKeyPoints = 0;
115 typename InputImageType::PointType origin0 = input->GetOrigin();
117 ComputeDifferenceOfGaussian(input);
118 DetectKeyPoint(lOctave);
122 m_ShrinkFilter = ShrinkFilterType::New();
123 m_ShrinkFilter->SetInput(m_LastGaussian);
124 m_ShrinkFilter->SetShrinkFactors(m_ShrinkFactors);
125 m_ShrinkFilter->Update();
127 input = m_ShrinkFilter->GetOutput();
129 typename InputImageType::PointType origin1;
130 typename InputImageType::SpacingType spacing = input->GetSignedSpacing();
132 origin1[0] = origin0[0] + spacing[0] * 0.25;
133 origin1[1] = origin0[1] + spacing[1] * 0.25;
135 input->SetOrigin(origin1);
137 otbGenericMsgDebugMacro(<<
"ImageToSIFTKeyPointSetFilter:: Number total key points : " << m_ValidatedKeyPoints);
138 otbGenericMsgDebugMacro(<<
"ImageToSIFTKeyPointSetFilter:: Number different sample key points per octave : " << m_DifferentSamplePoints);
139 otbGenericMsgDebugMacro(<<
"ImageToSIFTKeyPointSetFilter:: Number discarded key points per octave : " << m_DiscardedKeyPoints);
143 otbGenericMsgDebugMacro(<<
"ImageToSIFTKeyPointSetFilter:: Total number key points : " << this->GetOutput()->GetNumberOfPoints());
149 template <
class TInputImage,
class TOutputPo
intSet>
152 m_ExpandFilter->SetInput(this->GetInput());
153 m_ExpandFilter->SetExpandFactors(m_ExpandFactors);
154 m_ExpandFilter->Update();
157 typename InputImageType::PointType origin0 = this->GetInput()->GetOrigin();
158 typename InputImageType::PointType origin1;
159 typename InputImageType::SpacingType spacing = m_ExpandFilter->GetOutput()->GetSignedSpacing();
161 origin1[0] = origin0[0] - spacing[0] * 0.5;
162 origin1[1] = origin0[1] - spacing[1] * 0.5;
164 m_ExpandFilter->GetOutput()->SetOrigin(origin1);
170 template <
class TInputImage,
class TOutputPo
intSet>
173 unsigned int lScale = 0;
176 m_DoGList = ImageListType::New();
178 m_MagnitudeList = ImageListType::New();
179 m_OrientationList = ImageListType::New();
189 double xsigman = std::abs(input->GetSignedSpacing()[0]) * m_Sigma0;
190 double ysigman = std::abs(input->GetSignedSpacing()[1]) * m_Sigma0;
192 for (lScale = 0; lScale != m_ScalesNumber + 2; lScale++)
194 m_XGaussianFilter = GaussianFilterType::New();
195 m_YGaussianFilter = GaussianFilterType::New();
197 m_XGaussianFilter->SetSigma(xsigman);
198 m_XGaussianFilter->SetDirection(0);
199 m_XGaussianFilter->SetInput(input);
201 m_YGaussianFilter->SetSigma(ysigman);
202 m_YGaussianFilter->SetDirection(1);
203 m_YGaussianFilter->SetInput(m_XGaussianFilter->GetOutput());
205 m_YGaussianFilter->Update();
207 m_GradientFilter = GradientFilterType::New();
208 m_MagnitudeFilter = MagnitudeFilterType::New();
209 m_OrientationFilter = OrientationFilterType::New();
211 m_GradientFilter->SetInput(m_YGaussianFilter->GetOutput());
212 m_MagnitudeFilter->SetInput(m_GradientFilter->GetOutput());
213 m_OrientationFilter->SetInput(m_GradientFilter->GetOutput());
215 m_MagnitudeFilter->Update();
216 m_OrientationFilter->Update();
218 m_MagnitudeList->PushBack(m_MagnitudeFilter->GetOutput());
219 m_OrientationList->PushBack(m_OrientationFilter->GetOutput());
223 m_SubtractFilter = SubtractFilterType::New();
224 m_SubtractFilter->SetInput1(m_YGaussianFilter->GetOutput());
225 m_SubtractFilter->SetInput2(previousGaussian);
226 m_SubtractFilter->Update();
227 m_DoGList->PushBack(m_SubtractFilter->GetOutput());
230 previousGaussian = m_YGaussianFilter->GetOutput();
231 xsigman = xsigman * m_Sigmak;
232 ysigman = ysigman * m_Sigmak;
234 m_LastGaussian = previousGaussian;
241 template <
class TInputImage,
class TOutputPo
intSet>
245 if (m_ScalesNumber > 1)
248 unsigned int lScale = 1;
250 typename InputImageType::SpacingType spacing = lIterDoG.Get()->GetSignedSpacing();
253 while ((lIterDoG + 1) != m_DoGList->End())
259 lMaximumCalculator->SetImage(lIterDoG.Get());
260 lMaximumCalculator->Compute();
262 typename InputImageType::SizeType lRadius;
271 while (!lIterCurrent.IsAtEnd() && !lIterLowerAdjacent.IsAtEnd() && !lIterUpperAdjacent.IsAtEnd())
274 if (IsLocalExtremum(lIterCurrent, lIterLowerAdjacent, lIterUpperAdjacent))
279 unsigned int lChangeSamplePoints = 0;
284 bool accepted =
false;
286 while (lChangeSamplePoints < m_ChangeSamplePointsMax && changed)
288 accepted = RefineLocationKeyPoint(neighborCurrentScale, neighborPreviousScale, neighborNextScale, lTranslation);
292 lTranslateOffset[0] +=
static_cast<int>(lTranslation[0] > 0.5);
293 lTranslateOffset[0] += -
static_cast<int>(lTranslation[0] < -0.5);
295 lTranslateOffset[1] +=
static_cast<int>(lTranslation[1] > 0.5);
296 lTranslateOffset[1] += -
static_cast<int>(lTranslation[1] < -0.5);
300 if (moveIterator.InBounds())
302 changed = lTranslateOffset != lOffsetZero;
305 neighborCurrentScale += lTranslateOffset;
306 neighborPreviousScale += lTranslateOffset;
307 neighborNextScale += lTranslateOffset;
313 ++lChangeSamplePoints;
317 ++m_DifferentSamplePoints;
323 std::vector<PixelType> lOrientations = ComputeKeyPointOrientations(neighborCurrentScale, lScale, lTranslation[2]);
326 for (
typename std::vector<PixelType>::iterator orientationIt = lOrientations.begin(); orientationIt != lOrientations.end(); ++orientationIt)
329 std::vector<PixelType> lDescriptors = ComputeKeyPointDescriptor(neighborCurrentScale, lScale, *orientationIt);
333 lIterDoG.Get()->TransformIndexToPhysicalPoint(neighborCurrentScale.GetIndex(), keyPoint);
334 keyPoint[0] += spacing[0] * lTranslation[0];
335 keyPoint[1] += spacing[1] * lTranslation[1];
337 outputPointSet->SetPoint(m_ValidatedKeyPoints, keyPoint);
343 typename std::vector<PixelType>::const_iterator lIterDescriptor = lDescriptors.begin();
345 unsigned int lIndDesc = 0;
346 while (lIterDescriptor != lDescriptors.end())
348 data.SetElement(lIndDesc, *lIterDescriptor);
352 outputPointSet->SetPointData(m_ValidatedKeyPoints, data);
354 ++m_ValidatedKeyPoints;
360 ++lIterLowerAdjacent;
361 ++lIterUpperAdjacent;
373 template <
class TInputImage,
class TOutputPo
intSet>
378 bool isMin = currentScale.GetCenterPixel() < currentScale.GetPixel(m_Offsets[0]);
379 bool isMax = currentScale.GetCenterPixel() > currentScale.GetPixel(m_Offsets[0]);
380 bool isExtremum = isMin || isMax;
381 unsigned int lIterOffset = 0;
384 while (isExtremum && lIterOffset != 8)
389 isExtremum = currentScale.GetCenterPixel() < currentScale.GetPixel(off) && currentScale.GetCenterPixel() < previousScale.GetPixel(off) &&
390 currentScale.GetCenterPixel() < nextScale.GetPixel(off);
394 isExtremum = currentScale.GetCenterPixel() > currentScale.GetPixel(off) && currentScale.GetCenterPixel() > previousScale.GetPixel(off) &&
395 currentScale.GetCenterPixel() > nextScale.GetPixel(off);
399 if (isExtremum && isMin)
401 isExtremum = currentScale.GetCenterPixel() < previousScale.GetCenterPixel() && currentScale.GetCenterPixel() < nextScale.GetCenterPixel();
403 else if (isExtremum && isMax)
405 isExtremum = currentScale.GetCenterPixel() > previousScale.GetCenterPixel() && currentScale.GetCenterPixel() > nextScale.GetCenterPixel();
413 template <
class TInputImage,
class TOutputPo
intSet>
418 bool accepted =
true;
422 PixelType dx = 0.5 * (currentScale.GetPixel(m_Offsets[6]) - currentScale.GetPixel(m_Offsets[1]));
424 PixelType dy = 0.5 * (currentScale.GetPixel(m_Offsets[4]) - currentScale.GetPixel(m_Offsets[3]));
426 PixelType ds = 0.5 * (nextScale.GetCenterPixel() - previousScale.GetCenterPixel());
428 PixelType dxx = currentScale.GetPixel(m_Offsets[6]) - 2 * currentScale.GetCenterPixel() + currentScale.GetPixel(m_Offsets[1]);
430 PixelType dyy = currentScale.GetPixel(m_Offsets[3]) - 2 * currentScale.GetCenterPixel() + currentScale.GetPixel(m_Offsets[4]);
432 PixelType dss = previousScale.GetCenterPixel() - 2 * currentScale.GetCenterPixel() + nextScale.GetCenterPixel();
434 PixelType dxy = 0.25 * (currentScale.GetPixel(m_Offsets[7]) + currentScale.GetPixel(m_Offsets[0]) - currentScale.GetPixel(m_Offsets[2]) -
435 currentScale.GetPixel(m_Offsets[5]));
437 PixelType dxs = 0.25 * (nextScale.GetPixel(m_Offsets[6]) + previousScale.GetPixel(m_Offsets[1]) - nextScale.GetPixel(m_Offsets[1]) -
438 previousScale.GetPixel(m_Offsets[6]));
440 PixelType dys = 0.25 * (nextScale.GetPixel(m_Offsets[4]) + previousScale.GetPixel(m_Offsets[3]) - nextScale.GetPixel(m_Offsets[3]) -
441 previousScale.GetPixel(m_Offsets[4]));
444 double det = dxx * (dyy * dss - dys * dys) - dxy * (dxy * dss - dxs * dys) + dxs * (dxy * dys - dxs * dyy);
447 solution[0] = -dx * (dyy * dss - dys * dys) - dy * (dxs * dys - dxy * dss) - ds * (dxy * dys - dyy * dxs);
448 solution[1] = -dx * (dys * dxs - dss * dxy) - dy * (dxx * dss - dxs * dxs) - ds * (dxs * dxy - dxx * dys);
449 solution[2] = -dx * (dxy * dys - dxs * dyy) - dy * (dxy * dxs - dxx * dys) - ds * (dxx * dyy - dxy * dxy);
452 PixelType lDoGInterpolated = det * currentScale.GetCenterPixel() + 0.5 * (dx * solution[0] + dy * solution[1] + ds * solution[2]);
454 PixelType lHessianTrace2 = (dxx + dyy) * (dxx + dyy);
455 PixelType lHessianDet = dxx * dyy - dxy * dxy;
458 accepted = fabs(lDoGInterpolated) >= fabs(det * m_DoGThreshold);
462 accepted = accepted && fabs(lHessianTrace2) < fabs(m_RatioEdgeThreshold * lHessianDet);
466 ++m_DiscardedKeyPoints;
483 template <
class TInputImage,
class TOutputPo
intSet>
484 std::vector<typename ImageToSIFTKeyPointSetFilter<TInputImage, TOutputPointSet>::PixelType>
489 unsigned int radius = 4;
490 double lSigma = scale * 3;
491 unsigned int nbBins = 36;
492 double binWidth = 360. / nbBins;
495 std::vector<double> lHistogram(nbBins, 0.), lSmoothedHistogram(nbBins, 0.);
498 typename InputImageType::RegionType region;
499 typename InputImageType::RegionType::SizeType regionSize;
500 typename InputImageType::RegionType::IndexType regionIndex;
501 regionSize.Fill(2 * radius + 2);
502 region.SetSize(regionSize);
503 regionIndex[0] = currentScale.GetIndex()[0] - regionSize[0] / 2;
504 regionIndex[1] = currentScale.GetIndex()[1] - regionSize[1] / 2;
505 region.SetIndex(regionIndex);
507 if (!region.Crop(m_OrientationList->GetNthElement(scale)->GetLargestPossibleRegion()))
509 itkExceptionMacro(<<
"Region " << region <<
" is strictly outside the largest possible region!");
515 lIterOrientation.GoToBegin();
516 lIterMagn.GoToBegin();
519 while (!lIterOrientation.IsAtEnd() && !lIterMagn.IsAtEnd())
523 float dx = lIterMagn.GetIndex()[0] - currentScale.GetIndex()[0];
524 float dy = lIterMagn.GetIndex()[1] - currentScale.GetIndex()[1];
525 float dist = std::sqrt(dx * dx + dy * dy);
531 PixelType lOrientation = lIterOrientation.Get();
535 double lWeightMagnitude = std::exp(-dist * dist / (2 * lSigma * lSigma));
538 unsigned int lHistoIndex =
static_cast<unsigned int>(std::floor(nbBins * lOrientation / (
CONST_2PI)));
541 lHistogram[lHistoIndex] += lMagnitude * lWeightMagnitude;
549 double secondMax = 0;
557 for (i = 0; i < static_cast<int>(nbBins); ++i)
560 for (j = i - nbBins; j < i; ++j)
562 sum += lHistogram[i - j - 1] * m_HistogramGaussianWeights[j + nbBins];
564 lSmoothedHistogram[i] = sum;
568 for (i = 0; i < static_cast<int>(nbBins); ++i)
570 if (lSmoothedHistogram[i] > max)
574 max = lSmoothedHistogram[i];
577 else if (sum > secondMax)
579 secondMax = lSmoothedHistogram[i];
584 std::vector<PixelType> orientations;
587 double x1, x2, x3, y1, y2, y3, a, b, num, denom, orientation;
588 x1 = (maxIndex - 1) * binWidth + binWidth / 2;
589 y1 = lSmoothedHistogram[(maxIndex - 1) < 0 ? maxIndex - 1 + nbBins : maxIndex - 1];
590 x2 = (maxIndex)*binWidth + binWidth / 2;
591 y2 = lSmoothedHistogram[maxIndex];
592 x3 = (maxIndex + 1) * binWidth + binWidth / 2;
593 y3 = lSmoothedHistogram[maxIndex + 1 >
static_cast<int>(nbBins) - 1 ? maxIndex + 1 - nbBins : maxIndex + 1];
595 denom = x1 * x1 * x2 + x2 * x2 * x3 + x3 * x3 * x1 - x1 * x1 * x3 - x2 * x2 * x1 - x3 * x3 * x2;
596 num = y1 * x2 + y2 * x3 + y3 * x1 - y1 * x3 - y2 * x1 - y3 * x2;
598 if (denom == 0 || num == 0)
605 b = ((y1 - y2) - a * (x1 * x1 - x2 * x2)) / (x1 - x2);
607 orientation = -b / (2 * a);
612 else if (orientation >= 360)
618 orientations.push_back(
static_cast<PixelType>(orientation));
662 template <
class TInputImage,
class TOutputPo
intSet>
663 std::vector<typename ImageToSIFTKeyPointSetFilter<TInputImage, TOutputPointSet>::PixelType>
667 std::vector<PixelType> lHistogram(128, 0.);
669 typename InputImageType::RegionType region;
670 typename InputImageType::RegionType::SizeType regionSize;
671 typename InputImageType::RegionType::IndexType regionIndex;
673 unsigned int nbHistograms = 4;
674 unsigned int nbPixelsPerHistogram = 4;
675 unsigned int nbBinsPerHistogram = 8;
677 float radius =
static_cast<float>(nbHistograms / 2 * nbPixelsPerHistogram);
682 regionSize[0] = nbHistograms * nbPixelsPerHistogram + 2;
683 regionSize[1] = nbHistograms * nbPixelsPerHistogram + 2;
687 double lSigma = radius;
690 regionIndex[0] = currentScale.GetIndex()[0] - regionSize[0] / 2;
691 regionIndex[1] = currentScale.GetIndex()[1] - regionSize[1] / 2;
693 region.SetIndex(regionIndex);
694 region.SetSize(regionSize);
697 if (!region.Crop(m_OrientationList->GetNthElement(scale)->GetLargestPossibleRegion()))
699 itkExceptionMacro(<<
"Region " << region <<
" is outside of the largest possible region!");
703 lIterMagnitude.GoToBegin();
704 lIterOrientation.GoToBegin();
707 while (!lIterMagnitude.IsAtEnd() && !lIterOrientation.IsAtEnd())
710 float dx = lIterMagnitude.GetIndex()[0] - currentScale.GetIndex()[0];
711 float dy = lIterMagnitude.GetIndex()[1] - currentScale.GetIndex()[1];
712 float dist = std::sqrt(dx * dx + dy * dy);
719 float cosangle = std::cos(-angle);
720 float sinangle = std::sin(-angle);
721 float rdx = dx * cosangle - dy * sinangle;
722 float rdy = dx * sinangle + dy * cosangle;
724 unsigned int xHistogramIndex =
static_cast<unsigned int>(std::floor((rdx + radius) /
static_cast<float>(nbPixelsPerHistogram)));
725 unsigned int yHistogramIndex =
static_cast<unsigned int>(std::floor((rdy + radius) /
static_cast<float>(nbPixelsPerHistogram)));
728 float compensatedOrientation = lIterOrientation.Get() - angle;
729 if (compensatedOrientation < 0)
737 unsigned int histogramBin =
static_cast<unsigned int>(std::floor(compensatedOrientation * nbBinsPerHistogram / (
CONST_2PI)));
740 double lWeightMagnitude = std::exp(-(dist * dist) / (2 * lSigma * lSigma));
743 unsigned int descriptorIndex = yHistogramIndex * nbBinsPerHistogram * nbHistograms + xHistogramIndex * nbBinsPerHistogram + histogramBin;
744 lHistogram[descriptorIndex] += lIterMagnitude.Get() * lWeightMagnitude;
752 typename std::vector<PixelType>::iterator lIterHisto = lHistogram.begin();
755 while (lIterHisto != lHistogram.end())
757 lNorm = lNorm + (*lIterHisto) * (*lIterHisto);
760 lNorm = std::sqrt(lNorm);
762 lIterHisto = lHistogram.begin();
763 while (lIterHisto != lHistogram.end())
767 *lIterHisto = (*lIterHisto) / lNorm;
771 *lIterHisto = m_GradientMagnitudeThreshold;
775 if (*lIterHisto > m_GradientMagnitudeThreshold)
777 *lIterHisto = m_GradientMagnitudeThreshold;
783 lIterHisto = lHistogram.begin();
786 while (lIterHisto != lHistogram.end())
788 lNorm = lNorm + (*lIterHisto) * (*lIterHisto);
791 lNorm = std::sqrt(lNorm);
793 lIterHisto = lHistogram.begin();
794 while (lIterHisto != lHistogram.end())
796 *lIterHisto = (*lIterHisto) / lNorm;
806 template <
class TInputImage,
class TOutputPo
intSet>
811 Superclass::PrintSelf(os, indent);
812 os << indent <<
"Number of octaves: " << m_OctavesNumber << std::endl;
813 os << indent <<
"Number of scales: " << m_ScalesNumber << std::endl;
815 os << indent <<
"Number of SIFT key points: " << output->GetNumberOfPoints() << std::endl;