21 #ifndef otbSampleAugmentation_h
22 #define otbSampleAugmentation_h
39 namespace sampleAugmentation
// One-pass estimate of the per-component standard deviation using a
// Welford-style running update: for every new value x the mean moves from
// mu to muNew and (x - mu) * (x - muNew) is added to the accumulator.
// NOTE(review): samples[0] is read without a visible emptiness check --
// confirm callers never pass an empty sample set.
50 const auto nbSamples = samples.size();
51 const long nbComponents =
static_cast<long>(samples[0].size());
// Fold the samples in one at a time.
54 for (
size_t i = 0; i < nbSamples; ++i)
// 1 / (i + 1) is the weight of sample i in the running mean.
56 auto norm_factor = 1.0 / (i + 1);
// Components are independent: iteration j reads means[j] and samples[i][j]
// and writes stds[j] only.
58 #pragma omp parallel for
60 for (
long j = 0; j < nbComponents; ++j)
62 const auto mu = means[j];
63 const auto x = samples[i][j];
64 auto muNew = mu + (x - mu) * norm_factor;
// stds[j] holds the running sum of squared deviations at this point, not
// yet a standard deviation.
// NOTE(review): the running mean is expected to be stored back into
// means[j] after this line -- confirm.
65 stds[j] += (x - mu) * (x - muNew);
// Turn each accumulated sum into a standard deviation. The divisor is
// nbSamples (population form), not nbSamples - 1.
70 #pragma omp parallel for
72 for (
long j = 0; j < nbComponents; ++j)
74 stds[j] = std::sqrt(stds[j] / nbSamples);
// Fill newSamples with nbSamples entries copied from inSamples, walking
// inSamples with the cursor imod.
86 newSamples.resize(nbSamples);
// Signed copy of the count, used as the bound of the OpenMP loop below.
87 const long long nbSamplesLL =
static_cast<long long>(nbSamples);
// NOTE(review): imod is compared and post-incremented by every iteration
// of a loop marked `omp parallel for`. Unless it is made private or
// otherwise protected where it is declared, this is a data race and the
// order of the replicated samples is not deterministic. Indexing with
// `i % inSamples.size()` would need no shared cursor -- confirm and fix.
90 #pragma omp parallel for
92 for (
long long i = 0; i < nbSamplesLL; ++i)
// Detects that the cursor has reached the end of inSamples (presumably to
// wrap it back to the start -- confirm).
94 if (imod == inSamples.size())
96 newSamples[i] = inSamples[imod++];
// Default seed: the current calendar time, narrowed from std::time_t to int.
108 const int seed = std::time(
nullptr))
110 newSamples.resize(nbSamples);
// NOTE(review): inSamples[0] is read without a visible emptiness check.
111 const long nbComponents =
static_cast<long>(inSamples[0].size());
// NOTE(review): gen is seeded from std::random_device, not from `seed`, so
// the Gaussian noise drawn below is not reproducible from the seed
// argument -- confirm whether that is intended.
112 std::random_device rd;
113 std::mt19937 gen(rd());
// One zero-mean normal distribution per component; its standard deviation
// is stds[i] / stdFactor.
119 std::vector<std::normal_distribution<double>> gaussDis(nbComponents);
121 #pragma omp parallel for
123 for (
long i = 0; i < nbComponents; ++i)
124 gaussDis[i] = std::normal_distribution<double>{0.0, stds[i] / stdFactor};
// Each new sample starts as a copy of an input sample picked with
// std::rand() (selection with replacement), then receives noise.
127 for (
size_t i = 0; i < nbSamples; ++i)
129 newSamples[i] = inSamples[std::rand() % inSamples.size()];
// NOTE(review): every iteration of this parallel loop draws from the same
// `gen` object. Concurrent calls on one std::mt19937 are a data race;
// consider a per-thread engine or dropping the pragma -- confirm.
131 #pragma omp parallel for
133 for (
long j = 0; j < nbComponents; ++j)
134 newSamples[i][j] += gaussDis[j](gen);
// Squared Euclidean distance between x and y, divided by the square of
// their dimension.
// The size check is an assert only, so it is compiled out under NDEBUG.
155 assert(x.size() == y.size());
// Accumulate the squared component-wise differences.
157 for (
size_t i = 0; i < x.size(); ++i)
159 dist += (x[i] - y[i]) * (x[i] - y[i]);
// The divisor is size()^2, not size(). For vectors of equal dimension this
// does not change how distances rank.
// NOTE(review): empty inputs give a zero divisor -- confirm they cannot
// occur.
161 return dist / (x.size() * x.size());
// Brute-force nearest-neighbour search: every sample is compared against
// every other sample, so the number of distance evaluations grows with the
// square of the number of samples.
170 const long long nbSamples =
static_cast<long long>(inSamples.size());
// One neighbour list per input sample.
171 nnVector.resize(nbSamples);
// Each iteration writes only its own slot nnVector[sampleIdx].
173 #pragma omp parallel for
175 for (
long long sampleIdx = 0; sampleIdx < nbSamples; ++sampleIdx)
// Per-sample priority queue of candidates, ordered by NeighborSorter.
177 std::priority_queue<NeighborType, NNIndicesType, NeighborSorter> nns;
178 for (
long long neighborIdx = 0; neighborIdx < nbSamples; ++neighborIdx)
// A sample is never its own neighbour.
180 if (sampleIdx != neighborIdx)
182 nns.push({
static_cast<size_t>(neighborIdx),
ComputeSquareDistance(inSamples[sampleIdx], inSamples[neighborIdx])});
// Detects that the queue holds more than nbNeighbors candidates
// (presumably the top element is then dropped -- confirm).
183 if (nns.size() > nbNeighbors)
// Move the queue contents into a plain vector.
192 nnv.reserve(nns.size());
195 nnv.push_back(nns.top());
// Elements were appended in queue-top order; reverse that order before
// storing the list.
198 std::reverse(std::begin(nnv), std::end(nnv));
200 nnVector[sampleIdx] = std::move(nnv);
// Component-wise linear interpolation between s1 and s2: position == 0
// gives s1[i], position == 1 gives s2[i].
// The loop bound is s1.size(), so s2 and result are indexed with the same
// range.
// NOTE(review): assumes s2 and result have at least s1.size() elements --
// confirm.
209 for (
size_t i = 0; i < s1.size(); ++i)
210 result[i] = s1[i] + (s2[i] - s1[i]) * position;
// Generate nbSamples synthetic samples. Each one is an interpolation
// between a randomly chosen input sample and one of its precomputed
// neighbours from nnVector.
223 newSamples.resize(nbSamples);
// Signed copy of the count, used as the bound of the OpenMP loop below.
224 const long long nbSamplesLL =
static_cast<long long>(nbSamples);
// NOTE(review): std::rand() is called from inside this parallel loop. Its
// thread-safety is implementation-defined, so results may be racy and are
// not reproducible from a seed -- confirm.
230 #pragma omp parallel for
232 for (
long long i = 0; i < nbSamplesLL; ++i)
234 const auto sampleIdx = std::rand() % (inSamples.size());
// `const auto` deduces a value type here, so the sample is copied.
235 const auto sample = inSamples[sampleIdx];
// NOTE(review): the index is drawn modulo nbNeighbors, which assumes
// nnVector[sampleIdx] holds at least nbNeighbors entries. With too few
// input samples the list may be shorter and this would read out of
// range -- confirm.
236 const auto neighborIdx = nnVector[sampleIdx][std::rand() % nbNeighbors].index;
237 const auto neighbor = inSamples[neighborIdx];
// Interpolation position is std::rand() / RAND_MAX, i.e. within [0, 1].
238 newSamples[i] =
SmoteCombine(sample, neighbor, std::rand() /
double{RAND_MAX});