2019-02-02 14:54:50 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <Core/Types.h>
|
|
|
|
|
|
|
|
#include <algorithm>
|
2019-02-02 15:15:53 +00:00
|
|
|
#include <cctype>
|
2019-02-11 12:42:20 +00:00
|
|
|
#include <cmath>
|
2019-02-02 14:54:50 +00:00
|
|
|
#include <queue>
|
|
|
|
#include <utility>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2019-02-11 12:42:20 +00:00
|
|
|
template <size_t MaxNumHints>
|
2019-02-02 14:54:50 +00:00
|
|
|
class NamePrompter
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
using DistanceIndex = std::pair<size_t, size_t>;
|
|
|
|
using DistanceIndexQueue = std::priority_queue<DistanceIndex>;
|
|
|
|
|
|
|
|
static std::vector<String> getHints(const String & name, const std::vector<String> & prompting_strings)
|
|
|
|
{
|
|
|
|
DistanceIndexQueue queue;
|
|
|
|
for (size_t i = 0; i < prompting_strings.size(); ++i)
|
|
|
|
appendToQueue(i, name, queue, prompting_strings);
|
|
|
|
return release(queue, prompting_strings);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2019-02-02 15:13:31 +00:00
|
|
|
static size_t levenshteinDistance(const String & lhs, const String & rhs)
|
2019-02-02 14:54:50 +00:00
|
|
|
{
|
|
|
|
size_t n = lhs.size();
|
|
|
|
size_t m = rhs.size();
|
2019-02-02 15:13:31 +00:00
|
|
|
std::vector<std::vector<size_t>> dp(n + 1, std::vector<size_t>(m + 1));
|
2019-02-02 14:54:50 +00:00
|
|
|
|
|
|
|
for (size_t i = 1; i <= n; ++i)
|
2019-02-02 15:13:31 +00:00
|
|
|
dp[i][0] = i;
|
2019-02-02 14:54:50 +00:00
|
|
|
|
|
|
|
for (size_t i = 1; i <= m; ++i)
|
2019-02-02 15:13:31 +00:00
|
|
|
dp[0][i] = i;
|
2019-02-02 14:54:50 +00:00
|
|
|
|
|
|
|
for (size_t j = 1; j <= m; ++j)
|
|
|
|
{
|
|
|
|
for (size_t i = 1; i <= n; ++i)
|
|
|
|
{
|
|
|
|
if (std::tolower(lhs[i - 1]) == std::tolower(rhs[j - 1]))
|
2019-02-02 15:13:31 +00:00
|
|
|
dp[i][j] = dp[i - 1][j - 1];
|
2019-02-02 14:54:50 +00:00
|
|
|
else
|
2019-02-02 15:13:31 +00:00
|
|
|
dp[i][j] = std::min(dp[i - 1][j] + 1, std::min(dp[i][j - 1] + 1, dp[i - 1][j - 1] + 1));
|
2019-02-02 14:54:50 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-02 15:13:31 +00:00
|
|
|
return dp[n][m];
|
2019-02-02 14:54:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void appendToQueue(size_t ind, const String & name, DistanceIndexQueue & queue, const std::vector<String> & prompting_strings)
|
|
|
|
{
|
2019-02-11 12:42:20 +00:00
|
|
|
const String & prompt = prompting_strings[ind];
|
|
|
|
|
|
|
|
/// Clang SimpleTypoCorrector logic
|
|
|
|
const size_t min_possible_edit_distance = std::abs(static_cast<int64_t>(name.size()) - static_cast<int64_t>(prompt.size()));
|
|
|
|
const size_t mistake_factor = (name.size() + 2) / 3;
|
|
|
|
if (min_possible_edit_distance > 0 && name.size() / min_possible_edit_distance < 3)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (prompt.size() <= name.size() + mistake_factor && prompt.size() + mistake_factor >= name.size())
|
2019-02-02 14:54:50 +00:00
|
|
|
{
|
2019-02-11 12:42:20 +00:00
|
|
|
size_t distance = levenshteinDistance(prompt, name);
|
|
|
|
if (distance <= mistake_factor)
|
2019-02-02 15:15:53 +00:00
|
|
|
{
|
2019-02-02 14:54:50 +00:00
|
|
|
queue.emplace(distance, ind);
|
|
|
|
if (queue.size() > MaxNumHints)
|
|
|
|
queue.pop();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static std::vector<String> release(DistanceIndexQueue & queue, const std::vector<String> & prompting_strings)
|
|
|
|
{
|
|
|
|
std::vector<String> ans;
|
|
|
|
ans.reserve(queue.size());
|
|
|
|
while (!queue.empty())
|
|
|
|
{
|
|
|
|
auto top = queue.top();
|
|
|
|
queue.pop();
|
|
|
|
ans.push_back(prompting_strings[top.second]);
|
|
|
|
}
|
|
|
|
std::reverse(ans.begin(), ans.end());
|
|
|
|
return ans;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|