2019-02-02 14:54:50 +00:00
|
|
|
#pragma once
|
|
|
|
|
2021-10-02 07:13:14 +00:00
|
|
|
#include <base/types.h>
|
2020-03-18 18:26:40 +00:00
|
|
|
#include <Common/PODArray.h>
|
2019-02-02 14:54:50 +00:00
|
|
|
|
|
|
|
#include <algorithm>
|
2019-02-02 15:15:53 +00:00
|
|
|
#include <cctype>
|
2019-02-11 12:42:20 +00:00
|
|
|
#include <cmath>
|
2019-02-18 01:18:39 +00:00
|
|
|
#include <memory>
|
2019-02-02 14:54:50 +00:00
|
|
|
#include <queue>
|
|
|
|
#include <utility>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2019-02-11 12:42:20 +00:00
|
|
|
template <size_t MaxNumHints>
|
2019-02-02 14:54:50 +00:00
|
|
|
class NamePrompter
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
using DistanceIndex = std::pair<size_t, size_t>;
|
|
|
|
using DistanceIndexQueue = std::priority_queue<DistanceIndex>;
|
|
|
|
|
|
|
|
static std::vector<String> getHints(const String & name, const std::vector<String> & prompting_strings)
|
|
|
|
{
|
|
|
|
DistanceIndexQueue queue;
|
|
|
|
for (size_t i = 0; i < prompting_strings.size(); ++i)
|
|
|
|
appendToQueue(i, name, queue, prompting_strings);
|
|
|
|
return release(queue, prompting_strings);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2019-02-02 15:13:31 +00:00
|
|
|
static size_t levenshteinDistance(const String & lhs, const String & rhs)
|
2019-02-02 14:54:50 +00:00
|
|
|
{
|
2019-02-18 01:18:39 +00:00
|
|
|
size_t m = lhs.size();
|
|
|
|
size_t n = rhs.size();
|
2019-02-02 14:54:50 +00:00
|
|
|
|
2020-03-18 18:26:40 +00:00
|
|
|
PODArrayWithStackMemory<size_t, 64> row(n + 1);
|
2019-02-02 14:54:50 +00:00
|
|
|
|
2019-02-18 01:18:39 +00:00
|
|
|
for (size_t i = 1; i <= n; ++i)
|
|
|
|
row[i] = i;
|
2019-02-02 14:54:50 +00:00
|
|
|
|
|
|
|
for (size_t j = 1; j <= m; ++j)
|
|
|
|
{
|
2019-02-18 01:18:39 +00:00
|
|
|
row[0] = j;
|
|
|
|
size_t prev = j - 1;
|
2019-02-02 14:54:50 +00:00
|
|
|
for (size_t i = 1; i <= n; ++i)
|
|
|
|
{
|
2019-02-18 01:18:39 +00:00
|
|
|
size_t old = row[i];
|
|
|
|
row[i] = std::min(prev + (std::tolower(lhs[j - 1]) != std::tolower(rhs[i - 1])),
|
|
|
|
std::min(row[i - 1], row[i]) + 1);
|
|
|
|
prev = old;
|
2019-02-02 14:54:50 +00:00
|
|
|
}
|
|
|
|
}
|
2019-02-18 01:18:39 +00:00
|
|
|
return row[n];
|
2019-02-02 14:54:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void appendToQueue(size_t ind, const String & name, DistanceIndexQueue & queue, const std::vector<String> & prompting_strings)
|
|
|
|
{
|
2019-02-11 12:42:20 +00:00
|
|
|
const String & prompt = prompting_strings[ind];
|
|
|
|
|
|
|
|
/// Clang SimpleTypoCorrector logic
|
|
|
|
const size_t min_possible_edit_distance = std::abs(static_cast<int64_t>(name.size()) - static_cast<int64_t>(prompt.size()));
|
|
|
|
const size_t mistake_factor = (name.size() + 2) / 3;
|
|
|
|
if (min_possible_edit_distance > 0 && name.size() / min_possible_edit_distance < 3)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (prompt.size() <= name.size() + mistake_factor && prompt.size() + mistake_factor >= name.size())
|
2019-02-02 14:54:50 +00:00
|
|
|
{
|
2019-02-11 12:42:20 +00:00
|
|
|
size_t distance = levenshteinDistance(prompt, name);
|
|
|
|
if (distance <= mistake_factor)
|
2019-02-02 15:15:53 +00:00
|
|
|
{
|
2019-02-02 14:54:50 +00:00
|
|
|
queue.emplace(distance, ind);
|
|
|
|
if (queue.size() > MaxNumHints)
|
|
|
|
queue.pop();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static std::vector<String> release(DistanceIndexQueue & queue, const std::vector<String> & prompting_strings)
|
|
|
|
{
|
2020-08-08 00:47:03 +00:00
|
|
|
std::vector<String> answer;
|
|
|
|
answer.reserve(queue.size());
|
2019-02-02 14:54:50 +00:00
|
|
|
while (!queue.empty())
|
|
|
|
{
|
|
|
|
auto top = queue.top();
|
|
|
|
queue.pop();
|
2020-08-08 00:47:03 +00:00
|
|
|
answer.push_back(prompting_strings[top.second]);
|
2019-02-02 14:54:50 +00:00
|
|
|
}
|
2020-08-08 00:47:03 +00:00
|
|
|
std::reverse(answer.begin(), answer.end());
|
|
|
|
return answer;
|
2019-02-02 14:54:50 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2022-04-01 13:12:54 +00:00
|
|
|
namespace detail
|
|
|
|
{
|
|
|
|
void appendHintsMessageImpl(String & message, const std::vector<String> & hints);
|
|
|
|
}
|
2021-05-19 22:47:10 +00:00
|
|
|
|
|
|
|
template <size_t MaxNumHints, typename Self>
|
2019-04-03 11:13:22 +00:00
|
|
|
class IHints
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
virtual std::vector<String> getAllRegisteredNames() const = 0;
|
|
|
|
|
|
|
|
std::vector<String> getHints(const String & name) const
|
|
|
|
{
|
2021-05-19 22:47:10 +00:00
|
|
|
return prompter.getHints(name, getAllRegisteredNames());
|
2019-04-03 11:13:22 +00:00
|
|
|
}
|
|
|
|
|
2022-04-01 13:12:54 +00:00
|
|
|
void appendHintsMessage(String & message, const String & name) const
|
2022-03-30 12:54:33 +00:00
|
|
|
{
|
2022-03-31 04:14:28 +00:00
|
|
|
auto hints = getHints(name);
|
2022-04-01 13:12:54 +00:00
|
|
|
detail::appendHintsMessageImpl(message, hints);
|
2022-03-30 12:54:33 +00:00
|
|
|
}
|
|
|
|
|
2021-12-20 13:50:06 +00:00
|
|
|
IHints() = default;
|
|
|
|
|
|
|
|
IHints(const IHints &) = default;
|
2022-03-11 21:47:28 +00:00
|
|
|
IHints(IHints &&) noexcept = default;
|
2021-12-20 13:50:06 +00:00
|
|
|
IHints & operator=(const IHints &) = default;
|
2022-03-11 21:47:28 +00:00
|
|
|
IHints & operator=(IHints &&) noexcept = default;
|
2021-12-20 13:50:06 +00:00
|
|
|
|
2019-04-03 11:13:22 +00:00
|
|
|
virtual ~IHints() = default;
|
|
|
|
|
|
|
|
private:
|
|
|
|
NamePrompter<MaxNumHints> prompter;
|
|
|
|
};
|
2019-02-02 14:54:50 +00:00
|
|
|
}
|