Better hints provider

This commit is contained in:
Danila Kutenin 2019-02-11 15:42:20 +03:00
parent 81a184c47f
commit 6a1d1f5508
3 changed files with 25 additions and 7 deletions

View File

@ -130,9 +130,10 @@ private:
/**
* prompter for names, if a person makes a typo for some function or type, it
* helps to find best possible match (in particular, edit distance is one or two symbols)
* helps to find best possible match (in particular, edit distance is done like in clang
* (max edit distance is (typo.size() + 2) / 3 and not bigger than)
*/
NamePrompter</*MistakeFactor=*/2, /*MaxNumHints=*/2> prompter;
NamePrompter</*MaxNumHints=*/2> prompter;
};
}

View File

@ -4,12 +4,13 @@
#include <algorithm>
#include <cctype>
#include <cmath>
#include <queue>
#include <utility>
namespace DB
{
template <size_t MistakeFactor, size_t MaxNumHints>
template <size_t MaxNumHints>
class NamePrompter
{
public:
@ -53,10 +54,18 @@ private:
static void appendToQueue(size_t ind, const String & name, DistanceIndexQueue & queue, const std::vector<String> & prompting_strings)
{
if (prompting_strings[ind].size() <= name.size() + MistakeFactor && prompting_strings[ind].size() + MistakeFactor >= name.size())
const String & prompt = prompting_strings[ind];
/// Clang SimpleTypoCorrector logic
const size_t min_possible_edit_distance = std::abs(static_cast<int64_t>(name.size()) - static_cast<int64_t>(prompt.size()));
const size_t mistake_factor = (name.size() + 2) / 3;
if (min_possible_edit_distance > 0 && name.size() / min_possible_edit_distance < 3)
return;
if (prompt.size() <= name.size() + mistake_factor && prompt.size() + mistake_factor >= name.size())
{
size_t distance = levenshteinDistance(prompting_strings[ind], name);
if (distance <= MistakeFactor)
size_t distance = levenshteinDistance(prompt, name);
if (distance <= mistake_factor)
{
queue.emplace(distance, ind);
if (queue.size() > MaxNumHints)

View File

@ -11,4 +11,12 @@ $CLICKHOUSE_CLIENT -q "select positin(*) from system.functions;" 2>&1 | grep "Ma
$CLICKHOUSE_CLIENT -q "select POSITIO(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['position'" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select fount(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['count'" | grep "Maybe you meant: \['round'" | grep "Or unknown aggregate function" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select positin(*) from system.functions;" 2>&1 | grep -v "Or unknown aggregate function" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select pov(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['pow','cos'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select pov(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['pow'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select getColumnStructure('abc');" 2>&1 | grep "Maybe you meant: \['dumpColumnStructure'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select gutColumnStructure('abc');" 2>&1 | grep "Maybe you meant: \['dumpColumnStructure'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select gupColumnStructure('abc');" 2>&1 | grep "Maybe you meant: \['dumpColumnStructure'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select provideColumnStructure('abc');" 2>&1 | grep "Maybe you meant: \['dumpColumnStructure'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select multiposicionutf7('abc');" 2>&1 | grep "Maybe you meant: \['multiPositionUTF8','multiPosition'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select multiposicionutf7casesensitive('abc');" 2>&1 | grep "Maybe you meant: \['multiPositionCaseInsensitive'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select multiposicionutf7sensitive('abc');" 2>&1 | grep "Maybe you meant: \['multiPositionCaseInsensitive'\]" &>/dev/null;
$CLICKHOUSE_CLIENT -q "select multiPosicionSensitiveUTF8('abc');" 2>&1 | grep "Maybe you meant: \['multiPositionCaseInsensitiveUTF8'\]" &>/dev/null;