From 6a1d1f55080dcf53c72501831d7c00e3710e8fa7 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Mon, 11 Feb 2019 15:42:20 +0300 Subject: [PATCH] Better hints provider --- dbms/src/Common/IFactoryWithAliases.h | 5 +++-- dbms/src/Common/NamePrompter.h | 17 +++++++++++++---- .../00834_hints_for_type_function_typos.sh | 10 +++++++++- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/dbms/src/Common/IFactoryWithAliases.h b/dbms/src/Common/IFactoryWithAliases.h index db0b4e37864..f7b930f57c3 100644 --- a/dbms/src/Common/IFactoryWithAliases.h +++ b/dbms/src/Common/IFactoryWithAliases.h @@ -130,9 +130,10 @@ private: /** * prompter for names, if a person makes a typo for some function or type, it - * helps to find best possible match (in particular, edit distance is one or two symbols) + * helps to find best possible match (in particular, edit distance is done like in clang + * (max edit distance is (typo.size() + 2) / 3)) */ - NamePrompter prompter; + NamePrompter prompter; }; } diff --git a/dbms/src/Common/NamePrompter.h b/dbms/src/Common/NamePrompter.h index 21f35a7b9fe..40460c971d0 100644 --- a/dbms/src/Common/NamePrompter.h +++ b/dbms/src/Common/NamePrompter.h @@ -4,12 +4,13 @@ #include #include +#include #include #include namespace DB { -template +template class NamePrompter { public: @@ -53,10 +54,18 @@ private: static void appendToQueue(size_t ind, const String & name, DistanceIndexQueue & queue, const std::vector & prompting_strings) { - if (prompting_strings[ind].size() <= name.size() + MistakeFactor && prompting_strings[ind].size() + MistakeFactor >= name.size()) + const String & prompt = prompting_strings[ind]; + + /// Clang SimpleTypoCorrector logic + const size_t min_possible_edit_distance = std::abs(static_cast(name.size()) - static_cast(prompt.size())); + const size_t mistake_factor = (name.size() + 2) / 3; + if (min_possible_edit_distance > 0 && name.size() / min_possible_edit_distance < 3) + return; + + if (prompt.size() 
<= name.size() + mistake_factor && prompt.size() + mistake_factor >= name.size()) { - size_t distance = levenshteinDistance(prompting_strings[ind], name); - if (distance <= MistakeFactor) + size_t distance = levenshteinDistance(prompt, name); + if (distance <= mistake_factor) { queue.emplace(distance, ind); if (queue.size() > MaxNumHints) diff --git a/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.sh b/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.sh index 8650cc2d56b..0352002d160 100755 --- a/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.sh +++ b/dbms/tests/queries/0_stateless/00834_hints_for_type_function_typos.sh @@ -11,4 +11,12 @@ $CLICKHOUSE_CLIENT -q "select positin(*) from system.functions;" 2>&1 | grep "Ma $CLICKHOUSE_CLIENT -q "select POSITIO(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['position'" &>/dev/null; $CLICKHOUSE_CLIENT -q "select fount(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['count'" | grep "Maybe you meant: \['round'" | grep "Or unknown aggregate function" &>/dev/null; $CLICKHOUSE_CLIENT -q "select positin(*) from system.functions;" 2>&1 | grep -v "Or unknown aggregate function" &>/dev/null; -$CLICKHOUSE_CLIENT -q "select pov(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['pow','cos'\]" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select pov(*) from system.functions;" 2>&1 | grep "Maybe you meant: \['pow'\]" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select getColumnStructure('abc');" 2>&1 | grep "Maybe you meant: \['dumpColumnStructure'\]" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select gutColumnStructure('abc');" 2>&1 | grep "Maybe you meant: \['dumpColumnStructure'\]" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select gupColumnStructure('abc');" 2>&1 | grep "Maybe you meant: \['dumpColumnStructure'\]" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select provideColumnStructure('abc');" 2>&1 | grep "Maybe you meant: \['dumpColumnStructure'\]" &>/dev/null; 
+$CLICKHOUSE_CLIENT -q "select multiposicionutf7('abc');" 2>&1 | grep "Maybe you meant: \['multiPositionUTF8','multiPosition'\]" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select multiposicionutf7casesensitive('abc');" 2>&1 | grep "Maybe you meant: \['multiPositionCaseInsensitive'\]" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select multiposicionutf7sensitive('abc');" 2>&1 | grep "Maybe you meant: \['multiPositionCaseInsensitive'\]" &>/dev/null; +$CLICKHOUSE_CLIENT -q "select multiPosicionSensitiveUTF8('abc');" 2>&1 | grep "Maybe you meant: \['multiPositionCaseInsensitiveUTF8'\]" &>/dev/null;