More enhancements for query obfuscator

This commit is contained in:
Alexey Milovidov 2021-10-03 05:56:32 +03:00
parent b63b8d5446
commit 0bf597374f
3 changed files with 26 additions and 6 deletions

View File

@ -25,6 +25,8 @@
#include <Storages/StorageFactory.h>
#include <Storages/registerStorages.h>
#include <DataTypes/DataTypeFactory.h>
#include <Formats/FormatFactory.h>
#include <Formats/registerFormats.h>
#pragma GCC diagnostic ignored "-Wunused-function"
@ -114,6 +116,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
registerAggregateFunctions();
registerTableFunctions();
registerStorages();
registerFormats();
std::unordered_set<std::string> additional_names;
@ -130,6 +133,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
return FunctionFactory::instance().has(what)
|| AggregateFunctionFactory::instance().isAggregateFunctionName(what)
|| TableFunctionFactory::instance().isTableFunctionName(what)
|| FormatFactory::instance().isOutputFormat(what)
|| FormatFactory::instance().isInputFormat(what)
|| additional_names.count(what);
};

View File

@ -276,29 +276,37 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf)
{
case '+':
{
if (has_sign || has_number)
if (has_sign)
{
if (has_number)
return ReturnType(true);
if constexpr (throw_exception)
throw ParsingException(
"Cannot parse number with multiple sign (+/-) characters or intermediate sign character",
"Cannot parse number with multiple sign (+/-) characters",
ErrorCodes::CANNOT_PARSE_NUMBER);
else
return ReturnType(false);
}
has_sign = true;
break;
}
case '-':
{
if (has_sign || has_number)
if (has_sign)
{
if (has_number)
return ReturnType(true);
if constexpr (throw_exception)
throw ParsingException(
"Cannot parse number with multiple sign (+/-) characters or intermediate sign character",
"Cannot parse number with multiple sign (+/-) characters",
ErrorCodes::CANNOT_PARSE_NUMBER);
else
return ReturnType(false);
}
if constexpr (is_signed_v<T>)
negative = true;
else

View File

@ -38,7 +38,8 @@ const std::unordered_set<std::string_view> keywords
"IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE",
"PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE",
"IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED",
"INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "DICTIONARY"
"INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "DICTIONARY", "OFFSET",
"TRIM", "LTRIM", "RTRIM", "BOTH", "LEADING", "TRAILING"
};
const std::unordered_set<std::string_view> keep_words
@ -906,7 +907,13 @@ void obfuscateQueries(
/// Write quotes and the obfuscated content inside.
result.write(*token.begin);
obfuscateIdentifier({token.begin + 1, token.size() - 2}, result, obfuscate_map, used_nouns, hash_func);
/// If it is long, just replace it with its hash. Long identifiers in queries are usually auto-generated.
if (token.size() > 32)
writeIntText(sipHash64(token.begin + 1, token.size() - 2), result);
else
obfuscateIdentifier({token.begin + 1, token.size() - 2}, result, obfuscate_map, used_nouns, hash_func);
result.write(token.end[-1]);
}
else if (token.type == TokenType::Number)