Merge pull request #29672 from ClickHouse/clickhouse-obfuscator-2

More enhancements for query obfuscator
This commit is contained in:
alexey-milovidov 2021-10-20 23:06:29 +03:00 committed by GitHub
commit d3df1c02bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 31 additions and 9 deletions

View File

@ -25,6 +25,8 @@
#include <Storages/StorageFactory.h>
#include <Storages/registerStorages.h>
#include <DataTypes/DataTypeFactory.h>
#include <Formats/FormatFactory.h>
#include <Formats/registerFormats.h>
#pragma GCC diagnostic ignored "-Wunused-function"
@ -114,6 +116,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
registerAggregateFunctions();
registerTableFunctions();
registerStorages();
registerFormats();
std::unordered_set<std::string> additional_names;
@ -130,6 +133,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
return FunctionFactory::instance().has(what)
|| AggregateFunctionFactory::instance().isAggregateFunctionName(what)
|| TableFunctionFactory::instance().isTableFunctionName(what)
|| FormatFactory::instance().isOutputFormat(what)
|| FormatFactory::instance().isInputFormat(what)
|| additional_names.count(what);
};

View File

@ -279,29 +279,39 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf)
{
case '+':
{
if (has_sign || has_number)
/// A sign after digits, like 123+ or +123+: just stop parsing after 123 or +123.
if (has_number)
goto end;
/// No digits have been read yet, but a sign has already been read, like ++ or -+.
if (has_sign)
{
if constexpr (throw_exception)
throw ParsingException(
"Cannot parse number with multiple sign (+/-) characters or intermediate sign character",
"Cannot parse number with multiple sign (+/-) characters",
ErrorCodes::CANNOT_PARSE_NUMBER);
else
return ReturnType(false);
}
has_sign = true;
break;
}
case '-':
{
if (has_sign || has_number)
if (has_number)
goto end;
if (has_sign)
{
if constexpr (throw_exception)
throw ParsingException(
"Cannot parse number with multiple sign (+/-) characters or intermediate sign character",
"Cannot parse number with multiple sign (+/-) characters",
ErrorCodes::CANNOT_PARSE_NUMBER);
else
return ReturnType(false);
}
if constexpr (is_signed_v<T>)
negative = true;
else

View File

@ -38,7 +38,8 @@ const std::unordered_set<std::string_view> keywords
"IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE",
"PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE",
"IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED",
"INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "DICTIONARY"
"INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "DICTIONARY", "OFFSET",
"TRIM", "LTRIM", "RTRIM", "BOTH", "LEADING", "TRAILING"
};
const std::unordered_set<std::string_view> keep_words
@ -906,7 +907,13 @@ void obfuscateQueries(
/// Write quotes and the obfuscated content inside.
result.write(*token.begin);
obfuscateIdentifier({token.begin + 1, token.size() - 2}, result, obfuscate_map, used_nouns, hash_func);
/// If the identifier is long, just replace it with its hash. Long identifiers in queries are usually auto-generated.
if (token.size() > 32)
writeIntText(sipHash64(token.begin + 1, token.size() - 2), result);
else
obfuscateIdentifier({token.begin + 1, token.size() - 2}, result, obfuscate_map, used_nouns, hash_func);
result.write(token.end[-1]);
}
else if (token.type == TokenType::Number)

View File

@ -1,4 +1,4 @@
SELECT '-1E9-1E9-1E9-1E9' AS x, toDecimal32(x, 0); -- { serverError 72 }
SELECT '-1E9-1E9-1E9-1E9' AS x, toDecimal32(x, 0); -- { serverError 69 }
SELECT '-1E9' AS x, toDecimal32(x, 0); -- { serverError 69 }
SELECT '1E-9' AS x, toDecimal32(x, 0);
SELECT '1E-8' AS x, toDecimal32(x, 0);

View File

@ -3,8 +3,8 @@ select toInt64('+-1'); -- { serverError 72; }
select toInt64('++1'); -- { serverError 72; }
select toInt64('++'); -- { serverError 72; }
select toInt64('+'); -- { serverError 72; }
select toInt64('1+1'); -- { serverError 72; }
select toInt64('1-1'); -- { serverError 72; }
select toInt64('1+1'); -- { serverError 6; }
select toInt64('1-1'); -- { serverError 6; }
select toInt64(''); -- { serverError 32; }
select toInt64('1');
select toInt64('-1');