This commit is contained in:
Nikita Mikhaylov 2024-04-25 15:50:29 +00:00
parent 6fe4203d44
commit 239f9e2059
3 changed files with 22 additions and 13 deletions

View File

@ -1,6 +1,4 @@
#include <Parsers/CommonParsers.h>
#include <algorithm>
#include <cassert>
#include <Parsers/obfuscateQueries.h>
#include <Parsers/Lexer.h>
@ -12,8 +10,13 @@
#include <IO/WriteBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromMemory.h>
#include <algorithm>
#include <cassert>
#include <iterator>
#include <boost/algorithm/string.hpp>
namespace DB
{
@ -27,11 +30,11 @@ namespace ErrorCodes
namespace
{
const std::unordered_set<std::string_view> & getObfuscateKeywords()
const std::unordered_set<std::string> & getObfuscateKeywords()
{
auto initialize = []()
{
std::unordered_set<std::string_view> instance = {
std::unordered_set<std::string> instance = {
"!=",
"",
"%",
@ -60,12 +63,20 @@ const std::unordered_set<std::string_view> & getObfuscateKeywords()
"]+$"
};
auto & global_keywords = getAllKeyWords();
std::copy(global_keywords.begin(), global_keywords.end(), std::inserter(instance, instance.begin()));
/// Add every globally registered keyword to the set, one word at a time.
for (const auto & keyword : getAllKeyWords())
{
/// A keyword may consist of several space-separated words (e.g. ORDER BY or GROUP BY).
/// Split such a keyword on ' ' and insert each word as its own entry.
/// NOTE(review): presumably required because callers look up a single bare-word token at a time,
/// so a multi-word entry would never match as a whole — confirm against the lookup site.
std::vector<std::string> tokens;
/// boost::split does not compress adjacent delimiters by default,
/// so consecutive spaces in a keyword would yield empty tokens.
boost::split(tokens, keyword, [](char c) { return c == ' '; });
for (const auto & token : tokens)
instance.insert(token);
}
return instance;
};
static std::unordered_set<std::string_view> instance = initialize();
static std::unordered_set<std::string> instance = initialize();
return instance;
};
@ -945,11 +956,9 @@ void obfuscateQueries(
if (token.type == TokenType::BareWord)
{
std::string whole_token_uppercase(whole_token);
Poco::toUpperInPlace(whole_token_uppercase);
auto whole_token_uppercase = Poco::toUpper(toString(whole_token));
if (getObfuscateKeywords().contains(whole_token_uppercase)
|| known_identifier_func(whole_token))
if (getObfuscateKeywords().contains(whole_token_uppercase) || known_identifier_func(whole_token))
{
/// Keep keywords as is.
result.write(token.begin, token.size());

View File

@ -1,3 +1,3 @@
select 1 order by 1 with fill step 1
SELECT id, untuple(id) FROM id
SELECT id, mannWhitneyUTest(id) FROM id
SELECT 1 IS NULL

View File

@ -7,5 +7,5 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
obf="$CLICKHOUSE_FORMAT --obfuscate"
echo "select 1 order by 1 with fill step 1" | $obf
echo "SELECT id, untuple(id) FROM id" | $obf
echo "SELECT id, mannWhitneyUTest(id) FROM id" | $obf
echo "SELECT 1 IS NULL" | $obf