From e22413823e39166179f2c17f0b7b933648e36c65 Mon Sep 17 00:00:00 2001 From: kashwy Date: Thu, 11 Aug 2022 12:38:49 -0700 Subject: [PATCH] Kusto-pahse2: fixed toimspan issue and other functions --- src/Parsers/Kusto/KQL_ReleaseNote.md | 275 +++++++++++------- .../KustoFunctions/IParserKQLFunction.cpp | 9 +- .../KustoFunctions/KQLDataTypeFunctions.cpp | 76 ++--- .../KustoFunctions/KQLStringFunctions.cpp | 47 ++- .../Kusto/ParserKQLDateTypeTimespan.cpp | 71 ++++- src/Parsers/Lexer.cpp | 2 +- .../tests/KQL/gtest_KQL_StringFunctions.cpp | 169 +++++++++++ .../tests/KQL/gtest_KQL_dateTimeFunctions.cpp | 4 + 8 files changed, 493 insertions(+), 160 deletions(-) create mode 100644 src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index ed64110487e..fa0a4c1240b 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,66 +1,171 @@ -# August XX, 2022 -- **DateTimeFunctions** -- [startofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofyearfunction) - `print startofyear(datetime(2017-01-01 10:10:17), -1)` - `print startofyear(datetime(2017-01-01 10:10:17), 0)` - `print startofyear(datetime(2017-01-01 10:10:17), 1)` -- [weekofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/weekofyearfunction) - `print week_of_year(datetime(2020-12-31))` - `print week_of_year(datetime(2020-06-15))` - `print week_of_year(datetime(1970-01-01))` - `print week_of_year(datetime(2000-01-01))` - -- [startofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofweekfunction) - `print startofweek(datetime(2017-01-01 10:10:17), -1)` - `print startofweek(datetime(2017-01-01 10:10:17), 0)` - `print startofweek(datetime(2017-01-01 10:10:17), 1)` - -- [startofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofmonthfunction) - `print startofmonth(datetime(2017-01-01 10:10:17), 
-1)` - `print startofmonth(datetime(2017-01-01 10:10:17), 0)` - `print startofmonth(datetime(2017-01-01 10:10:17), 1)` - -- [startofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofdayfunction) - `print startofday(datetime(2017-01-01 10:10:17), -1)` - `print startofday(datetime(2017-01-01 10:10:17), 0)` - `print startofday(datetime(2017-01-01 10:10:17), 1)` - -- [monthofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/monthofyearfunction) - `print monthofyear(datetime("2015-12-14"))` - -- [hourofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/hourofdayfunction) - `print hourofday(datetime(2015-12-14 18:54:00))` - -- [getyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getyearfunction) - `print getyear(datetime(2015-10-12))` - -- [getmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getmonthfunction) - `print getmonth(datetime(2015-10-12))` - -- [dayofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofyearfunction) - `print dayofyear(datetime(2015-12-14))` - -- [dayofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofmonthfunction) - `print (datetime(2015-12-14))` - -- [unixtime_seconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-seconds-todatetimefunction) - `print unixtime_seconds_todatetime(1546300800)` - -- [dayofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofweekfunction) - `print dayofweek(datetime(2015-12-20))` - -- [now](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/nowfunction) - `print now()` - `print now(2d)` - `print now(-2h)` - `print now(5 microseconds)` - `print now(5 seconds)` - `print now(6minutes)` - `print now(-2d) ` - `print now(time(1d))` - ## KQL implemented features -The config setting to allow modify dialect setting. 
+ +# August 15, 2022 + +## DataType +- [bool,boolean](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/bool) + `print bool(1)` + `print boolean(0)` + +- [datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/datetime) + `print datetime(2015-12-31 23:59:59.9)` + `print datetime('2015-12-31 23:59:59.9')` + `print datetime("2015-12-31")` + +- [guid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/guid) + `print guid(74be27de-1e4e-49d9-b579-fe0b331d3642)` + `print guid('74be27de-1e4e-49d9-b579-fe0b331d3642')` + `print guid('74be27de1e4e49d9b579fe0b331d3642')` + +- [int](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/int) + `print int(1)` + +- [long](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/long) + `print long(16)` + +- [real](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/real) + `print real(1)` + +- [timespan, time](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/timespan) + **Note** the timespan is used for calculating datetime values, so the output is in seconds. e.g. 
time(1h) = 3600 + `print 1d` + `print 30m` + `print time('0.12:34:56.7')` + `print time(2h)` + `print timespan(2h)` + + +## StringFunctions + +- [base64_encode_fromguid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64-encode-fromguid-function) +`print Quine = base64_encode_fromguid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb')` +- [base64_decode_toarray](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64_decode_toarrayfunction) +`print base64_decode_toarray('S3VzdG8=')` +- [base64_decode_toguid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64-decode-toguid-function) +`print base64_decode_toguid('YWUzMTMzZjItNmUyMi00OWFlLWIwNmEtMTZlNmE5YjIxMmVi')` +- [replace_regex](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/replace-regex-function) +`print replace_regex('Hello, World!', '.', '\\0\\0')` +- [has_any_index](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-any-index-function) +`print idx = has_any_index('this is an example', dynamic(['this', 'example']))` +- [translate](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/translatefunction) +`print translate('krasp', 'otsku', 'spark')` +- [trim](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimfunction) +`print trim('--', '--https://bing.com--')` +- [trim_end](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimendfunction) +`print trim_end('.com', 'bing.com')` +- [trim_start](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimstartfunction) +`print trim_start('[^\\w]+', strcat('- ','Te st1','// $'))` + + + +## DateTimeFunctions +- [startofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofyearfunction) + `print startofyear(datetime(2017-01-01 10:10:17), -1)` + `print startofyear(datetime(2017-01-01 10:10:17), 0)` + `print startofyear(datetime(2017-01-01 10:10:17), 1)` +- 
[weekofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/weekofyearfunction) + `print week_of_year(datetime(2020-12-31))` + `print week_of_year(datetime(2020-06-15))` + `print week_of_year(datetime(1970-01-01))` + `print week_of_year(datetime(2000-01-01))` + +- [startofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofweekfunction) + `print startofweek(datetime(2017-01-01 10:10:17), -1)` + `print startofweek(datetime(2017-01-01 10:10:17), 0)` + `print startofweek(datetime(2017-01-01 10:10:17), 1)` + +- [startofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofmonthfunction) + `print startofmonth(datetime(2017-01-01 10:10:17), -1)` + `print startofmonth(datetime(2017-01-01 10:10:17), 0)` + `print startofmonth(datetime(2017-01-01 10:10:17), 1)` + +- [startofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofdayfunction) + `print startofday(datetime(2017-01-01 10:10:17), -1)` + `print startofday(datetime(2017-01-01 10:10:17), 0)` + `print startofday(datetime(2017-01-01 10:10:17), 1)` + +- [monthofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/monthofyearfunction) + `print monthofyear(datetime("2015-12-14"))` + +- [hourofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/hourofdayfunction) + `print hourofday(datetime(2015-12-14 18:54:00))` + +- [getyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getyearfunction) + `print getyear(datetime(2015-10-12))` + +- [getmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getmonthfunction) + `print getmonth(datetime(2015-10-12))` + +- [dayofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofyearfunction) + `print dayofyear(datetime(2015-12-14))` + +- [dayofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofmonthfunction) + `print dayofmonth(datetime(2015-12-14))` + +- 
[unixtime_seconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-seconds-todatetimefunction) + `print unixtime_seconds_todatetime(1546300800)` + +- [dayofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofweekfunction) + `print dayofweek(datetime(2015-12-20))` + +- [now](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/nowfunction) + `print now()` + `print now(2d)` + `print now(-2h)` + `print now(5microseconds)` + `print now(5seconds)` + `print now(6minutes)` + `print now(-2d) ` + `print now(time(1d))` + + +## Binary functions +- [binary_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-andfunction) + `print binary_and(15, 3) == 3` + `print binary_and(1, 2) == 0` +- [binary_not](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-notfunction) + `print binary_not(1) == -2` +- [binary_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-orfunction) + `print binary_or(3, 8) == 11` + `print binary_or(1, 2) == 3` +- [binary_shift_left](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-leftfunction) + `print binary_shift_left(1, 1) == 2` + `print binary_shift_left(1, 64) == 1` +- [binary_shift_right](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-rightfunction) + `print binary_shift_right(1, 1) == 0` + `print binary_shift_right(1, 64) == 1` +- [binary_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-xorfunction) + `print binary_xor(1, 3) == 2` +- [bitset_count_ones](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/bitset-count-onesfunction) + `print bitset_count_ones(42) == 3` + +## IP functions +- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) + `print format_ipv4('192.168.1.255', 24) == '192.168.1.0'` + `print format_ipv4(3232236031, 24) == '192.168.1.0'` +- 
[format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) + `print format_ipv4_mask('192.168.1.255', 24) == '192.168.1.0/24'` + `print format_ipv4_mask(3232236031, 24) == '192.168.1.0/24'` +- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) + `print ipv4_compare('127.0.0.1', '127.0.0.1') == 0` + `print ipv4_compare('192.168.1.1', '192.168.1.255') < 0` + `print ipv4_compare('192.168.1.1/24', '192.168.1.255/24') == 0` + `print ipv4_compare('192.168.1.1', '192.168.1.255', 24) == 0` +- [ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) + `print ipv4_is_match('127.0.0.1', '127.0.0.1') == true` + `print ipv4_is_match('192.168.1.1', '192.168.1.255') == false` + `print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24') == true` + `print ipv4_is_match('192.168.1.1', '192.168.1.255', 24) == true` +- [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) + `print parse_ipv4_mask('127.0.0.1', 24) == 2130706432` + `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` + `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` + `print parse_ipv4_mask('127.2.3.4', 32) == 2130838276` + + +# August 1, 2022 + +**The config setting to allow modify dialect setting**. - Set dialect setting in server configuration XML at user level(` users.xml `). This sets the ` dialect ` at server startup and CH will do query parsing for all users with ` default ` profile acording to dialect value. For example: @@ -83,51 +188,6 @@ The config setting to allow modify dialect setting. OR pass dialect setting with '--'. 
For example : ` clickhouse-client --dialect='kusto_auto' -q "KQL query" ` - -## Binary functions -- [binary_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-andfunction) - `print binary_and(15, 3) == 3` - `print binary_and(1, 2) == 0` -- [binary_not](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-notfunction) - `print binary_not(1) == -2` -- [binary_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-orfunction) - `print binary_or(3, 8) == 11` - `print binary_or(1, 2) == 3` -- [binary_shift_left](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-leftfunction) - `print binary_shift_left(1, 1) == 2` - `print binary_shift_left(1, 64) == 1` -- [binary_shift_right](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-rightfunction) - `print binary_shift_right(1, 1) == 0` - `print binary_shift_right(1, 64) == 1` -- [binary_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-xorfunction) - `print binary_xor(1, 3) == 2` -- [bitset_count_ones](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/bitset-count-onesfunction) - `print bitset_count_ones(42) == 3` - -## IP functions -- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) - `print format_ipv4('192.168.1.255', 24) == '192.168.1.0'` - `print format_ipv4(3232236031, 24) == '192.168.1.0'` -- [format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) - `print format_ipv4_mask('192.168.1.255', 24) == '192.168.1.0/24'` - `print format_ipv4_mask(3232236031, 24) == '192.168.1.0/24'` -- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) - `print ipv4_compare('127.0.0.1', '127.0.0.1') == 0` - `print ipv4_compare('192.168.1.1', '192.168.1.255') < 0` - `print ipv4_compare('192.168.1.1/24', '192.168.1.255/24') == 0` 
- `print ipv4_compare('192.168.1.1', '192.168.1.255', 24) == 0` -- [ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) - `print ipv4_is_match('127.0.0.1', '127.0.0.1') == true` - `print ipv4_is_match('192.168.1.1', '192.168.1.255') == false` - `print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24') == true` - `print ipv4_is_match('192.168.1.1', '192.168.1.255', 24) == true` -- [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) - `print parse_ipv4_mask('127.0.0.1', 24) == 2130706432` - `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` - `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` - `print parse_ipv4_mask('127.2.3.4', 32) == 2130838276` - -# August 1, 2022 - **strcmp** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcmpfunction) `print strcmp('abc','ABC')` @@ -155,7 +215,6 @@ The config setting to allow modify dialect setting. - [make_set_if()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makesetif-aggfunction) `Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName` `Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName` -# July XX, 2022 ## IP functions diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 0b7eb403a22..243b67b7308 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -102,7 +102,6 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - String token = String(pos->begin, pos->end); String new_token; if (!KQLOperators().convert(tokens, pos)) { @@ -115,7 +114,15 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: break; } else + { + 
String token; + if (pos->type == TokenType::QuotedIdentifier) + token = "'" + String(pos->begin + 1,pos->end - 1) + "'"; + else + token = String(pos->begin, pos->end); + tokens.push_back(token); + } } ++pos; if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp index 2a59ab8b72a..0f60bf6d326 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp @@ -5,28 +5,21 @@ #include #include #include -/* -#include -#include -#include -#include -#include -#include -#include -#include -#include -*/ #include +#include #include namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + bool DatatypeBool::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toBool"); } bool DatatypeDatetime::convertImpl(String &out,IParser::Pos &pos) @@ -59,9 +52,24 @@ bool DatatypeDatetime::convertImpl(String &out,IParser::Pos &pos) bool DatatypeDynamic::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String res = String(pos->begin, pos->end); + String array; + ++pos; //go pass "dynamic" string + while (pos->type != TokenType::ClosingRoundBracket) + { + if (pos->type != TokenType::OpeningSquareBracket && pos->type != TokenType::ClosingSquareBracket) + { + array += String(pos->begin, pos->end); + } + ++pos; + } + if (pos->type == TokenType::ClosingRoundBracket) + array += String(pos->begin, pos->end); + else + return false; + + out = "array" + array; + return true; } bool DatatypeGuid::convertImpl(String &out,IParser::Pos &pos) @@ -72,10 +80,8 @@ bool DatatypeGuid::convertImpl(String &out,IParser::Pos &pos) String guid_str; ++pos; - if (pos->type == 
TokenType::QuotedIdentifier) - guid_str = std::format("'{}'", String(pos->begin+1, pos->end -1)); - else if (pos->type == TokenType::StringLiteral) - guid_str = String(pos->begin, pos->end); + if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral) + guid_str = String(pos->begin+1, pos->end -1); else { auto start = pos; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) @@ -85,32 +91,26 @@ bool DatatypeGuid::convertImpl(String &out,IParser::Pos &pos) break; } --pos; - guid_str = std::format("'{}'",String(start->begin,pos->end)); + guid_str = String(start->begin,pos->end); } - out = guid_str; + out = std::format("toUUID('{}')", guid_str); ++pos; return true; } bool DatatypeInt::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toInt32"); } bool DatatypeLong::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toInt64"); } bool DatatypeReal::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toFloat64"); } bool DatatypeString::convertImpl(String &out,IParser::Pos &pos) @@ -122,12 +122,22 @@ bool DatatypeString::convertImpl(String &out,IParser::Pos &pos) bool DatatypeTimespan::convertImpl(String &out,IParser::Pos &pos) { + ParserKQLDateTypeTimespan time_span; + ASTPtr node; + Expected expected; + const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; ++pos; - out = getConvertedArgument(fn_name, pos); + if (time_span.parse(pos, node, expected)) + { + out = std::to_string(time_span.toSeconds()); + ++pos; + } + else + throw Exception("Not a correct timespan expression: " + fn_name, ErrorCodes::BAD_ARGUMENTS); return true; } diff --git 
a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index 76707598788..2a88a56b844 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -372,9 +372,7 @@ bool ParseVersion::convertImpl(String & out,IParser::Pos & pos) bool ReplaceRegex::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "replaceRegexpAll"); } bool Reverse::convertImpl(String & out,IParser::Pos & pos) @@ -551,23 +549,48 @@ bool Translate::convertImpl(String & out,IParser::Pos & pos) bool Trim::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String regex = getConvertedArgument(fn_name, pos); + ++pos; + String source = getConvertedArgument(fn_name, pos); + String ltrim = std::format("if ((replaceRegexpOne(concat('random_str', {0}) as srcl, concat('random_str', {1}),'') as dstl) = srcl, {0}, dstl)", source, regex); + out = std::format("if ((replaceRegexpOne(concat('random_str', reverse({0})) as srcr, concat('random_str', reverse({1})),'') as dstr) = srcr, {0}, reverse(dstr))", ltrim, regex); + + return true; } bool TrimEnd::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String regex = getConvertedArgument(fn_name, pos); + ++pos; + String source = getConvertedArgument(fn_name, pos); + out = std::format("if ((replaceRegexpOne(concat('random_str', reverse({0})) as src, concat('random_str', reverse({1})),'') as dst) = src, {0}, reverse(dst))", source, regex); + + return true; } bool TrimStart::convertImpl(String & 
out,IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String regex = getConvertedArgument(fn_name, pos); + ++pos; + String source = getConvertedArgument(fn_name, pos); + out = std::format("if ((replaceRegexpOne(concat('random_str', {0}) as src, concat('random_str', {1}),'') as dst) = src, {0}, dst)", source, regex); + + return true; } bool URLDecode::convertImpl(String & out,IParser::Pos & pos) diff --git a/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp index d83ef4e2f53..af3c4e45875 100644 --- a/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp +++ b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -11,10 +12,15 @@ namespace DB bool ParserKQLDateTypeTimespan :: parseImpl(Pos & pos, [[maybe_unused]] ASTPtr & node, Expected & expected) { - const String token(pos->begin,pos->end); + String token; const char * current_word = pos->begin; expected.add(pos, current_word); + if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral ) + token = String(pos->begin + 1, pos->end -1); + else + token = String(pos->begin, pos->end); + if (!parseConstKQLTimespan(token)) return false; @@ -84,6 +90,7 @@ bool ParserKQLDateTypeTimespan :: parseConstKQLTimespan(const String & text) {"ticks", KQLTimespanUint::tick} }; + uint16_t days = 0, hours = 0, minutes = 0, seconds = 0, milliseconds = 0; const char * ptr = text.c_str(); @@ -99,21 +106,75 @@ bool ParserKQLDateTypeTimespan :: parseConstKQLTimespan(const String & text) if (number_len <= 0) return false; + days = std::stoi(String(ptr, ptr + number_len)); + if (*(ptr + number_len) == '.') { auto fractionLen = scanDigit(ptr + number_len + 1); if (fractionLen >= 0) { + hours = std::stoi(String(ptr + number_len + 1, ptr + number_len + 1 
+ fractionLen)); number_len += fractionLen + 1; } + else + { + hours = days; + days = 0; + } } - String timespan_suffix(ptr + number_len, ptr+text.size()); - if (TimespanSuffixes.find(timespan_suffix) == TimespanSuffixes.end()) + if (hours > 23) return false; - time_span = std::stod(String(ptr, ptr + number_len)); - time_span_unit =TimespanSuffixes[timespan_suffix] ; + if (*(ptr + number_len) != ':') + { + String timespan_suffix(ptr + number_len, ptr + text.size()); + + trim(timespan_suffix); + if (TimespanSuffixes.find(timespan_suffix) == TimespanSuffixes.end()) + return false; + + time_span = std::stod(String(ptr, ptr + number_len)); + time_span_unit = TimespanSuffixes[timespan_suffix] ; + + return true; + } + + auto min_len = scanDigit(ptr + number_len + 1); + if (min_len < 0) + return false; + + minutes = std::stoi(String(ptr + number_len + 1, ptr + number_len + 1 + min_len)); + if (minutes > 59) + return false; + + number_len += min_len + 1; + if (*(ptr + number_len) == ':') + { + auto sec_len = scanDigit(ptr + number_len + 1); + if (sec_len > 0) + { + seconds = std::stoi(String(ptr + number_len + 1, ptr + number_len + 1 + sec_len)); + if (seconds > 59) + return false; + + number_len += sec_len + 1; + if (*(ptr + number_len) == '.') + { + auto milli_len = scanDigit(ptr + number_len + 1); + if (milli_len > 0) + { + milliseconds = std::stoi(String(ptr + number_len + 1, ptr + number_len + 1 + milli_len)); + + if (milliseconds > 1000) + return false; + } + } + } + } + + time_span = days * 86400 + hours * 3600 + minutes * 60 + seconds + milliseconds / 1000; + time_span_unit = KQLTimespanUint::second; return true; } diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index be67807ad8f..449b6972cd1 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -213,7 +213,7 @@ Token Lexer::nextTokenImpl() for (const char * iterator = token_begin; iterator < pos; ++iterator) { - if (!isWordCharASCII(*iterator) && *iterator != '$') + if 
(!isWordCharASCII(*iterator) && *iterator != '$' && *iterator != '.') return Token(TokenType::ErrorWrongNumber, token_begin, pos); } diff --git a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp new file mode 100644 index 00000000000..f2994464e14 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp @@ -0,0 +1,169 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ +using namespace DB; +using namespace std::literals; +} +class ParserStringFuncTest : public ::testing::TestWithParam, ParserTestCase>> +{}; + +TEST_P(ParserStringFuncTest, ParseQuery) +{ const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + ASSERT_NE(nullptr, parser); + if (expected_ast) + { + if (std::string(expected_ast).starts_with("throws")) + { + EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } + else + { + ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + if (std::string("CREATE USER or ALTER USER query") != parser->getName() + && std::string("ATTACH access entity query") != parser->getName()) + { + EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); + } + else + { + if (input_text.starts_with("ATTACH")) + { + auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); + EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); + } + else + { + EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); + } + } + } + } + else + { + ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } +} + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserStringFuncTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + 
::testing::ValuesIn(std::initializer_list{ + { + "print Quine = base64_encode_fromguid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb')", + "SELECT base64Encode('ae3133f2-6e22-49ae-b06a-16e6a9b212eb') AS Quine" + }, + { + "print base64_decode_toguid('YWUzMTMzZjItNmUyMi00OWFlLWIwNmEtMTZlNmE5YjIxMmVi')", + "SELECT base64Decode('YWUzMTMzZjItNmUyMi00OWFlLWIwNmEtMTZlNmE5YjIxMmVi')" + }, + { + "print base64_decode_toarray('S3VzdG8=')", + "SELECT arrayMap(x -> reinterpretAsUInt8(x), splitByRegexp('', base64Decode('S3VzdG8=')))" + }, + { + "print replace_regex('Hello, World!', '.', '\\0\\0')", + "SELECT replaceRegexpAll('Hello, World!', '.', '\\0\\0')" + }, + { + "print idx = has_any_index('this is an example', dynamic(['this', 'example'])) ", + "SELECT if(empty(['this', 'example']), -1, indexOf(arrayMap(x -> (x IN splitByChar(' ', 'this is an example')), if(empty(['this', 'example']), [''], arrayMap(x -> toString(x), ['this', 'example']))), 1) - 1) AS idx" + }, + { + "print idx = has_any_index('this is an example', dynamic([]))", + "SELECT if(empty([]), -1, indexOf(arrayMap(x -> (x IN splitByChar(' ', 'this is an example')), if(empty([]), [''], arrayMap(x -> toString(x), []))), 1) - 1) AS idx" + }, + { + "print translate('krasp', 'otsku', 'spark')", + "SELECT if(length('otsku') = 0, '', translate('spark', 'krasp', multiIf(length('otsku') = 0, 'krasp', (length('krasp') - length('otsku')) > 0, concat('otsku', repeat(substr('otsku', length('otsku'), 1), toUInt16(length('krasp') - length('otsku')))), (length('krasp') - length('otsku')) < 0, substr('otsku', 1, length('krasp')), 'otsku')))" + }, + { + "print trim_start('[^\\w]+', strcat('- ','Te st1','// $'))", + "SELECT if((replaceRegexpOne(concat('random_str', concat('- ', 'Te st1', '// $')) AS src, concat('random_str', '[^\\\\w]+'), '') AS dst) = src, concat('- ', 'Te st1', '// $'), dst)" + }, + { + "print trim_end('.com', 'bing.com')", + "SELECT if((replaceRegexpOne(concat('random_str', reverse('bing.com')) AS src, 
concat('random_str', reverse('.com')), '') AS dst) = src, 'bing.com', reverse(dst))" + }, + { + "print trim('--', '--https://bing.com--')", + "SELECT if((replaceRegexpOne(concat('random_str', reverse(if((replaceRegexpOne(concat('random_str', '--https://bing.com--') AS srcl, concat('random_str', '--'), '') AS dstl) = srcl, '--https://bing.com--', dstl))) AS srcr, concat('random_str', reverse('--')), '') AS dstr) = srcr, if(dstl = srcl, '--https://bing.com--', dstl), reverse(dstr))" + }, + { + "print bool(1)", + "SELECT toBool(1)" + }, + { + "print datetime(2015-12-31 23:59:59.9)", + "SELECT toDateTime64('2015-12-31 23:59:59.9', 9, 'UTC')" + }, + { + "print datetime(\"2015-12-31 23:59:59.9\")", + "SELECT toDateTime64('2015-12-31 23:59:59.9', 9, 'UTC')" + }, + { + "print datetime('2015-12-31 23:59:59.9')", + "SELECT toDateTime64('2015-12-31 23:59:59.9', 9, 'UTC')" + }, + { + "print guid(74be27de-1e4e-49d9-b579-fe0b331d3642)", + "SELECT toUUID('74be27de-1e4e-49d9-b579-fe0b331d3642')" + }, + { + "print guid('74be27de-1e4e-49d9-b579-fe0b331d3642')", + "SELECT toUUID('74be27de-1e4e-49d9-b579-fe0b331d3642')" + }, + { + "print guid('74be27de1e4e49d9b579fe0b331d3642')", + "SELECT toUUID('74be27de1e4e49d9b579fe0b331d3642')" + }, + { + "print int(32.5)", + "SELECT toInt32(32.5)" + }, + { + "print long(32.5)", + "SELECT toInt64(32.5)" + }, + { + "print real(32.5)", + "SELECT toFloat64(32.5)" + }, + { + "print time('1.22:34:8.128')", + "SELECT 167648." + }, + { + "print time('1d')", + "SELECT 86400." + }, + { + "print time('1.5d')", + "SELECT 129600." + }, + { + "print timespan('1.5d')", + "SELECT 129600." 
+ } + +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp index 77ad9714735..74d13c60d05 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp @@ -121,6 +121,10 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserDateTimeFuncTest, { "print now()", "SELECT now64(9, 'UTC')" + }, + { + "print now(1d)", + "SELECT now64(9, 'UTC') + 86400." } })));