From 42a844546229e56c51a3ec986467ced52e4ed972 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 7 Jul 2021 01:12:56 +0300 Subject: [PATCH 1/5] Fix constness of custom TLDs Before this patch the functions below returns incorrect type for consts, and hence optimize_skip_unused_shards does not work: - cutToFirstSignificantSubdomainCustom() - cutToFirstSignificantSubdomainCustomWithWWW() - firstSignificantSubdomainCustom() --- .../URL/FirstSignificantSubdomainCustomImpl.h | 28 ++++++++++++++++--- .../0_stateless/01601_custom_tld.reference | 6 ++++ .../queries/0_stateless/01601_custom_tld.sql | 8 ++++++ .../01940_custom_tld_sharding_key.reference | 1 + .../01940_custom_tld_sharding_key.sql | 2 ++ 5 files changed, 41 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/01940_custom_tld_sharding_key.reference create mode 100644 tests/queries/0_stateless/01940_custom_tld_sharding_key.sql diff --git a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h index 4670d610725..70cf30a7384 100644 --- a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h +++ b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h @@ -60,14 +60,25 @@ public: return arguments[0].type; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override { const ColumnConst * column_tld_list_name = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get()); FirstSignificantSubdomainCustomLookup tld_lookup(column_tld_list_name->getValue()); - /// FIXME: convertToFullColumnIfConst() is suboptimal - auto column = arguments[0].column->convertToFullColumnIfConst(); - if (const ColumnString * col = checkAndGetColumn(*column)) + auto column = arguments[0].column; + + if (const ColumnConst * const_col = checkAndGetColumnConst(column.get())) + { + const String & data = const_col->getValue(); + const String & res = scalar(tld_lookup, data); + + auto col_res = ColumnString::create(); + col_res->insert(res); + + auto col_const_res = ColumnConst::create(std::move(col_res), input_rows_count); + return col_const_res; + } + else if (const ColumnString * col = checkAndGetColumn(*column)) { auto col_res = ColumnString::create(); vector(tld_lookup, col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets()); @@ -107,6 +118,15 @@ public: prev_offset = offsets[i]; } } + + static String scalar(FirstSignificantSubdomainCustomLookup & tld_lookup, const String & data) + { + Pos start; + size_t length; + Extractor::execute(tld_lookup, &data[0], data.size(), start, length); + String output(start, length); + return output; + } }; } diff --git a/tests/queries/0_stateless/01601_custom_tld.reference b/tests/queries/0_stateless/01601_custom_tld.reference index e056505f273..04204ebf02a 100644 --- a/tests/queries/0_stateless/01601_custom_tld.reference +++ b/tests/queries/0_stateless/01601_custom_tld.reference @@ -22,3 +22,9 @@ foobar.com foobar.com foobar.com xx.blogspot.co.at +-- www +www.foo +foo +-- vector +xx.blogspot.co.at + diff --git a/tests/queries/0_stateless/01601_custom_tld.sql b/tests/queries/0_stateless/01601_custom_tld.sql index 688dd419858..ceb00d5ff19 100644 --- a/tests/queries/0_stateless/01601_custom_tld.sql +++ b/tests/queries/0_stateless/01601_custom_tld.sql @@ -29,3 +29,11 @@ select cutToFirstSignificantSubdomainCustom('http://foobar.com', 'public_suffix_ select cutToFirstSignificantSubdomainCustom('http://foobar.com/foo', 'public_suffix_list'); select cutToFirstSignificantSubdomainCustom('http://bar.foobar.com/foo', 'public_suffix_list'); select cutToFirstSignificantSubdomainCustom('http://xx.blogspot.co.at', 'public_suffix_list'); + +select '-- www'; +select cutToFirstSignificantSubdomainCustomWithWWW('http://www.foo', 'public_suffix_list'); +select cutToFirstSignificantSubdomainCustom('http://www.foo', 'public_suffix_list'); + +select '-- vector'; +select cutToFirstSignificantSubdomainCustom('http://xx.blogspot.co.at/' || toString(number), 'public_suffix_list') from numbers(1); +select cutToFirstSignificantSubdomainCustom('there-is-no-such-domain' || toString(number), 'public_suffix_list') from numbers(1); diff --git a/tests/queries/0_stateless/01940_custom_tld_sharding_key.reference b/tests/queries/0_stateless/01940_custom_tld_sharding_key.reference new file mode 100644 index 00000000000..0989a305613 --- /dev/null +++ b/tests/queries/0_stateless/01940_custom_tld_sharding_key.reference @@ -0,0 +1 @@ +foo.com diff --git a/tests/queries/0_stateless/01940_custom_tld_sharding_key.sql b/tests/queries/0_stateless/01940_custom_tld_sharding_key.sql new file mode 100644 index 00000000000..5d38cfb18dc --- /dev/null +++ b/tests/queries/0_stateless/01940_custom_tld_sharding_key.sql @@ -0,0 +1,2 @@ +select * from remote('127.{1,2}', view(select 'foo.com' key), cityHash64(key)) where key = cutToFirstSignificantSubdomainCustom('foo.com', 'public_suffix_list') settings optimize_skip_unused_shards=1, force_optimize_skip_unused_shards=1; +select * from remote('127.{1,2}', view(select 'foo.com' key), cityHash64(key)) where key = cutToFirstSignificantSubdomainCustom('bar.com', 'public_suffix_list') settings optimize_skip_unused_shards=1, force_optimize_skip_unused_shards=1; From 5e120b8d37a17c09232b490e3c438085e7c6ac8d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 7 Jul 2021 09:42:13 +0300 Subject: [PATCH 2/5] Fix stack-buffer-overflow in custom TLDs due to StringHashTable copy 8 bytes at a time ASan reports [1]: ==164==ERROR: AddressSanitizer: stack-buffer-overflow on address 0x7f0209dd4abf at pc 0x00000b75b7c5 bp 0x7f0209dd4760 sp 0x7f0209dd4758 READ of size 8 at 0x7f0209dd4abf thread T4 (TCPHandler) 0 0xb75b7c4 in auto StringHashTable > >::dispatch > > const, StringRef const&, StringHashTable > >::FindCallable>(StringHashTable > > const&, StringRef const&, StringHashTable > >::FindCallable&&) obj-x86_64-linux-gnu/../src/Common/HashTable/StringHashTable.h:283:21 1 0xb75b7c4 in StringHashTable > >::has(StringRef const&, unsigned long) const obj-x86_64-linux-gnu/../src/Common/HashTable/StringHashTable.h:365:16 2 0xb75b7c4 in DB::TLDList::has(StringRef const&) const obj-x86_64-linux-gnu/../src/Common/TLDListsHolder.cpp:31:26 3 0x1c4a6046 in void DB::ExtractFirstSignificantSubdomain::executeCustom(DB::FirstSignificantSubdomainCustomLookup const&, char const*, unsigned long, char const*&, unsigned long&, char const**) (/usr/bin/clickhouse+0x1c4a6046) 4 0x1c4a3586 in DB::FunctionCutToFirstSignificantSubdomainCustomImpl, DB::NameCutToFirstSignificantSubdomainCustom>::executeImpl(std::__1::vector > const&, std::__1::shared_ptr const&, unsigned long) const (/usr/bin/clickhouse+0x1c4a3586) 5 0x10d96e34 in DB::IFunction::executeImplDryRun(std::__1::vector > const&, std::__1::shared_ptr const&, unsigned long) const (/usr/bin/clickhouse+0x10d96e34) 6 0x10d9648b in DB::FunctionToExecutableFunctionAdaptor::executeDryRunImpl(std::__1::vector > const&, std::__1::shared_ptr const&, unsigned long) const (/usr/bin/clickhouse+0x10d9648b) 7 0x200ed79b in DB::IExecutableFunction::executeWithoutLowCardinalityColumns(std::__1::vector > const&, std::__1::shared_ptr const&, unsigned long, bool) const obj-x86_64-linux-gnu/../src/Functions/IFunction.cpp:212:15 8 0x200ee436 in DB::IExecutableFunction::execute(std::__1::vector > const&, std::__1::shared_ptr const&, unsigned long, bool) const obj-x86_64-linux-gnu/../src/Functions/IFunction.cpp:257:22 9 0x20cd6f6f in DB::ActionsDAG::addFunction(std::__1::shared_ptr const&, std::__1::vector >, std::__1::basic_string, std::__1::allocator >) obj-x86_64-linux-gnu/../src/Interpreters/ActionsDAG.cpp:214:37 10 0x2124c8a7 in DB::ScopeStack::addFunction(std::__1::shared_ptr const&, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&, std::__1::basic_string, std::__1::allocator >) obj-x86_64-linux-gnu/../src/Interpreters/ActionsVisitor.cpp:570:51 11 0x2125c80d in DB::ActionsMatcher::Data::addFunction(std::__1::shared_ptr const&, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&, std::__1::basic_string, std::__1::allocator >) obj-x86_64-linux-gnu/../src/Interpreters/ActionsVisitor.h:169:27 12 0x2125c80d in DB::ActionsMatcher::visit(DB::ASTFunction const&, std::__1::shared_ptr const&, DB::ActionsMatcher::Data&) obj-x86_64-linux-gnu/../src/Interpreters/ActionsVisitor.cpp:1061:14 13 0x212522fb in DB::ActionsMatcher::visit(DB::ASTFunction const&, std::__1::shared_ptr const&, DB::ActionsMatcher::Data&) obj-x86_64-linux-gnu/../src/Interpreters/ActionsVisitor.cpp:971:17 14 0x2121354e in DB::InDepthNodeVisitor const>::visit(std::__1::shared_ptr const&) obj-x86_64-linux-gnu/../src/Interpreters/InDepthNodeVisitor.h:34:13 15 0x211e17c7 in DB::ExpressionAnalyzer::getRootActions(std::__1::shared_ptr const&, bool, std::__1::shared_ptr&, bool) obj-x86_64-linux-gnu/../src/Interpreters/ExpressionAnalyzer.cpp:421:48 16 0x21204024 in DB::ExpressionAnalyzer::getConstActions(std::__1::vector > const&) obj-x86_64-linux-gnu/../src/Interpreters/ExpressionAnalyzer.cpp:1423:5 17 0x230f7216 in DB::KeyCondition::getBlockWithConstants(std::__1::shared_ptr const&, std::__1::shared_ptr const&, std::__1::shared_ptr) obj-x86_64-linux-gnu/../src/Storages/MergeTree/KeyCondition.cpp:385:103 18 0x22877f9e in DB::(anonymous namespace)::replaceConstantExpressions(std::__1::shared_ptr&, std::__1::shared_ptr, DB::NamesAndTypesList const&, std::__1::shared_ptr, std::__1::shared_ptr const&) obj-x86_64-linux-gnu/../src/Storages/StorageDistributed.cpp:280:34 19 0x22877f9e in DB::StorageDistributed::skipUnusedShards(std::__1::shared_ptr, std::__1::shared_ptr const&, std::__1::shared_ptr const&, std::__1::shared_ptr) const obj-x86_64-linux-gnu/../src/Storages/StorageDistributed.cpp:1091:5 20 0x2285d215 in DB::StorageDistributed::getOptimizedCluster(std::__1::shared_ptr, std::__1::shared_ptr const&, std::__1::shared_ptr const&) const obj-x86_64-linux-gnu/../src/Storages/StorageDistributed.cpp:1015:32 21 0x2285a9c4 in DB::StorageDistributed::getQueryProcessingStage(std::__1::shared_ptr, DB::QueryProcessingStage::Enum, std::__1::shared_ptr const&, DB::SelectQueryInfo&) const obj-x86_64-linux-gnu/../src/Storages/StorageDistributed.cpp:500:40 22 0x2183a4b2 in DB::InterpreterSelectQuery::getSampleBlockImpl() obj-x86_64-linux-gnu/../src/Interpreters/InterpreterSelectQuery.cpp:616:31 23 0x21828db4 in DB::InterpreterSelectQuery::InterpreterSelectQuery(std::__1::shared_ptr const&, std::__1::shared_ptr, std::__1::shared_ptr const&, std::__1::optional, std::__1::shared_ptr const&, DB::SelectQueryOptions const&, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&, std::__1::shared_ptr const&)::$_1::operator()(bool) const obj-x86_64-linux-gnu/../src/Interpreters/InterpreterSelectQuery.cpp:506:25 24 0x2181b652 in DB::InterpreterSelectQuery::InterpreterSelectQuery(std::__1::shared_ptr const&, std::__1::shared_ptr, std::__1::shared_ptr const&, std::__1::optional, std::__1::shared_ptr const&, DB::SelectQueryOptions const&, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&, std::__1::shared_ptr const&) obj-x86_64-linux-gnu/../src/Interpreters/InterpreterSelectQuery.cpp:509:5 25 0x21817cbe in DB::InterpreterSelectQuery::InterpreterSelectQuery(std::__1::shared_ptr const&, std::__1::shared_ptr, DB::SelectQueryOptions const&, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&) obj-x86_64-linux-gnu/../src/Interpreters/InterpreterSelectQuery.cpp:161:7 26 0x21dd0eb5 in std::__1::__unique_if::__unique_single std::__1::make_unique const&, std::__1::shared_ptr&, DB::SelectQueryOptions&, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&>(std::__1::shared_ptr const&, std::__1::shared_ptr&, DB::SelectQueryOptions&, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&) obj-x86_64-linux-gnu/../contrib/libcxx/include/memory:2068:32 27 0x21dd0eb5 in DB::InterpreterSelectWithUnionQuery::buildCurrentChildInterpreter(std::__1::shared_ptr const&, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&) obj-x86_64-linux-gnu/../src/Interpreters/InterpreterSelectWithUnionQuery.cpp:212:16 28 0x21dcd0e7 in DB::InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(std::__1::shared_ptr const&, std::__1::shared_ptr, DB::SelectQueryOptions const&, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&) obj-x86_64-linux-gnu/../src/Interpreters/InterpreterSelectWithUnionQuery.cpp:134:13 29 0x211afe79 in std::__1::__unique_if::__unique_single std::__1::make_unique&, std::__1::shared_ptr&, DB::SelectQueryOptions const&>(std::__1::shared_ptr&, std::__1::shared_ptr&, DB::SelectQueryOptions const&) obj-x86_64-linux-gnu/../contrib/libcxx/include/memory:2068:32 30 0x211afe79 in DB::InterpreterFactory::get(std::__1::shared_ptr&, std::__1::shared_ptr, DB::SelectQueryOptions const&) obj-x86_64-linux-gnu/../src/Interpreters/InterpreterFactory.cpp:110:16 31 0x22273f97 in DB::executeQueryImpl(char const*, char const*, std::__1::shared_ptr, bool, DB::QueryProcessingStage::Enum, bool, DB::ReadBuffer*) obj-x86_64-linux-gnu/../src/Interpreters/executeQuery.cpp:524:28 32 0x22270ce2 in DB::executeQuery(std::__1::basic_string, std::__1::allocator > const&, std::__1::shared_ptr, bool, DB::QueryProcessingStage::Enum, bool) obj-x86_64-linux-gnu/../src/Interpreters/executeQuery.cpp:913:30 33 0x23905879 in DB::TCPHandler::runImpl() obj-x86_64-linux-gnu/../src/Server/TCPHandler.cpp:312:24 34 0x2392b81c in DB::TCPHandler::run() obj-x86_64-linux-gnu/../src/Server/TCPHandler.cpp:1622:9 35 0x2ab1fd8e in Poco::Net::TCPServerConnection::start() obj-x86_64-linux-gnu/../contrib/poco/Net/src/TCPServerConnection.cpp:43:3 36 0x2ab20952 in Poco::Net::TCPServerDispatcher::run() obj-x86_64-linux-gnu/../contrib/poco/Net/src/TCPServerDispatcher.cpp:115:20 37 0x2adfa3f4 in Poco::PooledThread::run() obj-x86_64-linux-gnu/../contrib/poco/Foundation/src/ThreadPool.cpp:199:14 38 0x2adf4716 in Poco::ThreadImpl::runnableEntry(void*) obj-x86_64-linux-gnu/../contrib/poco/Foundation/src/Thread_POSIX.cpp:345:27 39 0x7f02e66f2608 in start_thread (/lib/x86_64-linux-gnu/libpthread.so.0+0x9608) 40 0x7f02e6619292 in clone (/lib/x86_64-linux-gnu/libc.so.6+0x122292) Address 0x7f0209dd4abf is located in stack of thread T4 (TCPHandler) at offset 447 in frame 0 0x1c4a2c6f in DB::FunctionCutToFirstSignificantSubdomainCustomImpl, DB::NameCutToFirstSignificantSubdomainCustom>::executeImpl(std::__1::vector > const&, std::__1::shared_ptr const&, unsigned long) const (/usr/bin/clickhouse+0x1c4a2c6f) This frame has 16 object(s): [32, 40) 'ref.tmp.i168' [64, 72) 'tmp_data.i.i' [96, 104) 'tmp_length.i.i' [128, 136) 'domain_end.i.i' [160, 216) 'ref.tmp.i132' [256, 312) 'ref.tmp.i' [352, 360) 'tld_lookup' [384, 408) 'ref.tmp' [448, 472) 'ref.tmp11' <== Memory access at offset 447 partially underflows this variable [512, 536) 'ref.tmp14' [576, 632) 'ref.tmp20' [672, 696) 'ref.tmp65' [736, 760) 'ref.tmp66' [800, 824) 'ref.tmp67' [864, 888) 'ref.tmp68' [928, 952) 'ref.tmp78' HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork (longjmp and C++ exceptions *are* supported) Thread T4 (TCPHandler) created by T0 here: 0 0xb51940a in pthread_create (/usr/bin/clickhouse+0xb51940a) 1 0x2adf3a9f in Poco::ThreadImpl::startImpl(Poco::SharedPtr >) obj-x86_64-linux-gnu/../contrib/poco/Foundation/src/Thread_POSIX.cpp:202:6 2 0x2adf699a in Poco::Thread::start(Poco::Runnable&) obj-x86_64-linux-gnu/../contrib/poco/Foundation/src/Thread.cpp:128:2 3 0x2adfa998 in Poco::PooledThread::start() obj-x86_64-linux-gnu/../contrib/poco/Foundation/src/ThreadPool.cpp:85:10 4 0x2adfa998 in Poco::ThreadPool::ThreadPool(int, int, int, int) obj-x86_64-linux-gnu/../contrib/poco/Foundation/src/ThreadPool.cpp:252:12 5 0xb582c25 in DB::Server::main(std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&) obj-x86_64-linux-gnu/../programs/server/Server.cpp:915:22 6 0x2ab511a5 in Poco::Util::Application::run() obj-x86_64-linux-gnu/../contrib/poco/Util/src/Application.cpp:334:8 7 0xb56a89c in DB::Server::run() obj-x86_64-linux-gnu/../programs/server/Server.cpp:392:25 8 0x2ab956f7 in Poco::Util::ServerApplication::run(int, char**) obj-x86_64-linux-gnu/../contrib/poco/Util/src/ServerApplication.cpp:611:9 9 0xb566519 in mainEntryClickHouseServer(int, char**) obj-x86_64-linux-gnu/../programs/server/Server.cpp:171:20 10 0xb56224a in main obj-x86_64-linux-gnu/../programs/main.cpp:366:12 11 0x7f02e651e0b2 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x270b2) SUMMARY: AddressSanitizer: stack-buffer-overflow obj-x86_64-linux-gnu/../src/Common/HashTable/StringHashTable.h:283:21 in auto StringHashTable > >::dispatch > > const, StringRef const&, StringHashTable > >::FindCallable>(StringHashTable > > const&, StringRef const&, StringHashTable > >::FindCallable&&) Shadow bytes around the buggy address: 0x0fe0c13b2900: 00 00 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 0x0fe0c13b2910: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0fe0c13b2920: f1 f1 f1 f1 f8 f2 f2 f2 00 f2 f2 f2 00 f2 f2 f2 0x0fe0c13b2930: 00 f2 f2 f2 f8 f8 f8 f8 f8 f8 f8 f2 f2 f2 f2 f2 0x0fe0c13b2940: f8 f8 f8 f8 f8 f8 f8 f2 f2 f2 f2 f2 00 f2 f2 f2 =>0x0fe0c13b2950: f8 f8 f8 f2 f2 f2 f2[f2]00 00 00 f2 f2 f2 f2 f2 0x0fe0c13b2960: 00 00 00 f2 f2 f2 f2 f2 f8 f8 f8 f8 f8 f8 f8 f2 0x0fe0c13b2970: f2 f2 f2 f2 f8 f8 f8 f2 f2 f2 f2 f2 f8 f8 f8 f2 0x0fe0c13b2980: f2 f2 f2 f2 f8 f8 f8 f2 f2 f2 f2 f2 f8 f8 f8 f2 0x0fe0c13b2990: f2 f2 f2 f2 f8 f8 f8 f3 f3 f3 f3 f3 00 00 00 00 0x0fe0c13b29a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Shadow byte legend (one shadow byte represents 8 application bytes): Addressable: 00 Partially addressable: 01 02 03 04 05 06 07 Heap left redzone: fa Freed heap region: fd Stack left redzone: f1 Stack mid redzone: f2 Stack right redzone: f3 Stack after return: f5 Stack use after scope: f8 Global redzone: f9 Global init order: f6 Poisoned by user: f7 Container overflow: fc Array cookie: ac Intra object redzone: bb ASan internal: fe Left alloca redzone: ca Right alloca redzone: cb Shadow gap: cc ==164==ABORTING [1]: https://clickhouse-test-reports.s3.yandex.net/26041/42a844546229e56c51a3ec986467ced52e4ed972/functional_stateless_tests_flaky_check_(address)/stderr.log v2: Replace String with string_view in custom TLD for scalar v3: use ColumnString::getDataAt() --- .../URL/FirstSignificantSubdomainCustomImpl.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h index 70cf30a7384..3aee0141073 100644 --- a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h +++ b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h @@ -69,11 +69,11 @@ public: if (const ColumnConst * const_col = checkAndGetColumnConst(column.get())) { - const String & data = const_col->getValue(); - const String & res = scalar(tld_lookup, data); + const ColumnString * col_str = checkAndGetColumn(const_col->getDataColumn()); + const std::string_view & sv = scalar(tld_lookup, col_str->getDataAt(0)); auto col_res = ColumnString::create(); - col_res->insert(res); + col_res->insert(sv); auto col_const_res = ColumnConst::create(std::move(col_res), input_rows_count); return col_const_res; @@ -119,13 +119,12 @@ public: } } - static String scalar(FirstSignificantSubdomainCustomLookup & tld_lookup, const String & data) + static std::string_view scalar(FirstSignificantSubdomainCustomLookup & tld_lookup, const StringRef & data) { Pos start; size_t length; - Extractor::execute(tld_lookup, &data[0], data.size(), start, length); - String output(start, length); - return output; + Extractor::execute(tld_lookup, &data.data[0], data.size, start, length); + return {start, length}; } }; From 5bc05337128f0295a7d2e7ce20bc17bb517a053f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 7 Jul 2021 10:42:33 +0300 Subject: [PATCH 3/5] Add a note for padded to 8 bytes keys in StringHashTable --- src/Common/HashTable/StringHashTable.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Common/HashTable/StringHashTable.h b/src/Common/HashTable/StringHashTable.h index b05d119e0e9..d30271d65db 100644 --- a/src/Common/HashTable/StringHashTable.h +++ b/src/Common/HashTable/StringHashTable.h @@ -237,7 +237,12 @@ public: // 1. Always memcpy 8 times bytes // 2. Use switch case extension to generate fast dispatching table // 3. Funcs are named callables that can be force_inlined + // // NOTE: It relies on Little Endianness + // + // NOTE: It requires padded to 8 bytes keys (IOW you cannot pass + // std::string here, but you can pass i.e. ColumnString::getDataAt()), + // since it copies 8 bytes at a time. template static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func) { From 9ca38235aa2f0e1d6b552625da40f4ee3d5e5ff7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 10 Jul 2021 11:29:08 +0300 Subject: [PATCH 4/5] Correct fix for #26041 --- src/Functions/URL/FirstSignificantSubdomainCustomImpl.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h index 4670d610725..ba39eeb5e69 100644 --- a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h +++ b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h @@ -41,6 +41,9 @@ public: String getName() const override { return name; } size_t getNumberOfArguments() const override { return 2; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (!isString(arguments[0].type)) @@ -65,8 +68,6 @@ public: const ColumnConst * column_tld_list_name = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get()); FirstSignificantSubdomainCustomLookup tld_lookup(column_tld_list_name->getValue()); - /// FIXME: convertToFullColumnIfConst() is suboptimal - auto column = arguments[0].column->convertToFullColumnIfConst(); if (const ColumnString * col = checkAndGetColumn(*column)) { auto col_res = ColumnString::create(); From ba1442532b4e59007ecda55785aeadaa5ab3eb5a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 10 Jul 2021 11:43:28 +0300 Subject: [PATCH 5/5] Fix build --- src/Functions/URL/FirstSignificantSubdomainCustomImpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h index ba39eeb5e69..08576fe59ec 100644 --- a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h +++ b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h @@ -68,7 +68,7 @@ public: const ColumnConst * column_tld_list_name = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get()); FirstSignificantSubdomainCustomLookup tld_lookup(column_tld_list_name->getValue()); - if (const ColumnString * col = checkAndGetColumn(*column)) + if (const ColumnString * col = checkAndGetColumn(*arguments[0].column)) { auto col_res = ColumnString::create(); vector(tld_lookup, col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets());