diff --git a/src/Common/HashTable/StringHashTable.h b/src/Common/HashTable/StringHashTable.h index b05d119e0e9..d30271d65db 100644 --- a/src/Common/HashTable/StringHashTable.h +++ b/src/Common/HashTable/StringHashTable.h @@ -237,7 +237,12 @@ public: // 1. Always memcpy 8 times bytes // 2. Use switch case extension to generate fast dispatching table // 3. Funcs are named callables that can be force_inlined + // // NOTE: It relies on Little Endianness + // + // NOTE: It requires padded to 8 bytes keys (IOW you cannot pass + // std::string here, but you can pass i.e. ColumnString::getDataAt()), + // since it copies 8 bytes at a time. template static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func) { diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 0da55343b9d..58cdb68d100 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -358,6 +358,10 @@ public: */ virtual bool useDefaultImplementationForConstants() const { return false; } + /** Some arguments could remain constant during this implementation. + */ + virtual ColumnNumbers getArgumentsThatAreAlwaysConstant() const { return {}; } + /** If function arguments has single low cardinality column and all other arguments are constants, call function on nested column. * Otherwise, convert all low cardinality columns to ordinary columns. * Returns ColumnLowCardinality if at least one argument is ColumnLowCardinality. @@ -367,10 +371,6 @@ public: /// If it isn't, will convert all ColumnLowCardinality arguments to full columns. virtual bool canBeExecutedOnLowCardinalityDictionary() const { return true; } - /** Some arguments could remain constant during this implementation. - */ - virtual ColumnNumbers getArgumentsThatAreAlwaysConstant() const { return {}; } - /** True if function can be called on default arguments (include Nullable's) and won't throw. * Counterexample: modulo(0, 0) */ diff --git a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h index 4670d610725..08576fe59ec 100644 --- a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h +++ b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h @@ -41,6 +41,9 @@ public: String getName() const override { return name; } size_t getNumberOfArguments() const override { return 2; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (!isString(arguments[0].type)) @@ -65,9 +68,7 @@ public: const ColumnConst * column_tld_list_name = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get()); FirstSignificantSubdomainCustomLookup tld_lookup(column_tld_list_name->getValue()); - /// FIXME: convertToFullColumnIfConst() is suboptimal - auto column = arguments[0].column->convertToFullColumnIfConst(); - if (const ColumnString * col = checkAndGetColumn(*column)) + if (const ColumnString * col = checkAndGetColumn(*arguments[0].column)) { auto col_res = ColumnString::create(); vector(tld_lookup, col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets()); diff --git a/tests/queries/0_stateless/01601_custom_tld.reference b/tests/queries/0_stateless/01601_custom_tld.reference index e056505f273..04204ebf02a 100644 --- a/tests/queries/0_stateless/01601_custom_tld.reference +++ b/tests/queries/0_stateless/01601_custom_tld.reference @@ -22,3 +22,9 @@ foobar.com foobar.com foobar.com xx.blogspot.co.at +-- www +www.foo +foo +-- vector +xx.blogspot.co.at + diff --git a/tests/queries/0_stateless/01601_custom_tld.sql b/tests/queries/0_stateless/01601_custom_tld.sql index 688dd419858..ceb00d5ff19 100644 --- a/tests/queries/0_stateless/01601_custom_tld.sql +++ b/tests/queries/0_stateless/01601_custom_tld.sql @@ -29,3 +29,11 @@ select cutToFirstSignificantSubdomainCustom('http://foobar.com', 'public_suffix_ select cutToFirstSignificantSubdomainCustom('http://foobar.com/foo', 'public_suffix_list'); select cutToFirstSignificantSubdomainCustom('http://bar.foobar.com/foo', 'public_suffix_list'); select cutToFirstSignificantSubdomainCustom('http://xx.blogspot.co.at', 'public_suffix_list'); + +select '-- www'; +select cutToFirstSignificantSubdomainCustomWithWWW('http://www.foo', 'public_suffix_list'); +select cutToFirstSignificantSubdomainCustom('http://www.foo', 'public_suffix_list'); + +select '-- vector'; +select cutToFirstSignificantSubdomainCustom('http://xx.blogspot.co.at/' || toString(number), 'public_suffix_list') from numbers(1); +select cutToFirstSignificantSubdomainCustom('there-is-no-such-domain' || toString(number), 'public_suffix_list') from numbers(1); diff --git a/tests/queries/0_stateless/01940_custom_tld_sharding_key.reference b/tests/queries/0_stateless/01940_custom_tld_sharding_key.reference new file mode 100644 index 00000000000..0989a305613 --- /dev/null +++ b/tests/queries/0_stateless/01940_custom_tld_sharding_key.reference @@ -0,0 +1 @@ +foo.com diff --git a/tests/queries/0_stateless/01940_custom_tld_sharding_key.sql b/tests/queries/0_stateless/01940_custom_tld_sharding_key.sql new file mode 100644 index 00000000000..5d38cfb18dc --- /dev/null +++ b/tests/queries/0_stateless/01940_custom_tld_sharding_key.sql @@ -0,0 +1,2 @@ +select * from remote('127.{1,2}', view(select 'foo.com' key), cityHash64(key)) where key = cutToFirstSignificantSubdomainCustom('foo.com', 'public_suffix_list') settings optimize_skip_unused_shards=1, force_optimize_skip_unused_shards=1; +select * from remote('127.{1,2}', view(select 'foo.com' key), cityHash64(key)) where key = cutToFirstSignificantSubdomainCustom('bar.com', 'public_suffix_list') settings optimize_skip_unused_shards=1, force_optimize_skip_unused_shards=1;