Merge pull request #26180 from ClickHouse/azat-subdomains

Merging #26041.
This commit is contained in:
alexey-milovidov 2021-07-10 19:16:01 +03:00 committed by GitHub
commit f95cd5b1e4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 30 additions and 7 deletions

View File

@ -237,7 +237,12 @@ public:
// 1. Always memcpy 8 times bytes
// 2. Use switch case extension to generate fast dispatching table
// 3. Funcs are named callables that can be force_inlined
//
// NOTE: It relies on Little Endianness
//
// NOTE: It requires padded to 8 bytes keys (IOW you cannot pass
// std::string here, but you can pass i.e. ColumnString::getDataAt()),
// since it copies 8 bytes at a time.
template <typename Self, typename KeyHolder, typename Func>
static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func)
{

View File

@ -358,6 +358,10 @@ public:
*/
virtual bool useDefaultImplementationForConstants() const { return false; }
/** Some arguments could remain constant during this implementation.
*/
virtual ColumnNumbers getArgumentsThatAreAlwaysConstant() const { return {}; }
/** If function arguments has single low cardinality column and all other arguments are constants, call function on nested column.
* Otherwise, convert all low cardinality columns to ordinary columns.
* Returns ColumnLowCardinality if at least one argument is ColumnLowCardinality.
@ -367,10 +371,6 @@ public:
/// If it isn't, will convert all ColumnLowCardinality arguments to full columns.
virtual bool canBeExecutedOnLowCardinalityDictionary() const { return true; }
/** Some arguments could remain constant during this implementation.
*/
virtual ColumnNumbers getArgumentsThatAreAlwaysConstant() const { return {}; }
/** True if function can be called on default arguments (include Nullable's) and won't throw.
* Counterexample: modulo(0, 0)
*/

View File

@ -41,6 +41,9 @@ public:
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (!isString(arguments[0].type))
@ -65,9 +68,7 @@ public:
const ColumnConst * column_tld_list_name = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
FirstSignificantSubdomainCustomLookup tld_lookup(column_tld_list_name->getValue<String>());
/// FIXME: convertToFullColumnIfConst() is suboptimal
auto column = arguments[0].column->convertToFullColumnIfConst();
if (const ColumnString * col = checkAndGetColumn<ColumnString>(*column))
if (const ColumnString * col = checkAndGetColumn<ColumnString>(*arguments[0].column))
{
auto col_res = ColumnString::create();
vector(tld_lookup, col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets());

View File

@ -22,3 +22,9 @@ foobar.com
foobar.com
foobar.com
xx.blogspot.co.at
-- www
www.foo
foo
-- vector
xx.blogspot.co.at

View File

@ -29,3 +29,11 @@ select cutToFirstSignificantSubdomainCustom('http://foobar.com', 'public_suffix_
select cutToFirstSignificantSubdomainCustom('http://foobar.com/foo', 'public_suffix_list');
select cutToFirstSignificantSubdomainCustom('http://bar.foobar.com/foo', 'public_suffix_list');
select cutToFirstSignificantSubdomainCustom('http://xx.blogspot.co.at', 'public_suffix_list');
select '-- www';
select cutToFirstSignificantSubdomainCustomWithWWW('http://www.foo', 'public_suffix_list');
select cutToFirstSignificantSubdomainCustom('http://www.foo', 'public_suffix_list');
select '-- vector';
select cutToFirstSignificantSubdomainCustom('http://xx.blogspot.co.at/' || toString(number), 'public_suffix_list') from numbers(1);
select cutToFirstSignificantSubdomainCustom('there-is-no-such-domain' || toString(number), 'public_suffix_list') from numbers(1);

View File

@ -0,0 +1 @@
foo.com

View File

@ -0,0 +1,2 @@
select * from remote('127.{1,2}', view(select 'foo.com' key), cityHash64(key)) where key = cutToFirstSignificantSubdomainCustom('foo.com', 'public_suffix_list') settings optimize_skip_unused_shards=1, force_optimize_skip_unused_shards=1;
select * from remote('127.{1,2}', view(select 'foo.com' key), cityHash64(key)) where key = cutToFirstSignificantSubdomainCustom('bar.com', 'public_suffix_list') settings optimize_skip_unused_shards=1, force_optimize_skip_unused_shards=1;