Fix constness of custom TLDs

Before this patch the functions below returned an incorrect type for constant
arguments, and hence optimize_skip_unused_shards did not work:

- cutToFirstSignificantSubdomainCustom()
- cutToFirstSignificantSubdomainCustomWithWWW()
- firstSignificantSubdomainCustom()
This commit is contained in:
Azat Khuzhin 2021-07-07 01:12:56 +03:00
parent 7b004fb828
commit 42a8445462
5 changed files with 41 additions and 4 deletions

View File

@ -60,14 +60,25 @@ public:
return arguments[0].type;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
{
const ColumnConst * column_tld_list_name = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
FirstSignificantSubdomainCustomLookup tld_lookup(column_tld_list_name->getValue<String>());
/// FIXME: convertToFullColumnIfConst() is suboptimal
auto column = arguments[0].column->convertToFullColumnIfConst();
if (const ColumnString * col = checkAndGetColumn<ColumnString>(*column))
auto column = arguments[0].column;
if (const ColumnConst * const_col = checkAndGetColumnConst<ColumnString>(column.get()))
{
const String & data = const_col->getValue<String>();
const String & res = scalar(tld_lookup, data);
auto col_res = ColumnString::create();
col_res->insert(res);
auto col_const_res = ColumnConst::create(std::move(col_res), input_rows_count);
return col_const_res;
}
else if (const ColumnString * col = checkAndGetColumn<ColumnString>(*column))
{
auto col_res = ColumnString::create();
vector(tld_lookup, col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets());
@ -107,6 +118,15 @@ public:
prev_offset = offsets[i];
}
}
/// Single-value counterpart of vector(): applies Extractor to one string.
///
/// Passes the bytes of `data` (pointer and size) to Extractor::execute together
/// with `tld_lookup`, and returns a copy of the `length` bytes starting at
/// `start` that the extractor reports through its output arguments.
static String scalar(FirstSignificantSubdomainCustomLookup & tld_lookup, const String & data)
{
    /// Both are output arguments filled in by Extractor::execute.
    Pos extracted_begin;
    size_t extracted_size;

    Extractor::execute(tld_lookup, data.data(), data.size(), extracted_begin, extracted_size);

    return String(extracted_begin, extracted_size);
}
};
}

View File

@ -22,3 +22,9 @@ foobar.com
foobar.com
foobar.com
xx.blogspot.co.at
-- www
www.foo
foo
-- vector
xx.blogspot.co.at

View File

@ -29,3 +29,11 @@ select cutToFirstSignificantSubdomainCustom('http://foobar.com', 'public_suffix_
select cutToFirstSignificantSubdomainCustom('http://foobar.com/foo', 'public_suffix_list');
select cutToFirstSignificantSubdomainCustom('http://bar.foobar.com/foo', 'public_suffix_list');
select cutToFirstSignificantSubdomainCustom('http://xx.blogspot.co.at', 'public_suffix_list');
select '-- www';
select cutToFirstSignificantSubdomainCustomWithWWW('http://www.foo', 'public_suffix_list');
select cutToFirstSignificantSubdomainCustom('http://www.foo', 'public_suffix_list');
select '-- vector';
select cutToFirstSignificantSubdomainCustom('http://xx.blogspot.co.at/' || toString(number), 'public_suffix_list') from numbers(1);
select cutToFirstSignificantSubdomainCustom('there-is-no-such-domain' || toString(number), 'public_suffix_list') from numbers(1);

View File

@ -0,0 +1 @@
foo.com

View File

@ -0,0 +1,2 @@
-- Calls cutToFirstSignificantSubdomainCustom() with a constant argument ('foo.com') in the WHERE clause
-- of a query over remote('127.{1,2}', ...) sharded by cityHash64(key), with both
-- optimize_skip_unused_shards=1 and force_optimize_skip_unused_shards=1 set.
-- NOTE(review): presumably the forced setting makes the query fail unless the right-hand side
-- is recognized as a constant — confirm against the settings documentation.
select * from remote('127.{1,2}', view(select 'foo.com' key), cityHash64(key)) where key = cutToFirstSignificantSubdomainCustom('foo.com', 'public_suffix_list') settings optimize_skip_unused_shards=1, force_optimize_skip_unused_shards=1;
-- Same query with the constant 'bar.com', which differs from the only key the view produces ('foo.com').
select * from remote('127.{1,2}', view(select 'foo.com' key), cityHash64(key)) where key = cutToFirstSignificantSubdomainCustom('bar.com', 'public_suffix_list') settings optimize_skip_unused_shards=1, force_optimize_skip_unused_shards=1;