Merge branch 'task_ends_starts' of https://github.com/VadimPE/ClickHouse into VadimPE-task_ends_starts

This commit is contained in:
Alexey Milovidov 2018-06-06 23:36:22 +03:00
commit 5fa25968c8
6 changed files with 601 additions and 0 deletions

368
Comparison/Comparison.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@ -1082,6 +1082,164 @@ private:
ErrorCodes::ILLEGAL_COLUMN};
}
};
template <typename Name>
class FunctionStartsEndsWith : public IFunction
{
public:
static constexpr auto name = Name::name;
static FunctionPtr create(const Context &)
{
return std::make_shared<FunctionStartsEndsWith>();
}
String getName() const override
{
return name;
}
size_t getNumberOfArguments() const override
{
return 2;
}
bool useDefaultImplementationForConstants() const override
{
return true;
}
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!arguments[0]->isStringOrFixedString())
throw Exception(
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!arguments[1]->isStringOrFixedString())
throw Exception(
"Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeNumber<UInt8>>();
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
{
const IColumn * string_columns = block.getByPosition(arguments[0]).column.get();
const IColumn * template_columns = block.getByPosition(arguments[1]).column.get();
const ColumnString * strings = checkAndGetColumn<ColumnString>(string_columns);
const ColumnString * templates = checkAndGetColumn<ColumnString>(template_columns);
const ColumnFixedString * fixed_strings = checkAndGetColumn<ColumnFixedString>(string_columns);
const ColumnFixedString * fixed_templates = checkAndGetColumn<ColumnFixedString>(template_columns);
const ColumnConst * const_templates = checkAndGetColumn<ColumnConst>(template_columns);
auto col_res = ColumnVector<UInt8>::create();
typename ColumnVector<UInt8>::Container & vec_res = col_res->getData();
FunctionName functionName;
if (name == static_cast<std::string>("startsWith"))
{
functionName = FunctionName::StartsWith;
}
else
{
functionName = FunctionName::EndsWith;
}
if (strings && templates)
{
vec_res.resize(strings->size());
execute<StringSource, StringSource>(StringSource(*strings), StringSource(*templates), vec_res, functionName);
}
else if (strings && fixed_templates)
{
vec_res.resize(strings->size());
execute<StringSource, FixedStringSource>(StringSource(*strings), FixedStringSource(*fixed_templates), vec_res, functionName);
}
else if (fixed_strings && templates)
{
vec_res.resize(fixed_strings->size());
execute<FixedStringSource, StringSource>(FixedStringSource(*fixed_strings), StringSource(*templates), vec_res, functionName);
}
else if (fixed_strings && fixed_templates)
{
vec_res.resize(fixed_strings->size());
execute<FixedStringSource, FixedStringSource>(
FixedStringSource(*fixed_strings), FixedStringSource(*fixed_templates), vec_res, functionName);
}
else if (strings)
{
vec_res.resize(strings->size());
execute<StringSource, ConstSource<StringSource>>(
StringSource(*strings), ConstSource<StringSource>(*const_templates), vec_res, functionName);
}
else if (fixed_strings)
{
vec_res.resize(fixed_strings->size());
execute<FixedStringSource, ConstSource<StringSource>>(
FixedStringSource(*fixed_strings), ConstSource<StringSource>(*const_templates), vec_res, functionName);
}
block.getByPosition(result).column = std::move(col_res);
}
private:
enum FunctionName
{
StartsWith,
EndsWith
};
template <typename StringType, typename TemplateType>
static void execute(StringType str, TemplateType str_template, PaddedPODArray<UInt8> & res_data, FunctionName functionName)
{
size_t ind = 0;
size_t start_pos;
while (!str.isEnd())
{
if (str_template.getElementSize() == 1)
{
res_data[ind] = true;
}
else
{
if (str.getElementSize() < str_template.getElementSize())
{
res_data[ind] = false;
}
else
{
if (functionName == FunctionName::StartsWith)
{
start_pos = 0;
}
else
{
start_pos = str.getElementSize() - str_template.getElementSize();
}
StringRef str_ref(str.getWhole().data + start_pos, str_template.getElementSize() - 1);
StringRef template_ref(str_template.getWhole().data, str_template.getElementSize() - 1);
if (str_ref == template_ref)
{
res_data[ind] = true;
}
else
{
res_data[ind] = false;
}
}
}
str.next();
ind++;
if (str_template.getSizeForReserve() > 1)
{
str_template.next();
}
}
}
};
struct NameEmpty
@ -1120,6 +1278,15 @@ struct NameConcatAssumeInjective
{
static constexpr auto name = "concatAssumeInjective";
};
struct NameStartsWith
{
static constexpr auto name = "startsWith";
};
struct NameEndsWith
{
static constexpr auto name = "endsWith";
};
using FunctionEmpty = FunctionStringOrArrayToT<EmptyImpl<false>, NameEmpty, UInt8>;
using FunctionNotEmpty = FunctionStringOrArrayToT<EmptyImpl<true>, NameNotEmpty, UInt8>;
@ -1130,6 +1297,8 @@ using FunctionUpper = FunctionStringToString<LowerUpperImpl<'a', 'z'>, NameUpper
using FunctionReverseUTF8 = FunctionStringToString<ReverseUTF8Impl, NameReverseUTF8, true>;
using FunctionConcat = ConcatImpl<NameConcat, false>;
using FunctionConcatAssumeInjective = ConcatImpl<NameConcatAssumeInjective, true>;
using FunctionStartsWith = FunctionStartsEndsWith<NameStartsWith>;
using FunctionEndsWith = FunctionStartsEndsWith<NameEndsWith>;
void registerFunctionsString(FunctionFactory & factory)
@ -1149,5 +1318,7 @@ void registerFunctionsString(FunctionFactory & factory)
factory.registerFunction<FunctionSubstring>();
factory.registerFunction<FunctionSubstringUTF8>();
factory.registerFunction<FunctionAppendTrailingCharIfAbsent>();
factory.registerFunction<FunctionStartsWith>();
factory.registerFunction<FunctionEndsWith>();
}
}

View File

@ -0,0 +1,16 @@
0
0
1
1
0
0
1
1
1
1
1
0
1
3
2
0

View File

@ -0,0 +1,15 @@
SELECT startsWith(s, 'He') FROM (SELECT arrayJoin(['', 'H', 'He', 'Hellow', '3434', 'fffffffffdHe']) AS s);
SELECT startsWith(s, '') FROM (SELECT arrayJoin(['', 'h', 'hi']) AS s);
SELECT startsWith('123', '123');
SELECT startsWith('123', '12');
SELECT startsWith('123', '1234');
SELECT startsWith('123', '');
DROP TABLE IF EXISTS startsWith_test;
CREATE TABLE startsWith_test(S1 String, S2 String, S3 FixedString(2)) ENGINE=Memory;
INSERT INTO startsWith_test values ('11', '22', '33'), ('a', 'a', 'bb'), ('abc', 'ab', '23');
SELECT COUNT() FROM startsWith_test WHERE startsWith(S1, S1);
SELECT COUNT() FROM startsWith_test WHERE startsWith(S1, S2);
SELECT COUNT() FROM startsWith_test WHERE startsWith(S2, S3);
DROP TABLE startsWith_test;

View File

@ -0,0 +1,16 @@
0
0
1
1
0
0
1
1
1
1
1
0
1
3
2
0

View File

@ -0,0 +1,15 @@
SELECT endsWith(s, 'ow') FROM (SELECT arrayJoin(['', 'o', 'ow', 'Hellow', '3434', 'owfffffffdHe']) AS s);
SELECT endsWith(s, '') FROM (SELECT arrayJoin(['', 'h', 'hi']) AS s);
SELECT endsWith('123', '3');
SELECT endsWith('123', '23');
SELECT endsWith('123', '32');
SELECT endsWith('123', '');
DROP TABLE IF EXISTS endsWith_test;
CREATE TABLE endsWith_test(S1 String, S2 String, S3 FixedString(2)) ENGINE=Memory;
INSERT INTO endsWith_test values ('11', '22', '33'), ('a', 'a', 'bb'), ('abc', 'bc', '23');
SELECT COUNT() FROM endsWith_test WHERE endsWith(S1, S1);
SELECT COUNT() FROM endsWith_test WHERE endsWith(S1, S2);
SELECT COUNT() FROM endsWith_test WHERE endsWith(S2, S3);
DROP TABLE endsWith_test;