Merge pull request #30840 from nickitat/nickitat_any_type_4_arrayStringConcat

This commit is contained in:
Vladimir C 2021-11-01 11:48:45 +03:00 committed by GitHub
commit 9d967e9883
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 65 additions and 48 deletions

View File

@ -213,7 +213,7 @@ SELECT splitByNonAlpha(' 1! a, b. ');
## arrayStringConcat(arr\[, separator\]) {#arraystringconcatarr-separator} ## arrayStringConcat(arr\[, separator\]) {#arraystringconcatarr-separator}
Concatenates the strings (values of type String or Nullable(String)) listed in the array with the separator. separator is an optional parameter: a constant string, set to an empty string by default. Concatenates string representations of values listed in the array with the separator. `separator` is an optional parameter: a constant string, set to an empty string by default.
Returns the string. Returns the string.
## alphaTokens(s) {#alphatokenss} ## alphaTokens(s) {#alphatokenss}

View File

@ -212,8 +212,8 @@ SELECT splitByNonAlpha(' 1! a, b. ');
## arrayStringConcat(arr\[, separator\]) {#arraystringconcatarr-separator} ## arrayStringConcat(arr\[, separator\]) {#arraystringconcatarr-separator}
Склеивает строки, перечисленные в массиве, с разделителем separator. Склеивает строковые представления элементов массива с разделителем `separator`.
separator - необязательный параметр, константная строка, по умолчанию равен пустой строке. `separator` - необязательный параметр, константная строка, по умолчанию равен пустой строке.
Возвращается строка. Возвращается строка.
## alphaTokens(s) {#alphatokenss} ## alphaTokens(s) {#alphatokenss}

View File

@ -1,21 +1,6 @@
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Functions/FunctionsStringArray.h> #include <Functions/FunctionsStringArray.h>
namespace
{
bool isNullableStringOrNullableNothing(DB::DataTypePtr type)
{
if (type->isNullable())
{
const auto & nested_type = assert_cast<const DB::DataTypeNullable &>(*type).getNestedType();
if (isString(nested_type) || isNothing(nested_type))
return true;
}
return false;
}
}
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
@ -33,11 +18,8 @@ DataTypePtr FunctionArrayStringConcat::getReturnTypeImpl(const DataTypes & argum
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get()); const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get());
// An array consisting of only Null-s has type Array(Nullable(Nothing)) if (!array_type)
if (!array_type || !(isString(array_type->getNestedType()) || isNullableStringOrNullableNothing(array_type->getNestedType()))) throw Exception("First argument for function " + getName() + " must be an array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception(
"First argument for function " + getName() + " must be an array of String-s or Nullable(String)-s.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (arguments.size() == 2 && !isString(arguments[1])) if (arguments.size() == 2 && !isString(arguments[1]))
throw Exception("Second argument for function " + getName() + " must be constant string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); throw Exception("Second argument for function " + getName() + " must be constant string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

View File

@ -9,6 +9,7 @@
#include <DataTypes/DataTypeNullable.h> #include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h> #include <DataTypes/DataTypeString.h>
#include <Functions/FunctionHelpers.h> #include <Functions/FunctionHelpers.h>
#include <Functions/FunctionsConversion.h>
#include <Functions/IFunction.h> #include <Functions/IFunction.h>
#include <Functions/Regexps.h> #include <Functions/Regexps.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
@ -17,7 +18,6 @@
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
namespace DB namespace DB
{ {
@ -648,7 +648,7 @@ public:
}; };
/// Joins an array of strings into one string via a separator. /// Joins an array of values of any type serializable to a string into one string via a separator.
class FunctionArrayStringConcat : public IFunction class FunctionArrayStringConcat : public IFunction
{ {
private: private:
@ -734,6 +734,25 @@ private:
null_map); null_map);
} }
/// Returns a column with the string form of the array's nested (flattened) data.
///
/// col_arr      the array column whose nested data is to be represented as strings.
/// nested_type  the element type of the array.
///
/// Three cases, tried in order:
///  - the element type is String: the data column is returned as is;
///  - the data column is a ColumnNullable over a string column: its nested column is returned;
///  - anything else: the data column is passed through ConvertImplGenericToString.
static ColumnPtr serializeNestedColumn(const ColumnArray & col_arr, const DataTypePtr & nested_type)
{
    if (isString(nested_type))
        return col_arr.getDataPtr();

    /// The null map is not handled here; only the nested string column is handed back.
    const ColumnNullable * nullable_data = checkAndGetColumn<ColumnNullable>(col_arr.getData());
    if (nullable_data && isString(nullable_data->getNestedColumn().getDataType()))
        return nullable_data->getNestedColumnPtr();

    /// Generic path: wrap the data column as a single temporary argument and convert it to String.
    ColumnsWithTypeAndName conversion_args;
    conversion_args.emplace_back(col_arr.getDataPtr(), nested_type, "tmp");
    return ConvertImplGenericToString::execute(conversion_args, std::make_shared<DataTypeString>());
}
public: public:
static constexpr auto name = "arrayStringConcat"; static constexpr auto name = "arrayStringConcat";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayStringConcat>(); } static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayStringConcat>(); }
@ -761,7 +780,9 @@ public:
delimiter = col_delim->getValue<String>(); delimiter = col_delim->getValue<String>();
} }
if (const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(arguments[0].column.get())) const auto & nested_type = assert_cast<const DataTypeArray &>(*arguments[0].type).getNestedType();
if (const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(arguments[0].column.get());
col_const_arr && isString(nested_type))
{ {
Array src_arr = col_const_arr->getValue<Array>(); Array src_arr = col_const_arr->getValue<Array>();
String dst_str; String dst_str;
@ -778,25 +799,19 @@ public:
return result_type->createColumnConst(col_const_arr->size(), dst_str); return result_type->createColumnConst(col_const_arr->size(), dst_str);
} }
ColumnPtr src_column = arguments[0].column->convertToFullColumnIfConst();
const ColumnArray & col_arr = assert_cast<const ColumnArray &>(*src_column.get());
ColumnPtr str_subcolumn = serializeNestedColumn(col_arr, nested_type);
const ColumnString & col_string = assert_cast<const ColumnString &>(*str_subcolumn.get());
auto col_res = ColumnString::create();
if (const ColumnNullable * col_nullable = checkAndGetColumn<ColumnNullable>(col_arr.getData()))
executeInternal(col_string, col_arr, delimiter, *col_res, col_nullable->getNullMapData().data());
else else
{ executeInternal(col_string, col_arr, delimiter, *col_res);
const ColumnArray & col_arr = assert_cast<const ColumnArray &>(*arguments[0].column); return col_res;
auto col_res = ColumnString::create();
if (WhichDataType(col_arr.getData().getDataType()).isString())
{
const ColumnString & col_string = assert_cast<const ColumnString &>(col_arr.getData());
executeInternal(col_string, col_arr, delimiter, *col_res);
}
else
{
const ColumnNullable & col_nullable = assert_cast<const ColumnNullable &>(col_arr.getData());
if (const ColumnString * col_string = typeid_cast<const ColumnString *>(col_nullable.getNestedColumnPtr().get()))
executeInternal(*col_string, col_arr, delimiter, *col_res, col_nullable.getNullMapData().data());
else
col_res->insertManyDefaults(col_arr.size());
}
return col_res;
}
} }
}; };

View File

@ -65,7 +65,17 @@ yandex google test 123 hello world goodbye xyz yandex google test 123 hello wo
0 0
hello;world;xyz;def hello;world;xyz;def
1;23;456
1;23;456
127.0.0.1; 1.0.0.1
127.0.0.1; 1.0.0.1
2021-10-01; 2021-10-02
2021-10-01; 2021-10-02
hello;world;xyz;def hello;world;xyz;def
1;23;456
1;23;456
127.0.0.1; 1.0.0.1
127.0.0.1; 1.0.0.1
2021-10-01; 2021-10-02
2021-10-01; 2021-10-02

View File

@ -9,8 +9,18 @@ SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number)), ',') FROM sy
SELECT arrayStringConcat(arrayMap(x -> transform(x, [0, 1, 2, 3, 4, 5, 6, 7, 8], ['yandex', 'google', 'test', '123', '', 'hello', 'world', 'goodbye', 'xyz'], ''), arrayMap(x -> x % 9, range(number))), ' ') FROM system.numbers LIMIT 20; SELECT arrayStringConcat(arrayMap(x -> transform(x, [0, 1, 2, 3, 4, 5, 6, 7, 8], ['yandex', 'google', 'test', '123', '', 'hello', 'world', 'goodbye', 'xyz'], ''), arrayMap(x -> x % 9, range(number))), ' ') FROM system.numbers LIMIT 20;
SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number % 4))) FROM system.numbers LIMIT 10; SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number % 4))) FROM system.numbers LIMIT 10;
SELECT arrayStringConcat([Null, 'hello', Null, 'world', Null, 'xyz', 'def', Null], ';'); SELECT arrayStringConcat([Null, 'hello', Null, 'world', Null, 'xyz', 'def', Null], ';');
SELECT arrayStringConcat([Null, Null], ';');
SELECT arrayStringConcat([Null::Nullable(String), Null::Nullable(String)], ';'); SELECT arrayStringConcat([Null::Nullable(String), Null::Nullable(String)], ';');
SELECT arrayStringConcat(arr, ';') FROM (SELECT [1, 23, 456] AS arr);
SELECT arrayStringConcat(arr, ';') FROM (SELECT [Null, 1, Null, 23, Null, 456, Null] AS arr);
SELECT arrayStringConcat(arr, '; ') FROM (SELECT [toIPv4('127.0.0.1'), toIPv4('1.0.0.1')] AS arr);
SELECT arrayStringConcat(arr, '; ') FROM (SELECT [toIPv4('127.0.0.1'), Null, toIPv4('1.0.0.1')] AS arr);
SELECT arrayStringConcat(arr, '; ') FROM (SELECT [toDate('2021-10-01'), toDate('2021-10-02')] AS arr);
SELECT arrayStringConcat(arr, '; ') FROM (SELECT [toDate('2021-10-01'), Null, toDate('2021-10-02')] AS arr);
SELECT arrayStringConcat(materialize([Null, 'hello', Null, 'world', Null, 'xyz', 'def', Null]), ';'); SELECT arrayStringConcat(materialize([Null, 'hello', Null, 'world', Null, 'xyz', 'def', Null]), ';');
SELECT arrayStringConcat(materialize([Null, Null]), ';');
SELECT arrayStringConcat(materialize([Null::Nullable(String), Null::Nullable(String)]), ';'); SELECT arrayStringConcat(materialize([Null::Nullable(String), Null::Nullable(String)]), ';');
SELECT arrayStringConcat(arr, ';') FROM (SELECT materialize([1, 23, 456]) AS arr);
SELECT arrayStringConcat(arr, ';') FROM (SELECT materialize([Null, 1, Null, 23, Null, 456, Null]) AS arr);
SELECT arrayStringConcat(arr, '; ') FROM (SELECT materialize([toIPv4('127.0.0.1'), toIPv4('1.0.0.1')]) AS arr);
SELECT arrayStringConcat(arr, '; ') FROM (SELECT materialize([toIPv4('127.0.0.1'), Null, toIPv4('1.0.0.1')]) AS arr);
SELECT arrayStringConcat(arr, '; ') FROM (SELECT materialize([toDate('2021-10-01'), toDate('2021-10-02')]) AS arr);
SELECT arrayStringConcat(arr, '; ') FROM (SELECT materialize([toDate('2021-10-01'), Null, toDate('2021-10-02')]) AS arr);