ClickHouse/src/Functions/FunctionsStringSearch.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

246 lines
9.1 KiB
C++
Raw Normal View History

2017-04-21 17:47:27 +00:00
#pragma once
2011-10-16 07:11:36 +00:00
#include <Columns/ColumnConst.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
2018-09-03 04:57:01 +00:00
#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
2018-09-03 04:57:01 +00:00
#include <DataTypes/DataTypesNumber.h>
ColumnConst unification (#1011) * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * Fixed error in ColumnArray::replicateGeneric [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150].
2017-07-21 06:35:58 +00:00
#include <Functions/FunctionHelpers.h>
2021-05-17 07:30:42 +00:00
#include <Functions/IFunction.h>
2019-03-26 21:56:46 +00:00
#include <Interpreters/Context.h>
#include <IO/WriteHelpers.h>
2016-10-24 13:47:15 +00:00
2011-10-16 07:11:36 +00:00
namespace DB
{
2017-05-27 15:45:25 +00:00
/** Search and replace functions in strings:
* position(haystack, needle) - the normal search for a substring in a string, returns the position (in bytes) of the found substring starting with 1, or 0 if no substring is found.
* positionUTF8(haystack, needle) - the same, but the position is counted in Unicode code points, assuming that the string is encoded in UTF-8.
* positionCaseInsensitive(haystack, needle)
* positionCaseInsensitiveUTF8(haystack, needle)
*
2017-05-27 15:45:25 +00:00
* like(haystack, pattern) - search by a LIKE pattern; returns 0 or 1. Case-sensitive; use ilike for case-insensitive matching.
2011-10-17 08:28:39 +00:00
* notLike(haystack, pattern)
*
* ilike(haystack, pattern) - like 'like' but case-insensitive
* notIlike(haystack, pattern)
*
2017-05-27 15:45:25 +00:00
* match(haystack, pattern) - search by regular expression re2; Returns 0 or 1.
2012-07-21 03:45:48 +00:00
*
* countSubstrings(haystack, needle) -- count number of occurrences of needle in haystack.
* countSubstringsCaseInsensitive(haystack, needle)
* countSubstringsCaseInsensitiveUTF8(haystack, needle)
*
* hasToken()
* hasTokenCaseInsensitive()
*
* JSON stuff:
* visitParamExtractBool()
* simpleJSONExtractBool()
* visitParamExtractFloat()
* simpleJSONExtractFloat()
* visitParamExtractInt()
* simpleJSONExtractInt()
* visitParamExtractUInt()
* simpleJSONExtractUInt()
* visitParamHas()
* simpleJSONHas()
*
2017-05-27 15:45:25 +00:00
* Applies the re2 regexp and extracts:
* - the first subpattern, if the regexp has a subpattern;
* - otherwise the zero subpattern (the whole matched part);
* - an empty string, if there is no match.
* extract(haystack, pattern)
2011-10-16 07:11:36 +00:00
*/
2018-09-03 04:57:01 +00:00
/// Error codes thrown by FunctionsStringSearch. They are only declared here (`extern`);
/// the definitions are not part of this header.
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN; /// Thrown by executeImpl for an unsupported haystack/needle column combination.
extern const int ILLEGAL_TYPE_OF_ARGUMENT; /// Thrown by getReturnTypeImpl when an argument has an unsupported data type.
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; /// Thrown by getReturnTypeImpl when the argument count is not 2 or 3.
2018-09-03 04:57:01 +00:00
}
2011-10-16 07:11:36 +00:00
/// Compile-time switch for FunctionsStringSearch that selects the shape of the result.
enum class ExecutionErrorPolicy
{
/// The return type is wrapped into Nullable and a null map column is handed to the implementation.
Null,
/// Default policy: plain (non-Nullable) result, no null map is created.
/// Presumably the implementation throws on execution errors in this mode - confirm against the concrete Impl.
Throw
};
template <typename Impl, ExecutionErrorPolicy execution_error_policy = ExecutionErrorPolicy::Throw>
class FunctionsStringSearch : public IFunction
{
public:
2023-01-23 22:27:48 +00:00
static constexpr auto name = Impl::name;
2021-06-01 12:20:52 +00:00
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionsStringSearch>(); }
String getName() const override { return name; }
bool isVariadic() const override { return Impl::supports_start_pos; }
2021-06-22 16:21:23 +00:00
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
size_t getNumberOfArguments() const override
{
if (Impl::supports_start_pos)
return 0;
return 2;
}
bool useDefaultImplementationForConstants() const override { return Impl::use_default_implementation_for_constants; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override
{
return Impl::getArgumentsThatAreAlwaysConstant();
}
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
2011-10-16 07:11:36 +00:00
{
if (arguments.size() < 2 || 3 < arguments.size())
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Number of arguments for function {} doesn't match: passed {}, should be 2 or 3",
getName(), arguments.size());
if (!isStringOrFixedString(arguments[0]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of function {}",
arguments[0]->getName(), getName());
if (!isString(arguments[1]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of function {}",
arguments[1]->getName(), getName());
if (arguments.size() >= 3)
{
if (!isUnsignedInteger(arguments[2]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of function {}",
arguments[2]->getName(), getName());
}
auto return_type = std::make_shared<DataTypeNumber<typename Impl::ResultType>>();
if constexpr (execution_error_policy == ExecutionErrorPolicy::Null)
return makeNullable(return_type);
return return_type;
2011-10-16 07:11:36 +00:00
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
2011-10-16 07:11:36 +00:00
{
2020-10-18 19:00:13 +00:00
const ColumnPtr & column_haystack = arguments[0].column;
const ColumnPtr & column_needle = arguments[1].column;
ColumnPtr column_start_pos = nullptr;
if (arguments.size() >= 3)
2020-10-18 19:00:13 +00:00
column_start_pos = arguments[2].column;
ColumnConst unification (#1011) * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * Fixed error in ColumnArray::replicateGeneric [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150].
2017-07-21 06:35:58 +00:00
const ColumnConst * col_haystack_const = typeid_cast<const ColumnConst *>(&*column_haystack);
const ColumnConst * col_needle_const = typeid_cast<const ColumnConst *>(&*column_needle);
using ResultType = typename Impl::ResultType;
auto col_res = ColumnVector<ResultType>::create();
auto & vec_res = col_res->getData();
2023-01-23 22:27:48 +00:00
const auto create_null_map = [&]() -> ColumnUInt8::MutablePtr
{
if constexpr (execution_error_policy == ExecutionErrorPolicy::Null)
return ColumnUInt8::create(vec_res.size());
return {};
};
if constexpr (!Impl::use_default_implementation_for_constants)
2011-10-16 07:11:36 +00:00
{
if (col_haystack_const && col_needle_const)
{
const auto is_col_start_pos_const = !column_start_pos || isColumnConst(*column_start_pos);
vec_res.resize(is_col_start_pos_const ? 1 : column_start_pos->size());
2023-01-23 22:27:48 +00:00
const auto null_map = create_null_map();
Impl::constantConstant(
col_haystack_const->getValue<String>(),
col_needle_const->getValue<String>(),
column_start_pos,
2023-01-23 22:27:48 +00:00
vec_res,
null_map.get());
if (is_col_start_pos_const)
2020-10-18 19:00:13 +00:00
return result_type->createColumnConst(col_haystack_const->size(), toField(vec_res[0]));
else
2020-10-18 19:00:13 +00:00
return col_res;
}
2011-10-16 07:11:36 +00:00
}
vec_res.resize(column_haystack->size());
2023-01-23 22:27:48 +00:00
auto null_map = create_null_map();
ColumnConst unification (#1011) * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * Fixed error in ColumnArray::replicateGeneric [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150].
2017-07-21 06:35:58 +00:00
const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*column_haystack);
const ColumnFixedString * col_haystack_vector_fixed = checkAndGetColumn<ColumnFixedString>(&*column_haystack);
ColumnConst unification (#1011) * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * Fixed error in ColumnArray::replicateGeneric [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150].
2017-07-21 06:35:58 +00:00
const ColumnString * col_needle_vector = checkAndGetColumn<ColumnString>(&*column_needle);
if (col_haystack_vector && col_needle_vector)
Impl::vectorVector(
col_haystack_vector->getChars(),
col_haystack_vector->getOffsets(),
col_needle_vector->getChars(),
col_needle_vector->getOffsets(),
column_start_pos,
2023-01-23 22:27:48 +00:00
vec_res,
null_map.get());
else if (col_haystack_vector && col_needle_const)
2023-01-23 22:27:48 +00:00
Impl::vectorConstant(
col_haystack_vector->getChars(),
col_haystack_vector->getOffsets(),
col_needle_const->getValue<String>(),
column_start_pos,
vec_res,
null_map.get());
else if (col_haystack_vector_fixed && col_needle_vector)
Impl::vectorFixedVector(
col_haystack_vector_fixed->getChars(),
col_haystack_vector_fixed->getN(),
col_needle_vector->getChars(),
col_needle_vector->getOffsets(),
column_start_pos,
2023-01-23 22:27:48 +00:00
vec_res,
null_map.get());
else if (col_haystack_vector_fixed && col_needle_const)
Impl::vectorFixedConstant(
col_haystack_vector_fixed->getChars(),
col_haystack_vector_fixed->getN(),
col_needle_const->getValue<String>(),
2023-01-23 22:27:48 +00:00
vec_res,
null_map.get());
else if (col_haystack_const && col_needle_vector)
Impl::constantVector(
col_haystack_const->getValue<String>(),
col_needle_vector->getChars(),
col_needle_vector->getOffsets(),
column_start_pos,
2023-01-23 22:27:48 +00:00
vec_res,
null_map.get());
2011-10-16 07:11:36 +00:00
else
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal columns {} and {} of arguments of function {}",
arguments[0].column->getName(),
arguments[1].column->getName(),
getName());
2023-01-23 22:27:48 +00:00
if constexpr (execution_error_policy == ExecutionErrorPolicy::Null)
return ColumnNullable::create(std::move(col_res), std::move(null_map));
2020-10-18 19:00:13 +00:00
return col_res;
2011-10-16 07:11:36 +00:00
}
};
}