ClickHouse/src/Functions/extractAll.cpp
Alexey Milovidov 66d297ee47 Fix style
2023-11-11 04:25:49 +01:00

123 lines
3.3 KiB
C++

#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionTokens.h>
#include <Functions/FunctionFactory.h>
#include <Functions/Regexps.h>
#include <Interpreters/Context.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
namespace DB
{
namespace ErrorCodes
{
/// Declared here, defined elsewhere. Used by ExtractAllImpl::init when the pattern column is not a constant string.
extern const int ILLEGAL_COLUMN;
}
/** Function that extracts substrings of a string into an array of strings.
  *
  * extractAll(s, regexp) - select from the string all subsequences that match the regexp.
  * Each element of the result is:
  * - the first subpattern, if the regexp has subpatterns;
  * - the zero subpattern (the whole match) otherwise.
  * If nothing matches, the result is an empty array.
  */
namespace
{

using Pos = const char *;

/** Token generator for extractAll(haystack, pattern); it is plugged into FunctionTokens below.
  *
  * Usage protocol, as visible from this class:
  *  - init() compiles the constant pattern once;
  *  - set() is called for each next string to process;
  *  - get() is called repeatedly and yields one token per regexp match until it returns false.
  */
class ExtractAllImpl
{
private:
    Regexps::RegexpPtr re;
    OptimizedRegularExpression::MatchVec matches;

    /// Index in `matches` of the group returned as a token:
    /// 1 (first subpattern) if the pattern has subpatterns, otherwise 0 (the whole match).
    size_t capture = 0;

    /// Current scan position and end of the string being processed.
    /// Null until set() is called, so that get() reliably returns false in that state.
    Pos pos = nullptr;
    Pos end = nullptr;

public:
    static constexpr auto name = "extractAll";
    static String getName() { return name; }
    static bool isVariadic() { return false; }
    static size_t getNumberOfArguments() { return 2; }

    /// Validates that the arguments are (String haystack, const String pattern).
    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
    {
        FunctionArgumentDescriptors mandatory_args{
            {"haystack", &isString<IDataType>, nullptr, "String"},
            {"pattern", &isString<IDataType>, isColumnConst, "const String"}
        };

        validateFunctionArgumentTypes(func, arguments, mandatory_args);
    }

    /// The haystack is argument 0 (see checkArguments); presumably read by FunctionTokens - confirm there.
    static constexpr auto strings_argument_position = 0uz;

    /** Compiles the pattern taken from the second argument and decides which group to extract.
      * The boolean parameter is not used by this implementation.
      * Throws ILLEGAL_COLUMN if the pattern column is not a constant string.
      */
    void init(const ColumnsWithTypeAndName & arguments, bool /*max_substrings_includes_remaining_string*/)
    {
        const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());

        if (!col)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function {}. "
                "Must be constant string.", arguments[1].column->getName(), getName());

        re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));

        /// Extract the first subpattern when the regexp has one, otherwise the whole match.
        capture = re->getNumberOfSubpatterns() > 0 ? 1 : 0;
        matches.resize(capture + 1);
    }

    /// Called for each next string.
    void set(Pos pos_, Pos end_)
    {
        pos = pos_;
        end = end_;
    }

    /** Get the next token, if any, or return false.
      * On success [token_begin, token_end) is the extracted group and the scan position
      * moves just past the whole match.
      */
    bool get(Pos & token_begin, Pos & token_end)
    {
        if (!pos || pos > end)
            return false;

        /// Stop when nothing matches in the remainder. An empty overall match also ends the iteration:
        /// it could leave `pos` unchanged, so repeated calls would presumably never terminate.
        if (!re->match(pos, end - pos, matches) || !matches[0].length)
            return false;

        if (matches[capture].offset == std::string::npos)
        {
            /// The selected group has no offset in this match: yield an empty token at the current position.
            token_begin = pos;
            token_end = pos;
        }
        else
        {
            token_begin = pos + matches[capture].offset;
            token_end = token_begin + matches[capture].length;
        }

        /// Continue scanning right after the whole match, not after the extracted group.
        pos += matches[0].offset + matches[0].length;

        return true;
    }
};

using FunctionExtractAll = FunctionTokens<ExtractAllImpl>;

}
/// Registers FunctionExtractAll (FunctionTokens instantiated with ExtractAllImpl) in the function factory.
REGISTER_FUNCTION(ExtractAll)
{
factory.registerFunction<FunctionExtractAll>();
}
}