mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Improve usability of the Regexp format
This commit is contained in:
parent
96e858480e
commit
89ef4e9b53
@ -13,10 +13,14 @@ namespace ErrorCodes
|
|||||||
{
|
{
|
||||||
extern const int INCORRECT_DATA;
|
extern const int INCORRECT_DATA;
|
||||||
extern const int LOGICAL_ERROR;
|
extern const int LOGICAL_ERROR;
|
||||||
|
extern const int BAD_ARGUMENTS;
|
||||||
}
|
}
|
||||||
|
|
||||||
RegexpFieldExtractor::RegexpFieldExtractor(const FormatSettings & format_settings) : regexp(format_settings.regexp.regexp), skip_unmatched(format_settings.regexp.skip_unmatched)
|
RegexpFieldExtractor::RegexpFieldExtractor(const FormatSettings & format_settings) : regexp_str(format_settings.regexp.regexp), regexp(regexp_str), skip_unmatched(format_settings.regexp.skip_unmatched)
|
||||||
{
|
{
|
||||||
|
if (regexp_str.empty())
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The regular expression is not set for the `Regexp` format. It requires setting the value of the `format_regexp` setting.");
|
||||||
|
|
||||||
size_t fields_count = regexp.NumberOfCapturingGroups();
|
size_t fields_count = regexp.NumberOfCapturingGroups();
|
||||||
matched_fields.resize(fields_count);
|
matched_fields.resize(fields_count);
|
||||||
re2_arguments.resize(fields_count);
|
re2_arguments.resize(fields_count);
|
||||||
@ -58,8 +62,8 @@ bool RegexpFieldExtractor::parseRow(PeekableReadBuffer & buf)
|
|||||||
static_cast<int>(re2_arguments_ptrs.size()));
|
static_cast<int>(re2_arguments_ptrs.size()));
|
||||||
|
|
||||||
if (!match && !skip_unmatched)
|
if (!match && !skip_unmatched)
|
||||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Line \"{}\" doesn't match the regexp.",
|
throw Exception(ErrorCodes::INCORRECT_DATA, "Line \"{}\" doesn't match the regexp: `{}`",
|
||||||
std::string(buf.position(), line_to_match));
|
std::string(buf.position(), line_to_match), regexp_str);
|
||||||
|
|
||||||
buf.position() += line_size;
|
buf.position() += line_size;
|
||||||
if (!buf.eof() && !checkChar('\n', buf))
|
if (!buf.eof() && !checkChar('\n', buf))
|
||||||
|
@ -31,6 +31,7 @@ public:
|
|||||||
size_t getNumberOfGroups() const { return regexp.NumberOfCapturingGroups(); }
|
size_t getNumberOfGroups() const { return regexp.NumberOfCapturingGroups(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
String regexp_str;
|
||||||
const re2::RE2 regexp;
|
const re2::RE2 regexp;
|
||||||
// The vector of fields extracted from line using regexp.
|
// The vector of fields extracted from line using regexp.
|
||||||
std::vector<std::string_view> matched_fields;
|
std::vector<std::string_view> matched_fields;
|
||||||
|
@ -0,0 +1,2 @@
|
|||||||
|
regular expression is not set
|
||||||
|
`Upyachka`
|
9
tests/queries/0_stateless/03153_format_regexp_usability.sh
Executable file
9
tests/queries/0_stateless/03153_format_regexp_usability.sh
Executable file
@ -0,0 +1,9 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Tags: no-fasttest, no-parallel, no-ordinary-database, long
|
||||||
|
|
||||||
|
# Usability test for the `Regexp` input format: each command below is expected to fail,
# and only the relevant fragment of the error text is kept (grep -o) for the reference output.
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||||
|
# shellcheck source=../shell_config.sh
|
||||||
|
. "$CUR_DIR"/../shell_config.sh
|
||||||
|
|
||||||
|
# Case 1: no `format_regexp` setting is given. stderr is merged into stdout so that grep
# can see the error; the output must contain the fixed string 'regular expression is not set'.
${CLICKHOUSE_LOCAL} --query "SELECT * FROM format(Regexp, 's String', 'Hello')" 2>&1 | grep -o -F 'regular expression is not set'
|
||||||
|
# Case 2: `format_regexp` is set to a pattern that the input line 'Hello' does not match.
# The output must contain the pattern itself quoted in backticks (matched as a fixed string).
${CLICKHOUSE_LOCAL} --query "SELECT * FROM format(Regexp, 's String', 'Hello') SETTINGS format_regexp = 'Upyachka'" 2>&1 | grep -o -F '`Upyachka`'
|
Loading…
Reference in New Issue
Block a user