mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-30 19:42:00 +00:00
more docs
This commit is contained in:
parent
ebd22e92ce
commit
b7c34e4356
@ -15,6 +15,9 @@ namespace ErrorCodes
|
|||||||
extern const int LOGICAL_ERROR;
|
extern const int LOGICAL_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Handle state transitions and a few states like `FLUSH_PAIR` and `END`.
|
||||||
|
* */
|
||||||
template <typename StateHandler>
|
template <typename StateHandler>
|
||||||
class CHKeyValuePairExtractor : public KeyValuePairExtractor
|
class CHKeyValuePairExtractor : public KeyValuePairExtractor
|
||||||
{
|
{
|
||||||
|
@ -24,6 +24,9 @@ public:
|
|||||||
const std::vector<char> pair_delimiters;
|
const std::vector<char> pair_delimiters;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Validates (business logic) and creates Configurations for key-value-pair extraction.
|
||||||
|
* */
|
||||||
struct ConfigurationFactory
|
struct ConfigurationFactory
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -7,32 +7,7 @@
|
|||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
* Extracts key value pairs from strings. Strings do not need to be key-value pair only,
|
|
||||||
* they can contain "noise". The grammar below is a simplified representation of what is expected/supported:
|
|
||||||
*
|
|
||||||
* line = (reserved_char* key_value_pair)* reserved_char*
|
|
||||||
* key_value_pair = key kv_separator value
|
|
||||||
* key = <quoted_string> | asciichar asciialphanumeric*
|
|
||||||
* kv_separator = ':'
|
|
||||||
* value = <quoted_string> | asciialphanum*
|
|
||||||
* item_delimiter = ','
|
|
||||||
*
|
|
||||||
* Both keys and values accept underscores as well. Special characters must be escaped.
|
|
||||||
* Control characters (key_value_pair_separator, item_delimiter, escape_character and enclosing_character) are customizable
|
|
||||||
*
|
|
||||||
* The return type is templated and defaults to std::unordered_map<std::string, std::string>. By design, the KeyValuePairExtractor
|
|
||||||
* should extract key value pairs and return them properly escaped (in order to escape, strings are necessary. string_views cannot be used).
|
|
||||||
* The built-in SimpleKeyValuePairEscapingProcessor implements a very simple and non-optimized escaping algorithm. For clients that need
|
|
||||||
* better performance, this abstraction allows custom escaping processors to be injected.
|
|
||||||
*
|
|
||||||
* ClickHouse injects a NoOp escaping processor that returns an unescaped std::unordered_map<std::string_view, std::string_view>. This avoids
|
|
||||||
* unnecessary copies and allows escaping to be done on the client side. At the same time, the KeyValuePairExtractor class can be unit tested
|
|
||||||
* in a standalone manner by using the SimpleKeyValuePairEscapingProcessor for escaping.
|
|
||||||
*
|
|
||||||
* If we want to simplify this in the future, approach #2 in https://github.com/ClickHouse/ClickHouse/pull/43606#discussion_r1049541759 seems
|
|
||||||
* to be the best bet.
|
|
||||||
* */
|
|
||||||
struct KeyValuePairExtractor
|
struct KeyValuePairExtractor
|
||||||
{
|
{
|
||||||
virtual ~KeyValuePairExtractor() = default;
|
virtual ~KeyValuePairExtractor() = default;
|
||||||
|
@ -11,6 +11,10 @@ namespace DB
|
|||||||
namespace extractKV
|
namespace extractKV
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/*
|
||||||
|
* `StateHandlerImpl` makes use of string search algorithms to find delimiters. This class creates the needles for each state
|
||||||
|
* based on the contents of `Configuration`.
|
||||||
|
* */
|
||||||
template <bool WITH_ESCAPING>
|
template <bool WITH_ESCAPING>
|
||||||
class NeedleFactory
|
class NeedleFactory
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user