mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 18:12:02 +00:00
more docs
This commit is contained in:
parent
ebd22e92ce
commit
b7c34e4356
@ -15,6 +15,9 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handles state transitions and a few states like `FLUSH_PAIR` and `END`.
|
||||
* */
|
||||
template <typename StateHandler>
|
||||
class CHKeyValuePairExtractor : public KeyValuePairExtractor
|
||||
{
|
||||
|
@ -24,6 +24,9 @@ public:
|
||||
const std::vector<char> pair_delimiters;
|
||||
};
|
||||
|
||||
/*
|
||||
* Validates (business logic) and creates Configurations for key-value-pair extraction.
|
||||
* */
|
||||
struct ConfigurationFactory
|
||||
{
|
||||
public:
|
||||
|
@ -7,32 +7,7 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/*
|
||||
* Extracts key value pairs from strings. Strings do not need to be key-value pair only,
|
||||
* they can contain "noise". The grammar below is a simplified representation of what is expected/supported:
|
||||
*
|
||||
* line = (reserved_char* key_value_pair)* reserved_char*
|
||||
* key_value_pair = key kv_separator value
|
||||
* key = <quoted_string> | asciichar asciialphanumeric*
|
||||
* kv_separator = ':'
|
||||
* value = <quoted_string> | asciialphanum*
|
||||
* item_delimiter = ','
|
||||
*
|
||||
* Both keys and values accept underscores as well. Special characters must be escaped.
|
||||
* Control characters (key_value_pair_separator, item_delimiter, escape_character and enclosing_character) are customizable
|
||||
*
|
||||
* The return type is templated and defaults to std::unordered_map<std::string, std::string>. By design, the KeyValuePairExtractor
|
||||
* should extract key value pairs and return them properly escaped (in order to escape, strings are necessary. string_views cannot be used).
|
||||
* The built-in SimpleKeyValuePairEscapingProcessor implements a very simple and non-optimized escaping algorithm. For clients that need
|
||||
* better performance, this abstraction allows custom escaping processors to be injected.
|
||||
*
|
||||
* ClickHouse injects a NoOp escaping processor that returns an unescaped std::unordered_map<std::string_view, std::string_view>. This avoids
|
||||
* unnecessary copies and allows escaping to be done on the client side. At the same time, the KeyValuePairExtractor class can be unit tested
|
||||
* in a standalone manner by using the SimpleKeyValuePairEscapingProcessor for escaping.
|
||||
*
|
||||
* If we want to simplify this in the future, approach #2 in https://github.com/ClickHouse/ClickHouse/pull/43606#discussion_r1049541759 seems
|
||||
* to be the best bet.
|
||||
* */
|
||||
|
||||
struct KeyValuePairExtractor
|
||||
{
|
||||
virtual ~KeyValuePairExtractor() = default;
|
||||
|
@ -11,6 +11,10 @@ namespace DB
|
||||
namespace extractKV
|
||||
{
|
||||
|
||||
/*
|
||||
* `StateHandlerImpl` makes use of string search algorithms to find delimiters. This class creates the needles for each state
|
||||
* based on the contents of `Configuration`.
|
||||
* */
|
||||
template <bool WITH_ESCAPING>
|
||||
class NeedleFactory
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user