more docs

This commit is contained in:
Arthur Passos 2023-04-05 12:33:30 -03:00
parent ebd22e92ce
commit b7c34e4356
4 changed files with 11 additions and 26 deletions

View File

@ -15,6 +15,9 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
} }
/*
* Handle state transitions and a few states like `FLUSH_PAIR` and `END`.
* */
template <typename StateHandler> template <typename StateHandler>
class CHKeyValuePairExtractor : public KeyValuePairExtractor class CHKeyValuePairExtractor : public KeyValuePairExtractor
{ {

View File

@ -24,6 +24,9 @@ public:
const std::vector<char> pair_delimiters; const std::vector<char> pair_delimiters;
}; };
/*
* Validates (business logic) and creates Configurations for key-value-pair extraction.
* */
struct ConfigurationFactory struct ConfigurationFactory
{ {
public: public:

View File

@ -7,32 +7,7 @@
namespace DB namespace DB
{ {
/*
* Extracts key-value pairs from strings. A string does not need to consist solely of key-value pairs;
* it can contain "noise". The grammar below is a simplified representation of what is expected/supported:
*
* line = (reserved_char* key_value_pair)* reserved_char*
* key_value_pair = key kv_separator value
* key = <quoted_string> | asciichar asciialphanumeric*
* kv_separator = ':'
* value = <quoted_string> | asciialphanum*
* item_delimiter = ','
*
* Both keys and values accept underscores as well. Special characters must be escaped.
* Control characters (key_value_pair_separator, item_delimiter, escape_character and enclosing_character) are customizable
*
* The return type is templated and defaults to std::unordered_map<std::string, std::string>. By design, the KeyValuePairExtractor
* should extract key value pairs and return them properly escaped (in order to escape, strings are necessary. string_views cannot be used).
* The built-in SimpleKeyValuePairEscapingProcessor implements a very simple and non-optimized escaping algorithm. For clients that need
* better performance, this abstraction allows custom escaping processors to be injected.
*
* ClickHouse injects a NoOp escaping processor that returns an unescaped std::unordered_map<std::string_view, std::string_view>. This avoids
* unnecessary copies and allows escaping to be done on the client side. At the same time, the KeyValuePairExtractor class can be unit tested
* in a standalone manner by using the SimpleKeyValuePairEscapingProcessor for escaping.
*
* If we want to simplify this in the future, approach #2 in https://github.com/ClickHouse/ClickHouse/pull/43606#discussion_r1049541759 seems
* to be the best bet.
* */
struct KeyValuePairExtractor struct KeyValuePairExtractor
{ {
virtual ~KeyValuePairExtractor() = default; virtual ~KeyValuePairExtractor() = default;

View File

@ -11,6 +11,10 @@ namespace DB
namespace extractKV namespace extractKV
{ {
/*
* `StateHandlerImpl` makes use of string search algorithms to find delimiters. This class creates the needles for each state
* based on the contents of `Configuration`.
* */
template <bool WITH_ESCAPING> template <bool WITH_ESCAPING>
class NeedleFactory class NeedleFactory
{ {