From 383e9264b7d6c6251192c0c60bee6cb38bffa722 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 15 Dec 2022 09:47:36 -0300 Subject: [PATCH] improve lib cmakefiles, add state docs and minor changes --- src/Functions/keyvaluepair/src/CMakeLists.txt | 6 +++++- .../keyvaluepair/src/KeyValuePairExtractor.h | 2 +- .../src/KeyValuePairExtractorBuilder.h | 2 +- .../impl/LazyEscapingKeyValuePairExtractor.h | 2 +- .../keyvaluepair/src/impl/state/State.h | 17 ++++++++++------- 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/Functions/keyvaluepair/src/CMakeLists.txt b/src/Functions/keyvaluepair/src/CMakeLists.txt index 939937c2aef..f93473ae6f5 100644 --- a/src/Functions/keyvaluepair/src/CMakeLists.txt +++ b/src/Functions/keyvaluepair/src/CMakeLists.txt @@ -1,4 +1,8 @@ -add_library(clickhouse_functions_extractkeyvaluepairs KeyValuePairExtractor.h impl/state/KeyStateHandler.cpp impl/state/KeyStateHandler.h impl/state/State.h impl/state/ValueStateHandler.cpp impl/state/ValueStateHandler.h impl/state/StateHandler.cpp impl/state/StateHandler.h impl/SimpleKeyValuePairEscapingProcessor.h impl/SimpleKeyValuePairEscapingProcessor.cpp KeyValuePairExtractorBuilder.h KeyValuePairEscapingProcessor.h) +include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_functions_extractkeyvaluepairs .) 
+add_headers_and_sources(clickhouse_functions_extractkeyvaluepairs impl) +add_headers_and_sources(clickhouse_functions_extractkeyvaluepairs impl/state) + +add_library(clickhouse_functions_extractkeyvaluepairs ${clickhouse_functions_extractkeyvaluepairs_sources} ${clickhouse_functions_extractkeyvaluepairs_headers}) target_link_libraries(clickhouse_functions_extractkeyvaluepairs PRIVATE dbms) \ No newline at end of file diff --git a/src/Functions/keyvaluepair/src/KeyValuePairExtractor.h b/src/Functions/keyvaluepair/src/KeyValuePairExtractor.h index 761dc66d031..2abc8c8cc1d 100644 --- a/src/Functions/keyvaluepair/src/KeyValuePairExtractor.h +++ b/src/Functions/keyvaluepair/src/KeyValuePairExtractor.h @@ -26,7 +26,7 @@ struct KeyValuePairExtractor virtual ~KeyValuePairExtractor() = default; - virtual Response extract(const std::string & file) = 0; + virtual Response extract(const std::string & data) = 0; }; } diff --git a/src/Functions/keyvaluepair/src/KeyValuePairExtractorBuilder.h b/src/Functions/keyvaluepair/src/KeyValuePairExtractorBuilder.h index 7b190003cf4..bcb5f19925c 100644 --- a/src/Functions/keyvaluepair/src/KeyValuePairExtractorBuilder.h +++ b/src/Functions/keyvaluepair/src/KeyValuePairExtractorBuilder.h @@ -60,7 +60,7 @@ public: if (!escaping_processor) { - throw std::runtime_error{"Escaping processor must be set, cannot build KeyValuePairExtractor without one"}; + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Escaping processor must be set, cannot build KeyValuePairExtractor without one"); } return std::make_shared>(key_state_handler, value_state_handler, escaping_processor); diff --git a/src/Functions/keyvaluepair/src/impl/LazyEscapingKeyValuePairExtractor.h b/src/Functions/keyvaluepair/src/impl/LazyEscapingKeyValuePairExtractor.h index dfd29cbb392..21681acbfe7 100644 --- a/src/Functions/keyvaluepair/src/impl/LazyEscapingKeyValuePairExtractor.h +++ b/src/Functions/keyvaluepair/src/impl/LazyEscapingKeyValuePairExtractor.h @@ -42,7 +42,7 @@ public: { 
auto next_state = processState(file, pos, state); - pos = next_state.pos; + pos = next_state.position_in_string; state = next_state.state; } diff --git a/src/Functions/keyvaluepair/src/impl/state/State.h b/src/Functions/keyvaluepair/src/impl/state/State.h index 7141f84aa64..32249a843fe 100644 --- a/src/Functions/keyvaluepair/src/impl/state/State.h +++ b/src/Functions/keyvaluepair/src/impl/state/State.h @@ -7,28 +7,31 @@ namespace DB enum State { + // Skips characters until a valid first key character is found. Might jump to READING_KEY, READING_ENCLOSED_KEY or END. WAITING_KEY, + // Tries to read a key. Might jump to WAITING_KEY, WAITING_VALUE or END. READING_KEY, + // Tries to read an enclosed/quoted key. Might jump to WAITING_KEY, READING_KV_DELIMITER or END. READING_ENCLOSED_KEY, + // Tries to read the key-value pair delimiter. Might jump to WAITING_KEY, WAITING_VALUE or END. READING_KV_DELIMITER, + // Skips characters until a valid first value character is found. Might jump to READING_ENCLOSED_VALUE, READING_EMPTY_VALUE or READING_VALUE. WAITING_VALUE, + // Tries to read a value. Jumps to FLUSH_PAIR. READING_VALUE, + // Tries to read an enclosed/quoted value. Might jump to FLUSH_PAIR or END. READING_ENCLOSED_VALUE, + // "Reads" an empty value. Jumps to FLUSH_PAIR. READING_EMPTY_VALUE, + // In this state, both key and value have already been collected and should be flushed. Might jump to WAITING_KEY or END. FLUSH_PAIR, END }; struct NextState { - std::size_t pos; + std::size_t position_in_string; State state; }; -struct NextStateWithElement -{ - NextState state; - std::string_view element; -}; - }