formatting challenge

This commit is contained in:
Arthur Passos 2022-12-13 12:35:40 -03:00
parent afbb4dd138
commit c303f644f7
11 changed files with 152 additions and 181 deletions

View File

@ -12,7 +12,7 @@ struct KeyValuePairEscapingProcessor
KeyValuePairEscapingProcessor() = default;
virtual ~KeyValuePairEscapingProcessor() = default;
virtual Response process(const ResponseViews&) const = 0;
virtual Response process(const ResponseViews &) const = 0;
};
}

View File

@ -1,7 +1,7 @@
#pragma once
#include <unordered_map>
#include <string>
#include <unordered_map>
namespace DB
{

View File

@ -3,11 +3,11 @@
#include <memory>
#include <optional>
#include <unordered_set>
#include "KeyValuePairExtractor.h"
#include <Functions/keyvaluepair/src/impl/LazyEscapingKeyValuePairExtractor.h>
#include <Functions/keyvaluepair/src/impl/SimpleKeyValuePairEscapingProcessor.h>
#include <Functions/keyvaluepair/src/impl/state/KeyStateHandler.h>
#include <Functions/keyvaluepair/src/impl/state/ValueStateHandler.h>
#include "KeyValuePairExtractor.h"
namespace DB
{
@ -60,7 +60,7 @@ public:
if (!escaping_processor)
{
throw std::runtime_error {"Escaping processor must be set, cannot build KeyValuePairExtractor without one"};
throw std::runtime_error{"Escaping processor must be set, cannot build KeyValuePairExtractor without one"};
}
return std::make_shared<LazyEscapingKeyValuePairExtractor<Response>>(key_state_handler, value_state_handler, escaping_processor);

View File

@ -24,9 +24,13 @@ template <typename Response>
class LazyEscapingKeyValuePairExtractor : public KeyValuePairExtractor<Response>
{
public:
LazyEscapingKeyValuePairExtractor(KeyStateHandler key_state_handler_, ValueStateHandler value_state_handler_,
std::shared_ptr<KeyValuePairEscapingProcessor<Response>> escaping_processor_)
: key_state_handler(key_state_handler_), value_state_handler(value_state_handler_), escaping_processor(escaping_processor_){}
LazyEscapingKeyValuePairExtractor(
KeyStateHandler key_state_handler_,
ValueStateHandler value_state_handler_,
std::shared_ptr<KeyValuePairEscapingProcessor<Response>> escaping_processor_)
: key_state_handler(key_state_handler_), value_state_handler(value_state_handler_), escaping_processor(escaping_processor_)
{
}
[[nodiscard]] Response extract(const std::string & file) override
{
@ -34,7 +38,8 @@ public:
std::size_t pos = 0;
while (state != State::END) {
while (state != State::END)
{
auto nextState = processState(file, pos, state);
pos = nextState.pos;
@ -47,7 +52,8 @@ public:
private:
NextState processState(const std::string & file, std::size_t pos, State state)
{
switch (state) {
switch (state)
{
case State::WAITING_KEY:
return key_state_handler.wait(file, pos);
case State::READING_KEY:
@ -67,10 +73,7 @@ private:
case State::FLUSH_PAIR:
return flushPair(file, pos);
case END:
return {
pos,
state
};
return {pos, state};
}
}
@ -78,10 +81,7 @@ private:
{
response_views[key_state_handler.get()] = value_state_handler.get();
return {
pos,
pos == file.size() ? State::END : State::WAITING_KEY
};
return {pos, pos == file.size() ? State::END : State::WAITING_KEY};
}
KeyStateHandler key_state_handler;
@ -92,4 +92,3 @@ private:
};
}

View File

@ -4,8 +4,9 @@ namespace DB
{
// Constructs the processor: default-constructs the KeyValuePairEscapingProcessor base
// (instantiated for std::unordered_map<std::string, std::string>) and stores the
// escape character passed by the caller.
// Commit view: the initializer list and the empty body appear twice below, once in
// the earlier formatting and once in the reformatted layout.
SimpleKeyValuePairEscapingProcessor::SimpleKeyValuePairEscapingProcessor(char escape_character_)
: KeyValuePairEscapingProcessor<std::unordered_map<std::string, std::string>>(), escape_character(escape_character_)
{}
: KeyValuePairEscapingProcessor<std::unordered_map<std::string, std::string>>(), escape_character(escape_character_)
{
}
SimpleKeyValuePairEscapingProcessor::Response SimpleKeyValuePairEscapingProcessor::process(const ResponseViews & response_views) const
{
@ -13,7 +14,8 @@ SimpleKeyValuePairEscapingProcessor::Response SimpleKeyValuePairEscapingProcesso
response.reserve(response_views.size());
for (auto [key_view, value_view] : response_views) {
for (auto [key_view, value_view] : response_views)
{
response[escape(key_view)] = escape(value_view);
}
@ -27,10 +29,14 @@ std::string SimpleKeyValuePairEscapingProcessor::escape(std::string_view element
element.reserve(element_view.size());
for (char character : element_view) {
if (escape) {
for (char character : element_view)
{
if (escape)
{
escape = false;
} else if (character == escape_character) {
}
else if (character == escape_character)
{
escape = true;
continue;
}

View File

@ -1,7 +1,7 @@
#pragma once
#include <string>
#include <optional>
#include <string>
#include "State.h"
#include "StateHandler.h"
@ -9,13 +9,14 @@
namespace DB
{
class KeyStateHandler : StateHandler {
class KeyStateHandler : StateHandler
{
public:
KeyStateHandler(char key_value_delimiter, char escape_character, std::optional<char> enclosing_character);
[[nodiscard]] NextState wait(const std::string & file, size_t pos) const;
[[nodiscard]] NextState read(const std::string & file, size_t pos);
[[nodiscard]] NextState readEnclosed(const std::string &file, size_t pos);
[[nodiscard]] NextState readEnclosed(const std::string & file, size_t pos);
[[nodiscard]] NextState readKeyValueDelimiter(const std::string & file, size_t pos) const;
[[nodiscard]] std::string_view get() const override;

View File

@ -1,5 +1,5 @@
#include <string>
#include "StateHandler.h"
#include <string>
namespace DB
{
@ -7,11 +7,11 @@ namespace DB
// Stores the two characters shared by the derived state handlers: the escape
// character and the optional enclosing character (std::nullopt means none is set).
StateHandler::StateHandler(char escape_character_, std::optional<char> enclosing_character_)
: escape_character(escape_character_), enclosing_character(enclosing_character_)
{
}
// Returns a non-owning view over the half-open range [begin, end) of file.
// The view is built from a pair of iterators into file, so it refers to the storage
// of file and is only usable while that string is alive and unmodified.
// No bounds checking is done here; callers must pass begin <= end <= file.size().
// Commit view: the signature and return statement appear twice below (earlier and
// reformatted layout of the same definition).
std::string_view StateHandler::createElement(const std::string & file, std::size_t begin, std::size_t end) const {
return std::string_view {file.begin() + begin, file.begin() + end};
std::string_view StateHandler::createElement(const std::string & file, std::size_t begin, std::size_t end) const
{
return std::string_view{file.begin() + begin, file.begin() + end};
}
}

View File

@ -6,7 +6,8 @@
namespace DB
{
struct StateHandler {
struct StateHandler
{
StateHandler(char escape_character, std::optional<char> enclosing_character);
StateHandler(const StateHandler &) = default;

View File

@ -3,109 +3,108 @@
namespace DB
{
// Constructs the value-side state handler.
// - escape_character_ and enclosing_character_ are forwarded to the StateHandler base.
// - item_delimiter_ is stored in item_delimiter.
// - special_character_allowlist_ is taken by value and moved into the member, so the
//   caller may pass a temporary without an extra copy.
// Commit view: the whole constructor appears twice below, first in the earlier
// formatting and then in the reformatted one-parameter-per-line layout.
ValueStateHandler::ValueStateHandler(char escape_character_, char item_delimiter_,
std::optional<char> enclosing_character_,
std::unordered_set<char> special_character_allowlist_)
: StateHandler(escape_character_, enclosing_character_), item_delimiter(item_delimiter_),
special_character_allowlist(std::move(special_character_allowlist_))
{}
ValueStateHandler::ValueStateHandler(
char escape_character_,
char item_delimiter_,
std::optional<char> enclosing_character_,
std::unordered_set<char> special_character_allowlist_)
: StateHandler(escape_character_, enclosing_character_)
, item_delimiter(item_delimiter_)
, special_character_allowlist(std::move(special_character_allowlist_))
{
}
// Scans file forward from pos looking for the first character that decides how the
// value will be read, and returns the next position together with the next state:
//   - character equals enclosing_character -> {pos + 1, READING_ENCLOSED_VALUE}
//     (the opening enclosing character itself is skipped);
//   - character equals item_delimiter      -> {pos, READING_EMPTY_VALUE};
//   - isValidCharacter(character) is true  -> {pos, READING_VALUE};
//   - anything else is skipped and the scan continues.
// When the end of file is reached without a match, {pos, READING_EMPTY_VALUE} is
// returned with pos == file.size().
// The checks are evaluated in the order listed, so the enclosing character and the
// item delimiter take precedence over the valid-character test.
// Commit view: old and new formatting of this definition are interleaved below, so
// each branch and each return appears twice.
NextState ValueStateHandler::wait(const std::string &file, size_t pos) const {
while (pos < file.size()) {
NextState ValueStateHandler::wait(const std::string & file, size_t pos) const
{
while (pos < file.size())
{
const auto current_character = file[pos];
if (current_character == enclosing_character) {
return {
pos + 1u,
State::READING_ENCLOSED_VALUE
};
} else if (current_character == item_delimiter) {
return {
pos,
State::READING_EMPTY_VALUE
};
} else if (isValidCharacter(current_character)) {
return {
pos,
State::READING_VALUE
};
} else {
if (current_character == enclosing_character)
{
return {pos + 1u, State::READING_ENCLOSED_VALUE};
}
else if (current_character == item_delimiter)
{
return {pos, State::READING_EMPTY_VALUE};
}
else if (isValidCharacter(current_character))
{
return {pos, State::READING_VALUE};
}
else
{
pos++;
}
}
return {
pos,
State::READING_EMPTY_VALUE
};
return {pos, State::READING_EMPTY_VALUE};
}
// Reads an unenclosed value that starts at pos and stores a view of it in value.
// The loop consumes one character per iteration (pos is already advanced past the
// character being examined):
//   - if the previous character was the escape character, the current one is accepted
//     unconditionally and the escape flag is cleared;
//   - the escape character itself only sets the escape flag;
//   - an unescaped item delimiter, or any unescaped character rejected by
//     isValidCharacter, terminates the value. The stored view is
//     [start_index, pos - 1), i.e. it excludes the terminating character, and the
//     returned position is just past that terminator.
// If the input ends first, the view covers [start_index, pos) and the state is still
// FLUSH_PAIR. Escape characters are not removed here; the view points at the raw text.
// Commit view: old and new formatting of this definition are interleaved below.
NextState ValueStateHandler::read(const std::string &file, size_t pos) {
NextState ValueStateHandler::read(const std::string & file, size_t pos)
{
bool escape = false;
auto start_index = pos;
value = {};
while (pos < file.size()) {
while (pos < file.size())
{
const auto current_character = file[pos++];
if (escape) {
if (escape)
{
escape = false;
} else if (escape_character == current_character) {
}
else if (escape_character == current_character)
{
escape = true;
} else if (current_character == item_delimiter || !isValidCharacter(current_character)) {
}
else if (current_character == item_delimiter || !isValidCharacter(current_character))
{
value = createElement(file, start_index, pos - 1);
return {
pos,
State::FLUSH_PAIR
};
return {pos, State::FLUSH_PAIR};
}
}
// TODO: do I really need the below logic?
// this allows empty values at the end
value = createElement(file, start_index, pos);
return {
pos,
State::FLUSH_PAIR
};
return {pos, State::FLUSH_PAIR};
}
// Reads a value that was opened by the enclosing character; pos is expected to point
// at the first character after the opening one.
// Characters are consumed until the next occurrence of enclosing_character. On a
// match the stored view is [start_index, pos - 1), which excludes the closing
// character, and {pos, FLUSH_PAIR} is returned with pos just past it.
// If the input ends before a closing character is found, {pos, END} is returned and
// value is left untouched (no pair is flushed for the unterminated value).
// This loop does not look at escape_character, so an enclosing character inside the
// value always terminates it.
// Commit view: old and new formatting of this definition are interleaved below.
NextState ValueStateHandler::readEnclosed(const std::string &file, size_t pos) {
NextState ValueStateHandler::readEnclosed(const std::string & file, size_t pos)
{
auto start_index = pos;
while (pos < file.size()) {
while (pos < file.size())
{
const auto current_character = file[pos++];
if (enclosing_character == current_character) {
if (enclosing_character == current_character)
{
// not checking for empty value because with current waitValue implementation
// there is no way this piece of code will be reached for the very first value character
// NOTE(review): waitValue presumably refers to ValueStateHandler::wait - confirm.
value = createElement(file, start_index, pos - 1);
return {
pos,
State::FLUSH_PAIR
};
return {pos, State::FLUSH_PAIR};
}
}
return {
pos,
State::END
};
return {pos, State::END};
}
// Handles a key that has no value: resets value to an empty view and returns
// {pos + 1, FLUSH_PAIR}, stepping over the character at pos.
// The input string parameter is unnamed because it is not used.
// NOTE(review): pos + 1 can exceed the input length when this state is entered at end
// of input (wait returns READING_EMPTY_VALUE with pos == file.size()) - verify that
// the caller tolerates a position past the end.
// Commit view: old and new formatting of this definition are interleaved below.
NextState ValueStateHandler::readEmpty(const std::string &, size_t pos) {
NextState ValueStateHandler::readEmpty(const std::string &, size_t pos)
{
value = {};
return {
pos + 1,
State::FLUSH_PAIR
};
return {pos + 1, State::FLUSH_PAIR};
}
// Returns true when character may be part of an unenclosed value: it is listed in
// special_character_allowlist, is alphanumeric according to std::isalnum, or is an
// underscore.
// NOTE(review): std::isalnum is called with a plain char. The C library requires the
// argument to be representable as unsigned char (or EOF), so a negative char value is
// undefined behavior - consider casting to unsigned char before the call.
// Commit view: the signature appears twice below (earlier and reformatted layout).
bool ValueStateHandler::isValidCharacter(char character) const {
bool ValueStateHandler::isValidCharacter(char character) const
{
return special_character_allowlist.contains(character) || std::isalnum(character) || character == '_';
}
// Returns the view stored in value by the most recent read, readEnclosed or readEmpty
// call. The result is a non-owning std::string_view.
// Commit view: the signature appears twice below (earlier and reformatted layout).
std::string_view ValueStateHandler::get() const {
std::string_view ValueStateHandler::get() const
{
return value;
}

View File

@ -1,7 +1,7 @@
#pragma once
#include <string>
#include <optional>
#include <string>
#include <unordered_set>
#include "State.h"
#include "StateHandler.h"
@ -9,11 +9,14 @@
namespace DB
{
class ValueStateHandler : StateHandler {
class ValueStateHandler : StateHandler
{
public:
ValueStateHandler(char escape_character, char item_delimiter,
std::optional<char> enclosing_character, std::unordered_set<char> special_character_allowlist_);
ValueStateHandler(
char escape_character,
char item_delimiter,
std::optional<char> enclosing_character,
std::unordered_set<char> special_character_allowlist_);
[[nodiscard]] NextState wait(const std::string & file, size_t pos) const;
[[nodiscard]] NextState read(const std::string & file, size_t pos);

View File

@ -4,7 +4,8 @@
namespace DB
{
struct LazyKeyValuePairExtractorTestCase {
struct LazyKeyValuePairExtractorTestCase
{
std::string input;
std::unordered_map<std::string, std::string> expected_output;
std::shared_ptr<KeyValuePairExtractor<>> extractor;
@ -15,11 +16,12 @@ std::ostream & operator<<(std::ostream & ostr, const LazyKeyValuePairExtractorTe
return ostr << test_case.input;
}
// Empty GoogleTest fixture parameterized on LazyKeyValuePairExtractorTestCase; it adds
// no state of its own and exists so TEST_P / INSTANTIATE_TEST_SUITE_P can refer to it.
// Commit view: the struct header appears twice below (earlier and reformatted layout).
struct KeyValuePairExtractorTest : public ::testing::TestWithParam<LazyKeyValuePairExtractorTestCase> {
struct KeyValuePairExtractorTest : public ::testing::TestWithParam<LazyKeyValuePairExtractorTestCase>
{
};
TEST_P(KeyValuePairExtractorTest, KeyValuePairExtractorTests) {
TEST_P(KeyValuePairExtractorTest, KeyValuePairExtractorTests)
{
const auto & [input, expected_output, extractor] = GetParam();
auto result = extractor->extract(input);
@ -30,98 +32,58 @@ TEST_P(KeyValuePairExtractorTest, KeyValuePairExtractorTests) {
// Suite ValuesCanBeEmptyString: inputs in which a key is followed by the key-value
// delimiter but no value. The expected maps associate those keys with empty strings,
// both for a single trailing key and for empty values in the middle and at the end of
// a longer input. Each case uses a builder configured only with
// SimpleKeyValuePairEscapingProcessor.
// Commit view: the parameter list appears twice below, first in the earlier expanded
// layout and then in the reformatted compact layout.
INSTANTIATE_TEST_SUITE_P(
ValuesCanBeEmptyString,
KeyValuePairExtractorTest,
::testing::ValuesIn(std::initializer_list<LazyKeyValuePairExtractorTestCase> {
{
"age:",
{
{"age", ""}
},
KeyValuePairExtractorBuilder().withEscapingProcessor<SimpleKeyValuePairEscapingProcessor>().build()
},
{
"name: neymar, favorite_movie:,favorite_song:",
{
{"name", "neymar"},
{"favorite_movie", ""},
{"favorite_song", ""},
},
KeyValuePairExtractorBuilder().withEscapingProcessor<SimpleKeyValuePairEscapingProcessor>().build()
}
})
);
::testing::ValuesIn(std::initializer_list<LazyKeyValuePairExtractorTestCase>{
{"age:", {{"age", ""}}, KeyValuePairExtractorBuilder().withEscapingProcessor<SimpleKeyValuePairEscapingProcessor>().build()},
{"name: neymar, favorite_movie:,favorite_song:",
{
{"name", "neymar"},
{"favorite_movie", ""},
{"favorite_song", ""},
},
KeyValuePairExtractorBuilder().withEscapingProcessor<SimpleKeyValuePairEscapingProcessor>().build()}}));
// Suite MixString: key-value pairs embedded in surrounding noise text.
// First case: the input mixes unrelated tokens with real pairs, has a backslash-escaped
// colon inside a key and a backslash-escaped space inside a value, and ends with a
// trailing comma. The expected map holds only the five real pairs, with the escapes
// resolved; the builder adds the dot character to the value allowlist so that 1.75 is
// read as a single value.
// Second case: an upper-case key containing underscores with a one-letter value and a
// trailing comma, using the builder with only the escaping processor set.
// Commit view: the parameter list appears twice below, first in the earlier expanded
// layout and then in the reformatted compact layout.
INSTANTIATE_TEST_SUITE_P(
MixString,
KeyValuePairExtractorTest,
::testing::ValuesIn(std::initializer_list<LazyKeyValuePairExtractorTestCase> {
{
R"(9 ads =nm, no\:me: neymar, age: 30, daojmskdpoa and a height: 1.75, school: lupe\ picasso, team: psg,)",
{
{R"(no:me)", "neymar"},
{"age", "30"},
{"height", "1.75"},
{"school", "lupe picasso"},
{"team", "psg"}
},
KeyValuePairExtractorBuilder().withEscapingProcessor<SimpleKeyValuePairEscapingProcessor>().withValueSpecialCharacterAllowList({'.'}).build()
},
{
"XNFHGSSF_RHRUZHVBS_KWBT: F,",
{
{"XNFHGSSF_RHRUZHVBS_KWBT", "F"}
},
KeyValuePairExtractorBuilder().withEscapingProcessor<SimpleKeyValuePairEscapingProcessor>().build()
},
}
)
);
::testing::ValuesIn(std::initializer_list<LazyKeyValuePairExtractorTestCase>{
{R"(9 ads =nm, no\:me: neymar, age: 30, daojmskdpoa and a height: 1.75, school: lupe\ picasso, team: psg,)",
{{R"(no:me)", "neymar"}, {"age", "30"}, {"height", "1.75"}, {"school", "lupe picasso"}, {"team", "psg"}},
KeyValuePairExtractorBuilder()
.withEscapingProcessor<SimpleKeyValuePairEscapingProcessor>()
.withValueSpecialCharacterAllowList({'.'})
.build()},
{"XNFHGSSF_RHRUZHVBS_KWBT: F,",
{{"XNFHGSSF_RHRUZHVBS_KWBT", "F"}},
KeyValuePairExtractorBuilder().withEscapingProcessor<SimpleKeyValuePairEscapingProcessor>().build()},
}));
// Suite Escaping: behavior around characters that are not valid in keys and around
// backslash escapes.
// First and second cases: the leading key contains an unescaped comma or dollar sign;
// the expected map contains only the age pair, so no pair is expected for that key.
// Third case: a value whose spaces and dollar signs are each preceded by a backslash;
// the expected value has the backslashes removed and the escaped characters kept. The
// builder for this case also sets the double quote as enclosing character.
// Commit view: the parameter list appears twice below, first in the earlier expanded
// layout and then in the reformatted compact layout.
INSTANTIATE_TEST_SUITE_P(
Escaping,
KeyValuePairExtractorTest,
::testing::ValuesIn(std::initializer_list<LazyKeyValuePairExtractorTestCase> {
{
"na,me,: neymar, age:30",
{
{"age", "30"}
},
KeyValuePairExtractorBuilder().withEscapingProcessor<SimpleKeyValuePairEscapingProcessor>().build()
},
{
"na$me,: neymar, age:30",
{
{"age", "30"}
},
KeyValuePairExtractorBuilder().withEscapingProcessor<SimpleKeyValuePairEscapingProcessor>().build()
},
{
R"(name: neymar, favorite_quote: Premature\ optimization\ is\ the\ r\$\$t\ of\ all\ evil, age:30)",
{
{"name", "neymar"},
{"favorite_quote", R"(Premature optimization is the r$$t of all evil)"},
{"age", "30"}
},
KeyValuePairExtractorBuilder().withEscapingProcessor<SimpleKeyValuePairEscapingProcessor>().withEnclosingCharacter('"').build()
}
})
);
::testing::ValuesIn(std::initializer_list<LazyKeyValuePairExtractorTestCase>{
{"na,me,: neymar, age:30",
{{"age", "30"}},
KeyValuePairExtractorBuilder().withEscapingProcessor<SimpleKeyValuePairEscapingProcessor>().build()},
{"na$me,: neymar, age:30",
{{"age", "30"}},
KeyValuePairExtractorBuilder().withEscapingProcessor<SimpleKeyValuePairEscapingProcessor>().build()},
{R"(name: neymar, favorite_quote: Premature\ optimization\ is\ the\ r\$\$t\ of\ all\ evil, age:30)",
{{"name", "neymar"}, {"favorite_quote", R"(Premature optimization is the r$$t of all evil)"}, {"age", "30"}},
KeyValuePairExtractorBuilder()
.withEscapingProcessor<SimpleKeyValuePairEscapingProcessor>()
.withEnclosingCharacter('"')
.build()}}));
// Suite EnclosedElements: a single input mixing keys and values that are wrapped in
// double quotes with ones that are not, including an enclosed empty value. The
// expected map contains the five pairs with the enclosing quotes stripped. The builder
// sets the double quote as enclosing character and adds the dot character to the value
// allowlist so that 1.75 is read as a single value.
// Commit view: the parameter list appears twice below, first in the earlier expanded
// layout and then in the reformatted compact layout.
INSTANTIATE_TEST_SUITE_P(
EnclosedElements,
KeyValuePairExtractorTest,
::testing::ValuesIn(std::initializer_list<LazyKeyValuePairExtractorTestCase> {
{
R"("name": "Neymar", "age": 30, team: "psg", "favorite_movie": "", height: 1.75)",
{
{"name", "Neymar"},
{"age", "30"},
{"team", "psg"},
{"favorite_movie", ""},
{"height", "1.75"}
},
KeyValuePairExtractorBuilder().withEscapingProcessor<SimpleKeyValuePairEscapingProcessor>().withValueSpecialCharacterAllowList({'.'}).withEnclosingCharacter('"').build()
}
})
);
::testing::ValuesIn(std::initializer_list<LazyKeyValuePairExtractorTestCase>{
{R"("name": "Neymar", "age": 30, team: "psg", "favorite_movie": "", height: 1.75)",
{{"name", "Neymar"}, {"age", "30"}, {"team", "psg"}, {"favorite_movie", ""}, {"height", "1.75"}},
KeyValuePairExtractorBuilder()
.withEscapingProcessor<SimpleKeyValuePairEscapingProcessor>()
.withValueSpecialCharacterAllowList({'.'})
.withEnclosingCharacter('"')
.build()}}));
}