add tests & incorporate ch replace function

This commit is contained in:
Arthur Passos 2022-11-23 16:27:43 -03:00
parent c4a0993ac0
commit 4fbf7177a6
5 changed files with 143 additions and 11 deletions

View File

@ -1,3 +1,4 @@
add_library(clickhouse_functions_parsekeyvalue impl/LazyEscapingKeyValuePairExtractor.cpp KeyValuePairExtractor.h impl/state/KeyStateHandler.cpp impl/state/KeyStateHandler.h impl/state/State.h impl/state/ValueStateHandler.cpp impl/state/ValueStateHandler.h impl/state/StateHandler.cpp impl/state/StateHandler.h impl/KeyValuePairEscapingProcessor.h impl/KeyValuePairEscapingProcessor.cpp KeyValuePairExtractorBuilder.cpp KeyValuePairExtractorBuilder.h)
add_headers_and_sources(clickhouse_functions_parsekeyvalue .)
target_link_libraries(clickhouse_functions_parsekeyvalue PRIVATE dbms)

View File

@ -20,18 +20,18 @@ KeyValuePairEscapingProcessor::Response KeyValuePairEscapingProcessor::process(c
}
std::string KeyValuePairEscapingProcessor::escape(std::string_view element_view) const {
bool escape = false;
[[maybe_unused]] bool escape = false;
std::string element;
element.reserve(element_view.size());
for (char character : element_view) {
if (escape) {
escape = false;
} else if (character == escape_character) {
escape = true;
continue;
}
// if (escape) {
// escape = false;
// } else if (character == escape_character) {
// escape = true;
// continue;
// }
element.push_back(character);
}

View File

@ -17,7 +17,7 @@ public:
[[nodiscard]] Response process(const ResponseViews & input) const;
private:
const char escape_character;
[[maybe_unused]] const char escape_character;
[[nodiscard]] std::string escape(std::string_view element_view) const;
};

View File

@ -0,0 +1,127 @@
#include <Functions/keyvaluepair/src/KeyValuePairExtractorBuilder.h>
#include <gtest/gtest.h>
namespace DB
{
/// One parameterized test case: the text to parse, the pairs expected back, and the
/// extractor instance the text is run through.
/// The test-case tables in this file brace-initialize this struct positionally, so the
/// order of the fields must be kept.
struct LazyKeyValuePairExtractorTestCase {
// Raw text handed to the extractor.
std::string input;
// Key -> value pairs the extraction result is compared against.
std::unordered_map<std::string, std::string> expected_output;
// Extractor under test; each case supplies its own, built via KeyValuePairExtractorBuilder.
std::shared_ptr<KeyValuePairExtractor> extractor;
};
/// Writes a test case to a stream as its raw input string only (the expected output and
/// the extractor are not printed). Presumably used by gtest when it prints the parameter
/// of a failing case.
std::ostream & operator<<(std::ostream & ostr, const LazyKeyValuePairExtractorTestCase & test_case)
{
    ostr << test_case.input;
    return ostr;
}
/// Value-parameterized gtest fixture whose parameter type is LazyKeyValuePairExtractorTestCase.
/// It adds no state or set-up of its own.
struct KeyValuePairExtractorTest : public ::testing::TestWithParam<LazyKeyValuePairExtractorTestCase> {
};
/// Runs the extractor carried by the current parameter on that parameter's input and
/// expects the returned pairs to equal the parameter's expected output.
TEST_P(KeyValuePairExtractorTest, KeyValuePairExtractorTests)
{
    const LazyKeyValuePairExtractorTestCase & test_case = GetParam();
    const auto & expected_output = test_case.expected_output;

    // The identifiers passed to EXPECT_EQ appear verbatim in gtest failure output.
    auto result = test_case.extractor->extract(test_case.input);
    EXPECT_EQ(result, expected_output);
}
// Suite "ValuesCanBeEmptyString": inputs in which a key is followed by ':' but no value.
// Every such key is expected in the output, mapped to an empty string.
// Both cases use an extractor built with the builder's defaults.
INSTANTIATE_TEST_SUITE_P(
ValuesCanBeEmptyString,
KeyValuePairExtractorTest,
::testing::ValuesIn(std::initializer_list<LazyKeyValuePairExtractorTestCase> {
// A single key with nothing after the ':'.
{
"age:",
{
{"age", ""}
},
KeyValuePairExtractorBuilder().build()
},
// One regular pair followed by two keys without values: one in the middle of the
// input (followed by ','), one at the very end of the input.
{
"name: neymar, favorite_movie:,favorite_song:",
{
{"name", "neymar"},
{"favorite_movie", ""},
{"favorite_song", ""},
},
KeyValuePairExtractorBuilder().build()
}
})
);
// Suite "MixString": key-value pairs embedded in text that also contains fragments
// which are not expected to show up in the output.
INSTANTIATE_TEST_SUITE_P(
MixString,
KeyValuePairExtractorTest,
::testing::ValuesIn(std::initializer_list<LazyKeyValuePairExtractorTestCase> {
// Input mixes free text ("9 ads =nm", "daojmskdpoa and a") with pairs and ends with a
// trailing ','. Only five pairs are expected. The backslash-escaped ':' in the key and
// the backslash-escaped space in the "school" value are expected without the backslash.
// '.' is added to the value special-character allow list — presumably so that "1.75"
// is accepted as a value; TODO confirm against the builder.
{
R"(9 ads =nm, no\:me: neymar, age: 30, daojmskdpoa and a height: 1.75, school: lupe\ picasso, team: psg,)",
{
{R"(no:me)", "neymar"},
{"age", "30"},
{"height", "1.75"},
{"school", "lupe picasso"},
{"team", "psg"}
},
KeyValuePairExtractorBuilder().withValueSpecialCharacterAllowList({'.'}).build()
},
// Upper-case key containing underscores, one-character value, trailing ','.
// Uses an extractor built with the builder's defaults.
{
"XNFHGSSF_RHRUZHVBS_KWBT: F,",
{
{"XNFHGSSF_RHRUZHVBS_KWBT", "F"}
},
KeyValuePairExtractorBuilder().build()
},
}
)
);
// Suite "Escaping": special characters appearing with and without a preceding backslash.
INSTANTIATE_TEST_SUITE_P(
Escaping,
KeyValuePairExtractorTest,
::testing::ValuesIn(std::initializer_list<LazyKeyValuePairExtractorTestCase> {
// The text before the first ':' contains unescaped ','. No pair is expected from it;
// only the following "age" pair is expected.
{
"na,me,: neymar, age:30",
{
{"age", "30"}
},
KeyValuePairExtractorBuilder().build()
},
// Same as above, but the text before the first ':' contains an unescaped '$' as well.
// Again only the "age" pair is expected.
{
"na$me,: neymar, age:30",
{
{"age", "30"}
},
KeyValuePairExtractorBuilder().build()
},
// A value in which every space and every '$' is preceded by a backslash. The whole
// sentence is expected as the value, with the backslashes removed.
// The extractor is built with '"' as enclosing character although this input contains
// no '"' — NOTE(review): confirm whether that setting matters for this case.
{
R"(name: neymar, favorite_quote: Premature\ optimization\ is\ the\ r\$\$t\ of\ all\ evil, age:30)",
{
{"name", "neymar"},
{"favorite_quote", R"(Premature optimization is the r$$t of all evil)"},
{"age", "30"}
},
KeyValuePairExtractorBuilder().withEnclosingCharacter('"').build()
}
})
);
// Suite "EnclosedElements": keys and values wrapped in the configured enclosing
// character ('"'), mixed with elements that are not wrapped.
INSTANTIATE_TEST_SUITE_P(
EnclosedElements,
KeyValuePairExtractorTest,
::testing::ValuesIn(std::initializer_list<LazyKeyValuePairExtractorTestCase> {
// Quoted and unquoted keys/values in one input. The expected keys and values carry no
// quotes, and the quoted empty value ("") is expected as an empty string.
// '.' is on the value allow list — presumably so that "1.75" is accepted as a value;
// TODO confirm against the builder.
{
R"("name": "Neymar", "age": 30, team: "psg", "favorite_movie": "", height: 1.75)",
{
{"name", "Neymar"},
{"age", "30"},
{"team", "psg"},
{"favorite_movie", ""},
{"height", "1.75"}
},
KeyValuePairExtractorBuilder().withValueSpecialCharacterAllowList({'.'}).withEnclosingCharacter('"').build()
}
})
);
}

View File

@ -6,6 +6,7 @@
#include <DataTypes/DataTypeString.h>
#include <Common/assert_cast.h>
#include <Functions/keyvaluepair/src/KeyValuePairExtractorBuilder.h>
#include <Functions/ReplaceStringImpl.h>
namespace DB
{
@ -179,14 +180,17 @@ ColumnPtr ParseKeyValue::parse(std::shared_ptr<KeyValuePairExtractor> extractor,
// TODO avoid copying
auto response = extractor->extract(row.toString());
for (const auto & [key, value] : response)
for (auto & [key, value] : response)
{
keys->insert(key);
values->insert(value);
keys->insert(std::move(key));
values->insert(std::move(value));
row_offset++;
}
ReplaceStringImpl<ReplaceStringTraits::Replace::All>::vector(keys->getChars(), keys->getOffsets(), "\\", "", keys->getChars(), keys->getOffsets());
ReplaceStringImpl<ReplaceStringTraits::Replace::All>::vector(values->getChars(), values->getOffsets(), "\\", "", values->getChars(), values->getOffsets());
offsets->insert(row_offset);
}