mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
add tests & incorporate ch replace function
This commit is contained in:
parent
c4a0993ac0
commit
4fbf7177a6
@ -1,3 +1,4 @@
|
||||
add_library(clickhouse_functions_parsekeyvalue impl/LazyEscapingKeyValuePairExtractor.cpp KeyValuePairExtractor.h impl/state/KeyStateHandler.cpp impl/state/KeyStateHandler.h impl/state/State.h impl/state/ValueStateHandler.cpp impl/state/ValueStateHandler.h impl/state/StateHandler.cpp impl/state/StateHandler.h impl/KeyValuePairEscapingProcessor.h impl/KeyValuePairEscapingProcessor.cpp KeyValuePairExtractorBuilder.cpp KeyValuePairExtractorBuilder.h)
|
||||
add_headers_and_sources(clickhouse_functions_parsekeyvalue .)
|
||||
|
||||
target_link_libraries(clickhouse_functions_parsekeyvalue PRIVATE dbms)
|
@ -20,18 +20,18 @@ KeyValuePairEscapingProcessor::Response KeyValuePairEscapingProcessor::process(c
|
||||
}
|
||||
|
||||
std::string KeyValuePairEscapingProcessor::escape(std::string_view element_view) const {
|
||||
bool escape = false;
|
||||
[[maybe_unused]] bool escape = false;
|
||||
std::string element;
|
||||
|
||||
element.reserve(element_view.size());
|
||||
|
||||
for (char character : element_view) {
|
||||
if (escape) {
|
||||
escape = false;
|
||||
} else if (character == escape_character) {
|
||||
escape = true;
|
||||
continue;
|
||||
}
|
||||
// if (escape) {
|
||||
// escape = false;
|
||||
// } else if (character == escape_character) {
|
||||
// escape = true;
|
||||
// continue;
|
||||
// }
|
||||
|
||||
element.push_back(character);
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ public:
|
||||
[[nodiscard]] Response process(const ResponseViews & input) const;
|
||||
|
||||
private:
|
||||
const char escape_character;
|
||||
[[maybe_unused]] const char escape_character;
|
||||
|
||||
[[nodiscard]] std::string escape(std::string_view element_view) const;
|
||||
};
|
||||
|
@ -0,0 +1,127 @@
|
||||
#include <Functions/keyvaluepair/src/KeyValuePairExtractorBuilder.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct LazyKeyValuePairExtractorTestCase {
|
||||
std::string input;
|
||||
std::unordered_map<std::string, std::string> expected_output;
|
||||
std::shared_ptr<KeyValuePairExtractor> extractor;
|
||||
};
|
||||
|
||||
std::ostream & operator<<(std::ostream & ostr, const LazyKeyValuePairExtractorTestCase & test_case)
|
||||
{
|
||||
return ostr << test_case.input;
|
||||
}
|
||||
|
||||
/// Value-parameterized fixture: it declares no members of its own and only fixes the
/// parameter type to LazyKeyValuePairExtractorTestCase.
struct KeyValuePairExtractorTest : public ::testing::TestWithParam<LazyKeyValuePairExtractorTestCase>
{
};
|
||||
|
||||
/// Runs the extractor of the current parameter on its input and compares the
/// returned pairs with the expected map.
TEST_P(KeyValuePairExtractorTest, KeyValuePairExtractorTests)
{
    const auto & test_case = GetParam();

    auto extracted = test_case.extractor->extract(test_case.input);

    EXPECT_EQ(extracted, test_case.expected_output);
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
ValuesCanBeEmptyString,
|
||||
KeyValuePairExtractorTest,
|
||||
::testing::ValuesIn(std::initializer_list<LazyKeyValuePairExtractorTestCase> {
|
||||
{
|
||||
"age:",
|
||||
{
|
||||
{"age", ""}
|
||||
},
|
||||
KeyValuePairExtractorBuilder().build()
|
||||
},
|
||||
{
|
||||
"name: neymar, favorite_movie:,favorite_song:",
|
||||
{
|
||||
{"name", "neymar"},
|
||||
{"favorite_movie", ""},
|
||||
{"favorite_song", ""},
|
||||
},
|
||||
KeyValuePairExtractorBuilder().build()
|
||||
}
|
||||
})
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
MixString,
|
||||
KeyValuePairExtractorTest,
|
||||
::testing::ValuesIn(std::initializer_list<LazyKeyValuePairExtractorTestCase> {
|
||||
{
|
||||
R"(9 ads =nm, no\:me: neymar, age: 30, daojmskdpoa and a height: 1.75, school: lupe\ picasso, team: psg,)",
|
||||
{
|
||||
{R"(no:me)", "neymar"},
|
||||
{"age", "30"},
|
||||
{"height", "1.75"},
|
||||
{"school", "lupe picasso"},
|
||||
{"team", "psg"}
|
||||
},
|
||||
KeyValuePairExtractorBuilder().withValueSpecialCharacterAllowList({'.'}).build()
|
||||
},
|
||||
{
|
||||
"XNFHGSSF_RHRUZHVBS_KWBT: F,",
|
||||
{
|
||||
{"XNFHGSSF_RHRUZHVBS_KWBT", "F"}
|
||||
},
|
||||
KeyValuePairExtractorBuilder().build()
|
||||
},
|
||||
}
|
||||
)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
Escaping,
|
||||
KeyValuePairExtractorTest,
|
||||
::testing::ValuesIn(std::initializer_list<LazyKeyValuePairExtractorTestCase> {
|
||||
{
|
||||
"na,me,: neymar, age:30",
|
||||
{
|
||||
{"age", "30"}
|
||||
},
|
||||
KeyValuePairExtractorBuilder().build()
|
||||
},
|
||||
{
|
||||
"na$me,: neymar, age:30",
|
||||
{
|
||||
{"age", "30"}
|
||||
},
|
||||
KeyValuePairExtractorBuilder().build()
|
||||
},
|
||||
{
|
||||
R"(name: neymar, favorite_quote: Premature\ optimization\ is\ the\ r\$\$t\ of\ all\ evil, age:30)",
|
||||
{
|
||||
{"name", "neymar"},
|
||||
{"favorite_quote", R"(Premature optimization is the r$$t of all evil)"},
|
||||
{"age", "30"}
|
||||
},
|
||||
KeyValuePairExtractorBuilder().withEnclosingCharacter('"').build()
|
||||
}
|
||||
})
|
||||
);
|
||||
|
||||
/// Keys and values wrapped in the configured enclosing character ('"'), mixed with bare ones.
INSTANTIATE_TEST_SUITE_P(
    EnclosedElements,
    KeyValuePairExtractorTest,
    ::testing::Values(
        // Enclosed and non-enclosed elements side by side, including an enclosed empty value.
        LazyKeyValuePairExtractorTestCase{
            R"("name": "Neymar", "age": 30, team: "psg", "favorite_movie": "", height: 1.75)",
            {{"name", "Neymar"},
             {"age", "30"},
             {"team", "psg"},
             {"favorite_movie", ""},
             {"height", "1.75"}},
            KeyValuePairExtractorBuilder().withValueSpecialCharacterAllowList({'.'}).withEnclosingCharacter('"').build()}));
|
||||
|
||||
}
|
@ -6,6 +6,7 @@
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Functions/keyvaluepair/src/KeyValuePairExtractorBuilder.h>
|
||||
#include <Functions/ReplaceStringImpl.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -179,14 +180,17 @@ ColumnPtr ParseKeyValue::parse(std::shared_ptr<KeyValuePairExtractor> extractor,
|
||||
// TODO avoid copying
|
||||
auto response = extractor->extract(row.toString());
|
||||
|
||||
for (const auto & [key, value] : response)
|
||||
for (auto & [key, value] : response)
|
||||
{
|
||||
keys->insert(key);
|
||||
values->insert(value);
|
||||
keys->insert(std::move(key));
|
||||
values->insert(std::move(value));
|
||||
|
||||
row_offset++;
|
||||
}
|
||||
|
||||
ReplaceStringImpl<ReplaceStringTraits::Replace::All>::vector(keys->getChars(), keys->getOffsets(), "\\", "", keys->getChars(), keys->getOffsets());
|
||||
ReplaceStringImpl<ReplaceStringTraits::Replace::All>::vector(values->getChars(), values->getOffsets(), "\\", "", values->getChars(), values->getOffsets());
|
||||
|
||||
offsets->insert(row_offset);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user