This commit is contained in:
Arthur Passos 2022-11-23 11:14:45 -03:00
parent 61b3b244b8
commit 857a64c08d
2 changed files with 140 additions and 9 deletions

View File

@ -1,7 +1,6 @@
#include "parseKeyValue.h"
#include <Columns/ColumnMap.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeString.h>
@ -26,21 +25,16 @@ ColumnPtr ParseKeyValue::executeImpl([[maybe_unused]] const ColumnsWithTypeAndNa
auto column = return_type->createColumn();
[[maybe_unused]] auto * map_column = assert_cast<ColumnMap *>(column.get());
auto data_column = arguments[0].column;
auto item_delimiter = arguments[1].column->getDataAt(0).toView();
auto key_value_pair_delimiter = arguments[2].column->getDataAt(0).toView();
auto [data_column, escape_character, key_value_pair_delimiter, item_delimiter, enclosing_character, value_special_characters_allow_list] = parseArguments(arguments);
for (auto i = 0u; i < data_column->size(); i++)
{
auto row = data_column->getDataAt(i);
auto extractor = KeyValuePairExtractorBuilder()
.withKeyValuePairDelimiter(key_value_pair_delimiter.front())
.withItemDelimiter(item_delimiter.front())
.build();
auto extractor = getExtractor(escape_character, key_value_pair_delimiter, item_delimiter, enclosing_character, value_special_characters_allow_list);
auto response = extractor->extract(row.toString());
for (auto & pair : response) {
std::cout<<pair.first<<": "<<pair.second<<"\n";
}
@ -76,6 +70,119 @@ DataTypePtr ParseKeyValue::getReturnTypeImpl(const DataTypes & /*arguments*/) co
return return_type;
}
ParseKeyValue::ParsedArguments ParseKeyValue::parseArguments(const ColumnsWithTypeAndName & arguments) const
{
if (arguments.empty()) {
// throw exception
return {};
}
auto data_column = arguments[0].column;
if (arguments.size() == 1u)
{
return ParsedArguments {
data_column,
{},
{},
{},
{},
{}
};
}
auto escape_character = arguments[1].column->getDataAt(0).toView().front();
if (arguments.size() == 2u)
{
return ParsedArguments {
data_column,
escape_character,
{},
{},
{},
{}
};
}
auto key_value_pair_delimiter = arguments[2].column->getDataAt(0).toView().front();
if (arguments.size() == 3u)
{
return ParsedArguments {
data_column,
escape_character,
key_value_pair_delimiter,
{},
{},
{}
};
}
auto item_delimiter = arguments[3].column->getDataAt(0).toView().front();
if (arguments.size() == 4u)
{
return ParsedArguments {
data_column,
escape_character,
key_value_pair_delimiter,
item_delimiter,
{},
{}
};
}
auto enclosing_character = arguments[4].column->getDataAt(0).toView().front();
if (arguments.size() == 5u)
{
return ParsedArguments {
data_column,
escape_character,
key_value_pair_delimiter,
item_delimiter,
enclosing_character,
{}
};
}
return ParsedArguments {
data_column,
escape_character,
key_value_pair_delimiter,
item_delimiter,
enclosing_character,
{}
};
}
std::shared_ptr<KeyValuePairExtractor> ParseKeyValue::getExtractor(
CharArgument escape_character, CharArgument key_value_pair_delimiter, CharArgument item_delimiter,
CharArgument enclosing_character, SetArgument value_special_characters_allow_list) const
{
auto builder = KeyValuePairExtractorBuilder();
if (escape_character) {
builder.withEscapeCharacter(escape_character.value());
}
if (key_value_pair_delimiter) {
builder.withKeyValuePairDelimiter(key_value_pair_delimiter.value());
}
if (item_delimiter) {
builder.withItemDelimiter(item_delimiter.value());
}
if (enclosing_character) {
builder.withEnclosingCharacter(enclosing_character.value());
}
builder.withValueSpecialCharacterAllowList(value_special_characters_allow_list);
return builder.build();
}
REGISTER_FUNCTION(ParseKeyValue)
{

View File

@ -1,12 +1,29 @@
#pragma once
#include <unordered_set>
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Columns/ColumnString.h>
#include <Functions/keyvaluepair/KeyValuePairExtractor.h>
namespace DB {
class ParseKeyValue : public IFunction
{
using CharArgument = std::optional<char>;
using SetArgument = std::unordered_set<char>;
struct ParsedArguments {
ColumnPtr data_column;
CharArgument escape_character;
CharArgument key_value_pair_delimiter;
CharArgument item_delimiter;
CharArgument enclosing_character;
std::unordered_set<char> value_special_characters_allow_list;
};
public:
ParseKeyValue();
@ -32,6 +49,13 @@ public:
private:
DataTypePtr return_type;
ParsedArguments parseArguments(const ColumnsWithTypeAndName & arguments) const;
std::shared_ptr<KeyValuePairExtractor> getExtractor(CharArgument escape_character, CharArgument key_value_pair_delimiter,
CharArgument item_delimiter, CharArgument enclosing_character,
SetArgument value_special_characters_allow_list) const;
};
}