Merge branch 'master' of https://github.com/Miniwoffer/ClickHouse into Miniwoffer-master

This commit is contained in:
Alexey Milovidov 2019-01-17 18:04:06 +03:00
commit 303c3cc043
3 changed files with 54 additions and 25 deletions

View File

@ -14,7 +14,6 @@
#include <boost/range/join.hpp>
#include <common/logger_useful.h>
namespace DB
{
@ -39,7 +38,7 @@ CapnProtoRowInputStream::NestedField split(const Block & header, size_t i)
if (name.size() > 0 && name[0] == '.')
name.erase(0, 1);
boost::split(field.tokens, name, boost::is_any_of("."));
boost::split(field.tokens, name, boost::is_any_of("._"));
return field;
}
@ -111,31 +110,46 @@ capnp::StructSchema::Field getFieldOrThrow(capnp::StructSchema node, const std::
void CapnProtoRowInputStream::createActions(const NestedFieldList & sortedFields, capnp::StructSchema reader)
{
String last;
size_t level = 0;
capnp::StructSchema::Field parent;
// Store parents and their tokens in order to backtrack
std::vector<capnp::StructSchema::Field> parents;
std::vector<std::string> tokens;
capnp::StructSchema cur_reader = reader;
size_t level = 0;
for (const auto & field : sortedFields)
{
// Move to a different field in the same structure, keep parent
if (level > 0 && field.tokens[level - 1] != last)
// Backtrack to common parent
while(level > (field.tokens.size() - 1) || !checkEqualFrom(tokens, field.tokens, level - 1))
{
auto child = getFieldOrThrow(parent.getContainingStruct(), field.tokens[level - 1]);
reader = child.getType().asStruct();
level--;
actions.push_back({Action::POP});
actions.push_back({Action::PUSH, child});
tokens.pop_back();
parents.pop_back();
if (level > 0)
{
cur_reader = parents[level-1].getType().asStruct();
}
else
{
cur_reader = reader;
break;
}
}
// Descend to a nested structure
// Go forward
for (; level < field.tokens.size() - 1; ++level)
{
auto node = getFieldOrThrow(reader, field.tokens[level]);
auto node = getFieldOrThrow(cur_reader, field.tokens[level]);
if (node.getType().isStruct())
{
// Descend to field structure
last = field.tokens[level];
parent = node;
reader = parent.getType().asStruct();
actions.push_back({Action::PUSH, parent});
parents.push_back(node);
tokens.push_back(field.tokens[level]);
cur_reader = node.getType().asStruct();
actions.push_back({Action::PUSH, node});
}
else if (node.getType().isList())
{
@ -146,7 +160,7 @@ void CapnProtoRowInputStream::createActions(const NestedFieldList & sortedFields
}
// Read field from the structure
auto node = getFieldOrThrow(reader, field.tokens[level]);
auto node = getFieldOrThrow(cur_reader, field.tokens[level]);
if (node.getType().isList() && actions.size() > 0 && actions.back().field == node)
{
// The field list here flattens Nested elements into multiple arrays
@ -188,14 +202,8 @@ CapnProtoRowInputStream::CapnProtoRowInputStream(ReadBuffer & istr_, const Block
for (size_t i = 0; i < num_columns; ++i)
list.push_back(split(header, i));
// Reorder list to make sure we don't have to backtrack
std::sort(list.begin(), list.end(), [](const NestedField & a, const NestedField & b)
{
if (a.tokens.size() == b.tokens.size())
return a.tokens < b.tokens;
return a.tokens.size() < b.tokens.size();
});
// Sort lexicographically by tokens: token strings are compared first, then the length of the token vector.
std::sort(list.begin(), list.end(), [](const NestedField & a, const NestedField & b) { return a.tokens < b.tokens; });
createActions(list, root);
}

View File

@ -0,0 +1 @@
1 2 5 3 4 7 6

View File

@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Functional test for the CapnProto input format.
# Writes a Cap'n Proto schema with structs nested two levels deep, inserts one
# serialized message into a Memory table whose column names join the nesting
# levels with '_', and prints the stored row so it can be compared with the
# reference output.
set -e
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# Quoted so that a checkout path containing spaces still resolves.
# NOTE(review): shell_config.sh presumably defines CLICKHOUSE_CLIENT - confirm.
. "$CURDIR"/../shell_config.sh
# Remove the schema file on every exit path: with `set -e` a failing client
# call aborts the script immediately, so cleanup placed at the end of the
# script would be skipped and test.capnp would be left behind.
trap 'rm -f test.capnp' EXIT
# Create the schema file in the current working directory; it is referenced
# below as --format_schema='test:CapnProto'.
echo "@0x803231eaa402b968;struct NestedNestedOne {nestednestednumber @0 :UInt64;}struct NestedNestedTwo {nestednestedtext @0 :Text;}struct NestedOne {nestednestedone @0 :NestedNestedOne;nestednestedtwo @1 :NestedNestedTwo;nestednumber @2:UInt64;}struct NestedTwo {nestednestedone @0 :NestedNestedOne;nestednestedtwo @1 : NestedNestedTwo;nestedtext @2 :Text;}struct CapnProto { number @0 :UInt64;string @1 :Text;nestedone @2 : NestedOne;nestedtwo @3 : NestedTwo;nestedthree @4 : NestedNestedTwo;}" > test.capnp
# Start from a clean table even if a previous run was interrupted.
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.capnproto_input"
$CLICKHOUSE_CLIENT -q "CREATE TABLE test.capnproto_input ( number UInt64,string String,nestedone_nestednumber UInt64,nestedone_nestednestedone_nestednestednumber UInt64,nestedone_nestednestedtwo_nestednestedtext String,nestedtwo_nestednestedtwo_nestednestedtext String,nestedtwo_nestednestedone_nestednestednumber UInt64,nestedtwo_nestedtext String) ENGINE = Memory"
# Feed one pre-serialized message for the CapnProto struct to the INSERT.
echo -ne '\x00\x00\x00\x00\x15\x00\x00\x00\x00\x00\x00\x00\x01\x00\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x0d\x00\x00\x00\x12\x00\x00\x00\x0c\x00\x00\x00\x01\x00\x02\x00\x20\x00\x00\x00\x00\x00\x03\x00\x34\x00\x00\x00\x00\x00\x01\x00\x32\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00\x04\x00\x00\x00\x00\x00\x01\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x12\x00\x00\x00\x34\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x01\x00\x00\x00\x08\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x12\x00\x00\x00\x37\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x12\x00\x00\x00\x39\x00\x00\x00\x00\x00\x00\x00' | $CLICKHOUSE_CLIENT --stacktrace --format_schema='test:CapnProto' --query="INSERT INTO test.capnproto_input FORMAT CapnProto";
# Print the inserted row; this is the only output the script produces on success.
$CLICKHOUSE_CLIENT -q "SELECT * FROM test.capnproto_input"
$CLICKHOUSE_CLIENT -q "DROP TABLE test.capnproto_input"
# The schema file is removed by the EXIT trap installed above.