Make better, add tests

This commit is contained in:
avogar 2023-07-18 17:48:39 +00:00
parent 67f340b501
commit b300781fd8
5 changed files with 588 additions and 517 deletions

View File

@ -1,4 +1,5 @@
#include <Formats/StructureToFormatSchemaUtils.h>
#include <IO/WriteHelpers.h>
namespace DB
{

View File

@ -1,507 +0,0 @@
#include "config.h"
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFixedString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Interpreters/Context.h>
#include <Common/randomSeed.h>
#include <pcg_random.hpp>
namespace DB
{
/// Error codes thrown by FunctionGenerateRandomStructure below.
/// Only declared here (extern); the definitions are not part of this file.
namespace ErrorCodes
{
/// Thrown by getReturnTypeImpl when more than 7 arguments are passed.
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
/// Thrown by getReturnTypeImpl when an argument has an unsupported type.
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
/// Thrown by executeImpl when the requested number of columns exceeds MAX_NUMBER_OF_COLUMNS.
extern const int BAD_ARGUMENTS;
}
class FunctionGenerateRandomStructure : public IFunction
{
private:
/// Every kind of data type the generator can emit.
/// For enumerators that have no dedicated case in generateRandomTypeImpl, the enumerator
/// name itself is written to the result (via magic_enum::enum_name), so those names
/// are part of the generated output.
enum class Type
{
/// Types without parameters that are printed by name.
Int8,
UInt8,
Bool,
Int16,
UInt16,
Int32,
UInt32,
Int64,
UInt64,
Float32,
Float64,
/// Printed with one random numeric parameter, e.g. DateTime64(3).
DateTime64,
Decimal32,
Decimal64,
Date,
Date32,
DateTime,
String,
/// Printed with a random length, e.g. FixedString(10).
FixedString,
IPv4,
IPv6,
Int128,
UInt128,
Int256,
UInt256,
/// Printed with one random numeric parameter.
Decimal128,
Decimal256,
/// Printed with a generated list of 'name' = value pairs.
Enum8,
Enum16,
/// Wrappers and containers; each one is printed with generated inner type(s).
Nullable,
LowCardinality,
Array,
Tuple,
Map,
Nested,
};
/// Groups of types from which getAllTypes() assembles the candidate list.
/// Each group except simple_types is included only when the corresponding flag is set.

/// Always part of the candidate list.
static constexpr std::array<Type, 16> simple_types
{
Type::Int8,
Type::UInt8,
Type::Bool,
Type::Int16,
Type::UInt16,
Type::Int32,
Type::UInt32,
Type::Int64,
Type::UInt64,
Type::Float32,
Type::Float64,
Type::Date,
Type::Date32,
Type::DateTime,
Type::String,
Type::FixedString,
};
/// Included when allow_big_numbers is set.
static constexpr std::array<Type, 4> big_integer_types
{
Type::Int128,
Type::UInt128,
Type::Int256,
Type::UInt256,
};
/// Included when allow_decimals is set. Note that DateTime64 is grouped with the decimals.
static constexpr std::array<Type, 3> decimal_types
{
Type::DateTime64,
Type::Decimal32,
Type::Decimal64,
};
/// Included only when both allow_big_numbers and allow_decimals are set.
static constexpr std::array<Type, 2> big_decimal_types
{
Type::Decimal128,
Type::Decimal256,
};
/// Included when allow_enums is set.
static constexpr std::array<Type, 2> enum_types
{
Type::Enum8,
Type::Enum16,
};
/// Included when allow_ip is set.
static constexpr std::array<Type, 2> ip_types
{
Type::IPv4,
Type::IPv6,
};
/// Included when allow_complex_types is set.
static constexpr std::array<Type, 6> complex_types
{
Type::Nullable,
Type::LowCardinality,
Type::Array,
Type::Tuple,
Type::Map,
Type::Nested,
};
/// Candidates for a Map key when keys are not restricted to string types (see generateMapKeyType).
static constexpr std::array<Type, 14> map_key_types
{
Type::Int8,
Type::UInt8,
Type::Bool,
Type::Int16,
Type::UInt16,
Type::Int32,
Type::UInt32,
Type::Int64,
Type::UInt64,
Type::Date,
Type::Date32,
Type::DateTime,
Type::String,
Type::FixedString,
};
/// Candidates for a Map key when only string keys are allowed (see generateMapKeyType).
static constexpr std::array<Type, 2> map_key_string_types
{
Type::String,
Type::FixedString
};
/// Inclusive upper bound on the number of columns, both for the user-supplied value
/// (checked in executeImpl) and for the randomly chosen one (generateNumberOfColumns).
static constexpr size_t MAX_NUMBER_OF_COLUMNS = 128;
/// Inclusive upper bound on the number of elements generated for Tuple and Nested.
static constexpr size_t MAX_TUPLE_ELEMENTS = 16;
/// Moduli for the random numeric parameter written inside DateTime64(N) / DecimalXX(N):
/// the generated N is `rng() % MAX`, i.e. it lies in [0, MAX - 1].
static constexpr size_t MAX_DATETIME64_PRECISION = 9;
static constexpr size_t MAX_DECIMAL32_PRECISION = 9;
static constexpr size_t MAX_DECIMAL64_PRECISION = 18;
static constexpr size_t MAX_DECIMAL128_PRECISION = 38;
static constexpr size_t MAX_DECIMAL256_PRECISION = 76;
/// NOTE(review): presumably the intended cap on the nesting depth tracked by the `depth`
/// parameter of generateRandomTypeImpl - confirm against that function.
static constexpr size_t MAX_DEPTH = 32;
public:
/// Name of the function as reported by getName() and used in error messages.
static constexpr auto name = "generateRandomStructure";

/// Factory entry point; the query context is not used.
static FunctionPtr create(ContextPtr /*context*/)
{
    return std::make_shared<FunctionGenerateRandomStructure>();
}

String getName() const override
{
    return name;
}

/// The function is variadic, so the fixed argument count is reported as 0.
size_t getNumberOfArguments() const override
{
    return 0;
}

bool isVariadic() const override
{
    return true;
}

/// The result is random, so it is reported as non-deterministic,
/// including within the scope of a single query.
bool isDeterministic() const override
{
    return false;
}

bool isDeterministicInScopeOfQuery() const override
{
    return false;
}

bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override
{
    return false;
}

/// All seven possible arguments are declared as always constant.
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override
{
    return {0, 1, 2, 3, 4, 5, 6};
}

/// Constants and NULLs are handled by executeImpl itself, not by the default wrappers.
bool useDefaultImplementationForConstants() const override
{
    return false;
}

bool useDefaultImplementationForNulls() const override
{
    return false;
}
/// Validates the argument types and returns the result type, which is always String.
///
/// Accepts from 0 to 7 arguments:
///   - arguments 1 and 2 (number of columns, seed) must be unsigned integers or Null;
///   - arguments 3 to 7 (boolean flags) must be UInt8.
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH when more than 7 arguments are passed and
/// ILLEGAL_TYPE_OF_ARGUMENT when an argument has an unexpected type.
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
    constexpr size_t max_arguments = 7;
    constexpr size_t integer_arguments = 2;

    if (arguments.size() > max_arguments)
        throw Exception(
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Number of arguments for function {} doesn't match: passed {}, expected from 0 to 7",
            getName(), arguments.size());

    for (size_t i = 0; i != integer_arguments && i < arguments.size(); ++i)
    {
        if (!isUnsignedInteger(arguments[i]) && !arguments[i]->onlyNull())
        {
            /// Placeholder order is: type name, argument position, function name.
            throw Exception(
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Illegal type {} of the {} argument of function {}, expected unsigned integer or Null",
                arguments[i]->getName(),
                i + 1,
                getName());
        }
    }

    for (size_t i = integer_arguments; i != max_arguments && i < arguments.size(); ++i)
    {
        if (!isUInt8(arguments[i]))
        {
            /// Placeholder order is: type name, argument position, function name.
            throw Exception(
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Illegal type {} of the {} argument of function {}, expected UInt8",
                arguments[i]->getName(),
                i + 1,
                getName());
        }
    }

    return std::make_shared<DataTypeString>();
}
/// Builds the random structure string ("c1 Type1, c2 Type2, ...") and returns it as a
/// constant String column of input_rows_count rows.
///
/// Arguments (all optional): number of columns, seed, then five boolean flags
/// (big numbers, enums, decimals, ip types, string-only map keys).
/// Throws BAD_ARGUMENTS when the requested number of columns exceeds MAX_NUMBER_OF_COLUMNS.
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
    const size_t arguments_count = arguments.size();

    /// A random seed is always drawn first; an explicit non-Null second argument replaces it below.
    size_t seed = randomSeed();

    /// 0 means "not specified": the number of columns is then chosen randomly.
    size_t number_of_columns = 0;
    const bool columns_specified = arguments_count > 0 && !arguments[0].column->onlyNull();
    if (columns_specified)
    {
        number_of_columns = arguments[0].column->getUInt(0);
        if (number_of_columns > MAX_NUMBER_OF_COLUMNS)
            throw Exception(
                ErrorCodes::BAD_ARGUMENTS,
                "Maximum allowed number of columns is {}, got {}",
                MAX_NUMBER_OF_COLUMNS,
                number_of_columns);
    }

    const bool seed_specified = arguments_count > 1 && !arguments[1].column->onlyNull();
    if (seed_specified)
        seed = arguments[1].column->getUInt(0);

    /// Reads the boolean flag at the given position or falls back to its default when absent.
    const auto read_flag = [&](size_t position, bool default_value) -> bool
    {
        if (arguments_count > position)
            return arguments[position].column->getBool(0);
        return default_value;
    };

    const bool allow_big_numbers = read_flag(2, true);
    const bool allow_enums = read_flag(3, true);
    const bool allow_decimals = read_flag(4, true);
    const bool allow_ip = read_flag(5, true);
    const bool only_string_map_key = read_flag(6, false);

    pcg64 rng(seed);
    if (number_of_columns == 0)
        number_of_columns = generateNumberOfColumns(rng);

    String generated_structure;
    for (size_t column_number = 1; column_number <= number_of_columns; ++column_number)
    {
        if (column_number > 1)
            generated_structure += ", ";

        const String column_name = "c" + std::to_string(column_number);
        generated_structure += column_name;
        generated_structure += " ";
        generated_structure += generateRandomType(column_name, rng, allow_big_numbers, allow_enums, allow_decimals, allow_ip, only_string_map_key);
    }

    auto col_res = ColumnString::create();
    col_res->insert(generated_structure);
    return ColumnConst::create(std::move(col_res), input_rows_count);
}
private:
/// Picks a random number of columns in the range [1, MAX_NUMBER_OF_COLUMNS].
size_t generateNumberOfColumns(pcg64 & rng) const
{
    const size_t zero_based = rng() % MAX_NUMBER_OF_COLUMNS;
    return zero_based + 1;
}
/// Turns a run of runtime booleans into the bool template arguments of generateRandomTypeImpl.
/// Each recursive step consumes one runtime bool and appends it to the template pack, which
/// avoids writing a big if/else over every combination of flags by hand.
template <bool... Resolved>
struct Dispatcher
{
    /// Terminal step: no runtime flags are left, forward to the implementation.
    static auto call(const FunctionGenerateRandomStructure * f, const String & column_name, pcg64 & rng)
    {
        return f->generateRandomTypeImpl<Resolved...>(column_name, rng);
    }

    /// Recursive step: move the first remaining runtime flag into the template pack.
    template <class... Remaining>
    static auto call(const FunctionGenerateRandomStructure * f, const String & column_name, pcg64 & rng, bool flag, Remaining... remaining)
    {
        using WithTrue = Dispatcher<Resolved..., true>;
        using WithFalse = Dispatcher<Resolved..., false>;

        if (flag)
            return WithTrue::call(f, column_name, rng, remaining...);
        return WithFalse::call(f, column_name, rng, remaining...);
    }

    friend FunctionGenerateRandomStructure;
};
/// Generates the textual type of one top-level column.
/// The runtime flags are converted into template arguments through Dispatcher.
String generateRandomType(const String & column_name, pcg64 & rng, bool allow_big_numbers, bool allow_enums, bool allow_decimals, bool allow_ip, bool allow_only_string_map_keys) const
{
    /// Complex types are always allowed at the top level.
    constexpr bool allow_complex_types = true;

    using FlagDispatcher = Dispatcher<>;
    return FlagDispatcher::call(
        this, column_name, rng,
        allow_big_numbers, allow_enums, allow_decimals, allow_ip, allow_only_string_map_keys, allow_complex_types);
}
/// Generates the textual representation of one random data type.
///
/// Template flags select which groups of types may be chosen (see getAllTypes);
/// allow_only_string_map_keys is forwarded to generateMapKeyType.
/// column_name is used as a prefix for generated enum value names.
/// depth is the current nesting level; once it exceeds MAX_DEPTH no further complex
/// (recursive) types are generated, which bounds the recursion and the size of the result.
template <bool allow_big_numbers, bool allow_enums, bool allow_decimals, bool allow_ip, bool allow_only_string_map_keys, bool allow_complex_types>
String generateRandomTypeImpl(const String & column_name, pcg64 & rng, size_t depth = 0) const
{
    /// Enforce the nesting limit: too deep, so fall back to the variant without complex types.
    /// No random numbers are consumed here, so results for shallow structures are unchanged.
    if constexpr (allow_complex_types)
    {
        if (depth > MAX_DEPTH)
            return generateRandomTypeImpl<allow_big_numbers, allow_enums, allow_decimals, allow_ip, allow_only_string_map_keys, false>(column_name, rng, depth);
    }

    constexpr auto all_types = getAllTypes<allow_big_numbers, allow_enums, allow_decimals, allow_ip, allow_complex_types>();
    auto type = all_types[rng() % all_types.size()];

    switch (type)
    {
        case Type::FixedString:
            return "FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")";
        case Type::DateTime64:
            return "DateTime64(" + std::to_string(rng() % MAX_DATETIME64_PRECISION) + ")";
        case Type::Decimal32:
            return "Decimal32(" + std::to_string(rng() % MAX_DECIMAL32_PRECISION) + ")";
        case Type::Decimal64:
            return "Decimal64(" + std::to_string(rng() % MAX_DECIMAL64_PRECISION) + ")";
        case Type::Decimal128:
            return "Decimal128(" + std::to_string(rng() % MAX_DECIMAL128_PRECISION) + ")";
        case Type::Decimal256:
            return "Decimal256(" + std::to_string(rng() % MAX_DECIMAL256_PRECISION) + ")";
        case Type::Enum8:
            return "Enum8(" + generateEnumValues(column_name, rng) + ")";
        case Type::Enum16:
            return "Enum16(" + generateEnumValues(column_name, rng) + ")";
        case Type::LowCardinality:
            return "LowCardinality(" + generateLowCardinalityNestedType(rng) + ")";
        case Type::Nullable:
        {
            /// The type inside Nullable is never a complex one.
            auto nested_type = generateRandomTypeImpl<allow_big_numbers, allow_enums, allow_decimals, allow_ip, allow_only_string_map_keys, false>(column_name, rng, depth + 1);
            return "Nullable(" + nested_type + ")";
        }
        case Type::Array:
        {
            auto nested_type = generateRandomTypeImpl<allow_big_numbers, allow_enums, allow_decimals, allow_ip, allow_only_string_map_keys, true>(column_name, rng, depth + 1);
            return "Array(" + nested_type + ")";
        }
        case Type::Map:
        {
            auto key_type = generateMapKeyType<allow_only_string_map_keys>(rng);
            auto value_type = generateRandomTypeImpl<allow_big_numbers, allow_enums, allow_decimals, allow_ip, allow_only_string_map_keys, true>(column_name, rng, depth + 1);
            return "Map(" + key_type + ", " + value_type + ")";
        }
        case Type::Tuple:
        {
            size_t elements = rng() % MAX_TUPLE_ELEMENTS + 1;
            /// Randomly choose between a named tuple (e1 T1, e2 T2, ...) and an unnamed one (T1, T2, ...).
            bool named_tuple = rng() % 2;
            String tuple_type = "Tuple(";
            for (size_t i = 0; i != elements; ++i)
            {
                if (i != 0)
                    tuple_type += ", ";

                String element_name = "e" + std::to_string(i + 1);
                if (named_tuple)
                    tuple_type += element_name + " ";
                tuple_type += generateRandomTypeImpl<allow_big_numbers, allow_enums, allow_decimals, allow_ip, allow_only_string_map_keys, true>(element_name, rng, depth + 1);
            }
            return tuple_type + ")";
        }
        case Type::Nested:
        {
            size_t elements = rng() % MAX_TUPLE_ELEMENTS + 1;
            String nested_type = "Nested(";
            for (size_t i = 0; i != elements; ++i)
            {
                if (i != 0)
                    nested_type += ", ";

                String element_name = "e" + std::to_string(i + 1);
                auto element_type = generateRandomTypeImpl<allow_big_numbers, allow_enums, allow_decimals, allow_ip, allow_only_string_map_keys, true>(element_name, rng, depth + 1);
                nested_type += element_name + " " + element_type;
            }
            return nested_type + ")";
        }
        default:
            /// Every remaining type is written by its enumerator name.
            return String(magic_enum::enum_name<Type>(type));
    }
}
/// Generates the textual type of a Map key.
/// With allow_only_string_map_keys the key is chosen from map_key_string_types,
/// otherwise from map_key_types. FixedString gets a random length.
template <bool allow_only_string_map_keys>
String generateMapKeyType(pcg64 & rng) const
{
    const auto pick = [&rng](const auto & candidates) { return candidates[rng() % candidates.size()]; };

    Type key_type;
    if constexpr (allow_only_string_map_keys)
        key_type = pick(map_key_string_types);
    else
        key_type = pick(map_key_types);

    if (key_type != Type::FixedString)
        return String(magic_enum::enum_name<Type>(key_type));

    const auto length = rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1;
    return "FixedString(" + std::to_string(length) + ")";
}
/// Generates the type placed inside LowCardinality(...).
/// Only String and FixedString(N) are produced, each optionally wrapped in Nullable.
String generateLowCardinalityNestedType(pcg64 & rng) const
{
    String inner = "String";

    const bool use_plain_string = rng() % 2;
    if (!use_plain_string)
        inner = "FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")";

    const bool keep_non_nullable = rng() % 2;
    if (keep_non_nullable)
        return inner;
    return "Nullable(" + inner + ")";
}
/// Generates the body of an Enum definition: "'<column>V0' = 0, '<column>V1' = 1, ...".
/// The number of values is kept small (1 to 16) on purpose: big enums would produce
/// really big strings, slow this function down, and could lead to `Max query size exceeded`
/// when the result is used together with generateRandom.
String generateEnumValues(const String & column_name, pcg64 & rng) const
{
    const ssize_t values_count = rng() % 16 + 1;

    String enum_values;
    for (ssize_t value = 0; value < values_count; ++value)
    {
        const String value_text = std::to_string(value);

        if (!enum_values.empty())
            enum_values += ", ";

        enum_values += "'";
        enum_values += column_name;
        enum_values += "V";
        enum_values += value_text;
        enum_values += "' = ";
        enum_values += value_text;
    }
    return enum_values;
}
template <bool allow_big_numbers, bool allow_enums, bool allow_decimals, bool allow_ip, bool allow_complex_types>
static constexpr auto getAllTypes()
{
constexpr size_t big_integer_types_size = big_integer_types.size() * allow_big_numbers;
constexpr size_t enum_types_size = enum_types.size() * allow_enums;
constexpr size_t decimal_types_size = decimal_types.size() * allow_decimals;
constexpr size_t big_decimal_types_size = big_decimal_types.size() * allow_big_numbers * allow_decimals;
constexpr size_t ip_types_size = ip_types.size() * allow_ip;
constexpr size_t complex_types_size = complex_types.size() * allow_complex_types;
constexpr size_t result_size = simple_types.size() + big_integer_types_size + enum_types_size + decimal_types_size
+ big_decimal_types_size + ip_types_size + complex_types_size;
std::array<Type, result_size> result;
size_t index = 0;
for (size_t i = 0; i != simple_types.size(); ++i, ++index)
result[index] = simple_types[i];
for (size_t i = 0; i != big_integer_types_size; ++i, ++index)
result[index] = big_integer_types[i];
for (size_t i = 0; i != enum_types_size; ++i, ++index)
result[index] = enum_types[i];
for (size_t i = 0; i != decimal_types_size; ++i, ++index)
result[index] = decimal_types[i];
for (size_t i = 0; i != big_decimal_types_size; ++i, ++index)
result[index] = big_decimal_types[i];
for (size_t i = 0; i != ip_types_size; ++i, ++index)
result[index] = ip_types[i];
for (size_t i = 0; i != complex_types_size; ++i, ++index)
result[index] = complex_types[i];
return result;
}
};
/// Registers generateRandomStructure (case-sensitive name) together with its user-facing documentation.
REGISTER_FUNCTION(GenerateRandomStructure)
{
    factory.registerFunction<FunctionGenerateRandomStructure>(
        {
            R"(
Generates a random table structure.
This function takes 7 optional constant arguments:
1) the number of columns in the result structure (random by default)
2) random seed (random by default)
3) flag that indicates if big number types can be used (true by default)
4) flag that indicates if enum types can be used (true by default)
5) flag that indicates if decimal types can be used (true by default)
6) flag that indicates if ip types (IPv4, IPv6) can be used (true by default)
7) flag that indicates if map keys should be only String or FixedString (false by default)
The maximum number of columns is 128.
The function returns a value of type String.
)",
            Documentation::Examples{
                {"random", "SELECT generateRandomStructure()"},
                {"with specified number of columns", "SELECT generateRandomStructure(10)"},
                {"with specified seed", "SELECT generateRandomStructure(10, 42)"},
                {"without big number types", "SELECT generateRandomStructure(10, NULL, false)"},
                {"without enum types", "SELECT generateRandomStructure(10, NULL, true, false)"},
                {"without decimal types", "SELECT generateRandomStructure(10, NULL, true, true, false)"},
                {"without ip types", "SELECT generateRandomStructure(10, NULL, true, true, true, false)"},
                {"with only string map key types", "SELECT generateRandomStructure(10, NULL, true, true, true, true, true)"},
            },
            Documentation::Categories{"Random"}
        },
        FunctionFactory::CaseSensitive);
}
}

View File

@ -105,15 +105,15 @@ private:
REGISTER_FUNCTION(StructureToCapnProtoSchema)
{
factory.registerFunction<FunctionStructureToFormatSchema<StructureToCapnProtoSchema>>(
factory.registerFunction<FunctionStructureToFormatSchema<StructureToCapnProtoSchema>>(FunctionDocumentation
{
R"(
.description=R"(
)",
Documentation::Examples{
{"random", "SELECT structureToCapnProtoSchema()"},
.examples{
{"random", "SELECT structureToCapnProtoSchema('s String, x UInt32', 'MessageName')", ""},
},
Documentation::Categories{"Other"}
.categories{"Other"}
},
FunctionFactory::CaseSensitive);
}
@ -121,15 +121,15 @@ REGISTER_FUNCTION(StructureToCapnProtoSchema)
REGISTER_FUNCTION(StructureToProtobufSchema)
{
factory.registerFunction<FunctionStructureToFormatSchema<StructureToProtobufSchema>>(
factory.registerFunction<FunctionStructureToFormatSchema<StructureToProtobufSchema>>(FunctionDocumentation
{
R"(
.description=R"(
)",
Documentation::Examples{
{"random", "SELECT structureToCapnProtoSchema()"},
.examples{
{"random", "SELECT structureToCapnProtoSchema()", ""},
},
Documentation::Categories{"Other"}
.categories{"Other"}
},
FunctionFactory::CaseSensitive);
}

View File

@ -0,0 +1,488 @@
CapnProto
Numbers
@0xfcfddd851150d4a7;
struct Message
{
int8 @0 : Int8;
uint8 @1 : UInt8;
int16 @2 : Int16;
uint16 @3 : UInt16;
int32 @4 : Int32;
uint32 @5 : UInt32;
int64 @6 : Int64;
uint64 @7 : UInt64;
int128 @8 : Data;
uint128 @9 : Data;
int256 @10 : Data;
uint256 @11 : Data;
float32 @12 : Float32;
float64 @13 : Float64;
decimal32 @14 : Int32;
decimal64 @15 : Int64;
decimal128 @16 : Data;
decimal256 @17 : Data;
}
Dates
@0xf6c4d408fe41b545;
struct Message
{
data @0 : UInt16;
date32 @1 : Int32;
datetime @2 : UInt32;
datatime64 @3 : Int64;
}
Strings
@0xaa8865faa0622d35;
struct Message
{
string @0 : Data;
fixedstring @1 : Data;
}
Special
@0xc4a88da5454bdc56;
struct Message
{
ipv4 @0 : UInt32;
ipv6 @1 : Data;
uuid @2 : Data;
}
Nullable
@0xec817ea81ba08bcf;
struct Message
{
struct Nullable
{
union
{
value @0 : UInt32;
null @1 : Void;
}
}
nullable @0 : Nullable;
}
Enums
@0x9d57ed1a12d87f28;
struct Message
{
enum Enum8
{
v1 @0;
v2 @1;
v3 @2;
v4 @3;
}
enum8 @0 : Enum8;
enum Enum16
{
v5 @0;
v6 @1;
v7 @2;
v8 @3;
v9 @4;
}
enum16 @1 : Enum16;
}
Arrays
@0xc10c035fde2e533a;
struct Message
{
arr1 @0 : List(UInt32);
arr2 @1 : List(List(List(UInt32)));
}
Tuples
@0xfbb3d48432e54b68;
struct Message
{
struct Tuple1
{
e1 @0 : UInt32;
e2 @1 : Data;
e3 @2 : UInt32;
}
tuple1 @0 : Tuple1;
struct Tuple2
{
struct E1
{
e1 @0 : UInt32;
struct E2
{
e1 @0 : Data;
e2 @1 : UInt32;
}
e2 @1 : E2;
e3 @2 : Data;
}
e1 @0 : E1;
struct E2
{
e1 @0 : Data;
e2 @1 : UInt32;
}
e2 @1 : E2;
}
tuple2 @1 : Tuple2;
}
Maps
@0xfdb6e3129839665e;
struct Message
{
struct Map1
{
struct Entry
{
key @0 : Data;
value @1 : UInt32;
}
entries @0 : List(Entry);
}
map1 @0 : Map1;
struct Map2
{
struct Entry
{
struct Value
{
struct Entry
{
struct Value
{
struct Entry
{
key @0 : Data;
value @1 : UInt32;
}
entries @0 : List(Entry);
}
key @0 : Data;
value @1 : Value;
}
entries @0 : List(Entry);
}
key @0 : Data;
value @1 : Value;
}
entries @0 : List(Entry);
}
map2 @1 : Map2;
}
Complex
@0x8bfcfe1141c8e85c;
struct Message
{
struct C1
{
struct E1
{
struct Entry
{
struct Value
{
union
{
value @0 : UInt32;
null @1 : Void;
}
}
key @0 : Data;
value @1 : List(List(Value));
}
entries @0 : List(Entry);
}
e1 @0 : List(E1);
struct E2
{
struct Entry
{
struct Value
{
struct E1
{
union
{
value @0 : Data;
null @1 : Void;
}
}
e1 @0 : List(List(E1));
struct E2
{
e1 @0 : UInt32;
struct E2
{
struct E1
{
union
{
value @0 : Data;
null @1 : Void;
}
}
e1 @0 : List(List(E1));
e2 @1 : UInt32;
}
e2 @1 : E2;
}
e2 @1 : List(E2);
}
key @0 : Data;
value @1 : Value;
}
entries @0 : List(Entry);
}
e2 @1 : List(E2);
}
c1 @0 : C1;
}
Read/write with no schema
0
1
2
3
4
5
6
7
8
9
Output schema
@0xa43428a60b94a646;
struct Message
{
number @0 : UInt64;
}
Bad output schema path
2
2
Protobuf
Numbers
syntax = "proto3";
message Message
{
int32 int8 = 1;
uint32 uint8 = 2;
int32 int16 = 3;
uint32 uint16 = 4;
int32 int32 = 5;
uint32 uint32 = 6;
int64 int64 = 7;
uint64 uint64 = 8;
bytes int128 = 9;
bytes uint128 = 10;
bytes int256 = 11;
bytes uint256 = 12;
float float32 = 13;
double float64 = 14;
bytes decimal32 = 15;
bytes decimal64 = 16;
bytes decimal128 = 17;
bytes decimal256 = 18;
}
Dates
syntax = "proto3";
message Message
{
uint32 data = 1;
int32 date32 = 2;
uint32 datetime = 3;
uint64 datatime64 = 4;
}
Strings
syntax = "proto3";
message Message
{
bytes string = 1;
bytes fixedstring = 2;
}
Special
syntax = "proto3";
message Message
{
uint32 ipv4 = 1;
bytes ipv6 = 2;
bytes uuid = 3;
}
Nullable
syntax = "proto3";
message Message
{
uint32 nullable = 1;
}
Enums
syntax = "proto3";
message Message
{
enum Enum8
{
v1 = 0;
v2 = 1;
v3 = 2;
v4 = 3;
}
Enum8 enum8 = 1;
enum Enum16
{
v5 = 0;
v6 = 1;
v7 = 2;
v8 = 3;
v9 = 4;
}
Enum16 enum16 = 2;
}
Arrays
syntax = "proto3";
message Message
{
repeated uint32 arr1 = 1;
message Arr2
{
message Arr2
{
repeated uint32 arr2 = 1;
}
repeated Arr2 arr2 = 1;
}
repeated Arr2 arr2 = 2;
}
Tuples
syntax = "proto3";
message Message
{
message Tuple1
{
uint32 e1 = 1;
bytes e2 = 2;
uint32 e3 = 3;
}
Tuple1 tuple1 = 1;
message Tuple2
{
message E1
{
uint32 e1 = 1;
message E2
{
bytes e1 = 1;
uint32 e2 = 2;
}
E2 e2 = 2;
bytes e3 = 3;
}
E1 e1 = 1;
message E2
{
bytes e1 = 1;
uint32 e2 = 2;
}
E2 e2 = 2;
}
Tuple2 tuple2 = 2;
}
Maps
syntax = "proto3";
message Message
{
map<string, uint32> map1 = 1;
message Map2Value
{
message Map2ValueValue
{
map<string, uint32> map2ValueValue = 1;
}
map<string, Map2ValueValue> map2Value = 1;
}
map<string, Map2Value> map2 = 2;
}
Complex
syntax = "proto3";
message Message
{
message C1
{
message E1
{
message E1Value
{
message E1Value
{
repeated uint32 e1Value = 1;
}
repeated E1Value e1Value = 1;
}
map<string, E1Value> e1 = 1;
}
repeated E1 e1 = 1;
message E2
{
message E2Value
{
message E1
{
repeated bytes e1 = 1;
}
repeated E1 e1 = 1;
message E2
{
uint32 e1 = 1;
message E2
{
message E1
{
repeated bytes e1 = 1;
}
repeated E1 e1 = 1;
uint32 e2 = 2;
}
E2 e2 = 2;
}
repeated E2 e2 = 2;
}
map<string, E2Value> e2 = 1;
}
repeated E2 e2 = 2;
}
C1 c1 = 1;
}
Read/write with no schema
0
1
2
3
4
5
6
7
8
9
Output schema
syntax = "proto3";
message Message
{
uint64 number = 1;
}
Bad output schema path
2
2

View File

@ -0,0 +1,89 @@
#!/usr/bin/env bash
# Test for structureToCapnProtoSchema / structureToProtobufSchema and the
# output_format_schema setting: schemas are generated, printed, and used to
# write and read back data in the corresponding format.

# Directory of this script; used to locate the shared test configuration.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
# Per-test file names (data file and schema file prefix) derived from the unique test name.
DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME-data
SCHEMA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME-schema
# Generates a schema for a table structure, prints it, and round-trips random data through it.
# Arguments:
#   $1 - format name, inserted into structureTo<format>Schema and used as the data format
#   $2 - extension of the schema file
#   $3 - table structure (single quotes inside must already be escaped for SQL)
# Note: $CLICKHOUSE_LOCAL is left unquoted on purpose so that a multi-word command still works.
function test_structure()
{
    # Locals keep these values from overwriting same-named variables of the caller.
    local format=$1
    local ext=$2
    local structure=$3
    local schema_path="$SCHEMA_FILE.$ext"

    $CLICKHOUSE_LOCAL -q "select structureTo${format}Schema('$structure') format TSVRaw" > "$schema_path"
    cat "$schema_path"

    # Write random data with the generated schema, then read it back to check the schema is usable.
    $CLICKHOUSE_LOCAL -q "select * from generateRandom('$structure', 42) limit 10 format $format settings format_schema='$SCHEMA_FILE:Message', format_capn_proto_enum_comparising_mode='by_names'" > "$DATA_FILE"
    $CLICKHOUSE_LOCAL -q "select * from file('$DATA_FILE', $format, '$structure') format Null settings format_schema='$SCHEMA_FILE:Message', format_capn_proto_enum_comparising_mode='by_names'"
}
# Runs the whole scenario for one format: schema generation for several groups of types,
# reading/writing without an explicit schema, writing the schema via output_format_schema,
# and rejection of output schema paths that are not allowed.
# Arguments:
#   $1 - format name
#   $2 - extension of the schema file
# Note: $CLICKHOUSE_LOCAL / $CLICKHOUSE_CLIENT are left unquoted on purpose so that
# multi-word commands still work.
function test_format()
{
    local format=$1
    local ext=$2
    local numbers dates strings special nullable enums arrays tuples maps complex

    echo "$format"

    echo Numbers
    numbers='int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, int128 Int128, uint128 UInt128, int256 Int256, uint256 UInt256, float32 Float32, float64 Float64, decimal32 Decimal32(3), decimal64 Decimal64(10), decimal128 Decimal128(20), decimal256 Decimal256(40)'
    test_structure "$format" "$ext" "$numbers"

    echo Dates
    dates='data Date, date32 Date32, datetime DateTime, datatime64 DateTime64(9)'
    test_structure "$format" "$ext" "$dates"

    echo Strings
    strings='string String, fixedstring FixedString(42)'
    test_structure "$format" "$ext" "$strings"

    echo Special
    special='ipv4 IPv4, ipv6 IPv6, uuid UUID'
    test_structure "$format" "$ext" "$special"

    echo Nullable
    nullable='nullable Nullable(UInt32)'
    test_structure "$format" "$ext" "$nullable"

    echo Enums
    # Single quotes are doubled because the structure is embedded into a quoted SQL string.
    enums="enum8 Enum8(''v1'' = -100, ''v2'' = -10, ''v3'' = 0, ''v4'' = 42), enum16 Enum16(''v5'' = -2000, ''v6'' = -1000, ''v7'' = 0, ''v8'' = 1000, ''v9'' = 2000)"
    test_structure "$format" "$ext" "$enums"

    echo Arrays
    arrays='arr1 Array(UInt32), arr2 Array(Array(Array(UInt32)))'
    test_structure "$format" "$ext" "$arrays"

    echo Tuples
    tuples='tuple1 Tuple(e1 UInt32, e2 String, e3 DateTime), tuple2 Tuple(e1 Tuple(e1 UInt32, e2 Tuple(e1 String, e2 DateTime), e3 String), e2 Tuple(e1 String, e2 UInt32))'
    test_structure "$format" "$ext" "$tuples"

    echo Maps
    maps='map1 Map(String, UInt32), map2 Map(String, Map(String, Map(String, UInt32)))'
    test_structure "$format" "$ext" "$maps"

    echo Complex
    complex='c1 Array(Tuple(e1 Map(String, Array(Array(Nullable(UInt32)))), e2 Map(String, Tuple(e1 Array(Array(Nullable(String))), e2 Nested(e1 UInt32, e2 Tuple(e1 Array(Array(Nullable(String))), e2 UInt32))))))'
    test_structure "$format" "$ext" "$complex"

    echo "Read/write with no schema"
    $CLICKHOUSE_LOCAL -q "select * from numbers(10) format $format" > "$DATA_FILE"
    $CLICKHOUSE_LOCAL -q "select * from file('$DATA_FILE', $format, 'number UInt64')"

    echo "Output schema"
    $CLICKHOUSE_LOCAL -q "select * from numbers(10) format $format settings output_format_schema='$SCHEMA_FILE.$ext'" > "$DATA_FILE"
    cat "$SCHEMA_FILE.$ext"

    echo "Bad output schema path"
    # Each command prints the number of output lines that mention BAD_ARGUMENTS.
    $CLICKHOUSE_CLIENT -q "insert into function file('$DATA_FILE', $format) select * from numbers(10) settings output_format_schema='/tmp/schema.$ext'" 2>&1 | grep "BAD_ARGUMENTS" -c
    $CLICKHOUSE_CLIENT -q "insert into function file('$DATA_FILE', $format) select * from numbers(10) settings output_format_schema='../../schema.$ext'" 2>&1 | grep "BAD_ARGUMENTS" -c
}
# Run the scenario for both supported formats.
test_format CapnProto capnp
test_format Protobuf proto

# Remove the files created by the test. The paths are quoted so that they are not
# word-split; the trailing * stays outside the quotes so that it still globs
# every schema file sharing the prefix.
rm "$DATA_FILE"
rm "$SCHEMA_FILE"*