From b300781fd8995a4e8feac0e58a9b756c17fe93f7 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 18 Jul 2023 17:48:39 +0000 Subject: [PATCH] Make better, add tests --- src/Formats/StructureToFormatSchemaUtils.cpp | 1 + src/Functions/generateRandomStructure.cpp | 507 ------------------ src/Functions/structureToFormatSchema.cpp | 20 +- .../02817_structure_to_schema.reference | 488 +++++++++++++++++ .../0_stateless/02817_structure_to_schema.sh | 89 +++ 5 files changed, 588 insertions(+), 517 deletions(-) delete mode 100644 src/Functions/generateRandomStructure.cpp create mode 100644 tests/queries/0_stateless/02817_structure_to_schema.reference create mode 100755 tests/queries/0_stateless/02817_structure_to_schema.sh diff --git a/src/Formats/StructureToFormatSchemaUtils.cpp b/src/Formats/StructureToFormatSchemaUtils.cpp index 02c9af2c65f..fd187c033ec 100644 --- a/src/Formats/StructureToFormatSchemaUtils.cpp +++ b/src/Formats/StructureToFormatSchemaUtils.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { diff --git a/src/Functions/generateRandomStructure.cpp b/src/Functions/generateRandomStructure.cpp deleted file mode 100644 index 5f2153ff89f..00000000000 --- a/src/Functions/generateRandomStructure.cpp +++ /dev/null @@ -1,507 +0,0 @@ -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int BAD_ARGUMENTS; -} - -class FunctionGenerateRandomStructure : public IFunction -{ -private: - enum class Type - { - Int8, - UInt8, - Bool, - Int16, - UInt16, - Int32, - UInt32, - Int64, - UInt64, - Float32, - Float64, - DateTime64, - Decimal32, - Decimal64, - Date, - Date32, - DateTime, - String, - FixedString, - IPv4, - IPv6, - Int128, - UInt128, - Int256, - UInt256, - Decimal128, - Decimal256, - Enum8, - Enum16, - Nullable, - LowCardinality, - Array, - Tuple, - 
Map, - Nested, - }; - - static constexpr std::array simple_types - { - Type::Int8, - Type::UInt8, - Type::Bool, - Type::Int16, - Type::UInt16, - Type::Int32, - Type::UInt32, - Type::Int64, - Type::UInt64, - Type::Float32, - Type::Float64, - Type::Date, - Type::Date32, - Type::DateTime, - Type::String, - Type::FixedString, - }; - - static constexpr std::array big_integer_types - { - Type::Int128, - Type::UInt128, - Type::Int256, - Type::UInt256, - }; - - static constexpr std::array decimal_types - { - Type::DateTime64, - Type::Decimal32, - Type::Decimal64, - }; - - static constexpr std::array big_decimal_types - { - Type::Decimal128, - Type::Decimal256, - }; - - static constexpr std::array enum_types - { - Type::Enum8, - Type::Enum16, - }; - - static constexpr std::array ip_types - { - Type::IPv4, - Type::IPv6, - }; - - static constexpr std::array complex_types - { - Type::Nullable, - Type::LowCardinality, - Type::Array, - Type::Tuple, - Type::Map, - Type::Nested, - }; - - static constexpr std::array map_key_types - { - Type::Int8, - Type::UInt8, - Type::Bool, - Type::Int16, - Type::UInt16, - Type::Int32, - Type::UInt32, - Type::Int64, - Type::UInt64, - Type::Date, - Type::Date32, - Type::DateTime, - Type::String, - Type::FixedString, - }; - - static constexpr std::array map_key_string_types - { - Type::String, - Type::FixedString - }; - - static constexpr size_t MAX_NUMBER_OF_COLUMNS = 128; - static constexpr size_t MAX_TUPLE_ELEMENTS = 16; - static constexpr size_t MAX_DATETIME64_PRECISION = 9; - static constexpr size_t MAX_DECIMAL32_PRECISION = 9; - static constexpr size_t MAX_DECIMAL64_PRECISION = 18; - static constexpr size_t MAX_DECIMAL128_PRECISION = 38; - static constexpr size_t MAX_DECIMAL256_PRECISION = 76; - static constexpr size_t MAX_DEPTH = 32; - -public: - static constexpr auto name = "generateRandomStructure"; - - static FunctionPtr create(ContextPtr /*context*/) - { - return std::make_shared(); - } - - String getName() const override { return name; 
} - - size_t getNumberOfArguments() const override { return 0; } - - bool isVariadic() const override { return true; } - bool isDeterministic() const override { return false; } - bool isDeterministicInScopeOfQuery() const override { return false; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 1, 2, 3, 4, 5, 6}; } - bool useDefaultImplementationForConstants() const override { return false; } - bool useDefaultImplementationForNulls() const override { return false; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (arguments.size() > 7) - throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, expected from 0 to 7", - getName(), arguments.size()); - - for (size_t i = 0; i != 2; ++i) - { - if (arguments.size() == i) - break; - - if (!isUnsignedInteger(arguments[i]) && !arguments[i]->onlyNull()) - { - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of the {} argument of function {}, expected unsigned integer or Null", - i + 1, - arguments[i]->getName(), - getName()); - } - } - - for (size_t i = 2; i != 7; ++i) - { - if (arguments.size() <= i) - break; - - if (!isUInt8(arguments[i])) - { - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of the {} argument of function {}, expected UInt8", - i + 1, - arguments[i]->getName(), - getName()); - } - } - - return std::make_shared(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override - { - size_t seed = randomSeed(); - size_t number_of_columns = 0; - - if (!arguments.empty() && !arguments[0].column->onlyNull()) - { - number_of_columns = arguments[0].column->getUInt(0); - if (number_of_columns > MAX_NUMBER_OF_COLUMNS) - 
throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Maximum allowed number of columns is {}, got {}", - MAX_NUMBER_OF_COLUMNS, - number_of_columns); - } - - if (arguments.size() > 1 && !arguments[1].column->onlyNull()) - seed = arguments[1].column->getUInt(0); - - bool allow_big_numbers = true; - if (arguments.size() > 2) - allow_big_numbers = arguments[2].column->getBool(0); - - bool allow_enums = true; - if (arguments.size() > 3) - allow_enums = arguments[3].column->getBool(0); - - bool allow_decimals = true; - if (arguments.size() > 4) - allow_decimals = arguments[4].column->getBool(0); - - bool allow_ip = true; - if (arguments.size() > 5) - allow_ip = arguments[5].column->getBool(0); - - bool only_string_map_key = false; - if (arguments.size() > 6) - only_string_map_key = arguments[6].column->getBool(0); - - pcg64 rng(seed); - if (number_of_columns == 0) - number_of_columns = generateNumberOfColumns(rng); - - auto col_res = ColumnString::create(); - String generated_structure; - for (size_t i = 0; i != number_of_columns; ++i) - { - if (i != 0) - generated_structure += ", "; - String column_name = "c" + std::to_string(i + 1); - auto type = generateRandomType(column_name, rng, allow_big_numbers, allow_enums, allow_decimals, allow_ip, only_string_map_key); - generated_structure += column_name + " " + type; - } - col_res->insert(generated_structure); - return ColumnConst::create(std::move(col_res), input_rows_count); - } - -private: - - size_t generateNumberOfColumns(pcg64 & rng) const - { - return rng() % MAX_NUMBER_OF_COLUMNS + 1; - } - - /// Helper struct to call generateRandomTypeImpl with lots of bool template arguments without writing big if/else over all bool variables. 
- template - struct Dispatcher - { - static auto call(const FunctionGenerateRandomStructure * f, const String & column_name, pcg64 & rng) - { - return f->generateRandomTypeImpl(column_name, rng); - } - - template - static auto call(const FunctionGenerateRandomStructure * f, const String & column_name, pcg64 & rng, bool b, Args1... ar1) - { - if (b) - return Dispatcher::call(f, column_name, rng, ar1...); - else - return Dispatcher::call(f, column_name, rng, ar1...); - } - - friend FunctionGenerateRandomStructure; - }; - - String generateRandomType(const String & column_name, pcg64 & rng, bool allow_big_numbers, bool allow_enums, bool allow_decimals, bool allow_ip, bool allow_only_string_map_keys) const - { - return Dispatcher<>::call(this, column_name, rng, allow_big_numbers, allow_enums, allow_decimals, allow_ip, allow_only_string_map_keys, true); - } - - template - String generateRandomTypeImpl(const String & column_name, pcg64 & rng, size_t depth = 0) const - { - constexpr auto all_types = getAllTypes(); - auto type = all_types[rng() % all_types.size()]; - - switch (type) - { - case Type::FixedString: - return "FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")"; - case Type::DateTime64: - return "DateTime64(" + std::to_string(rng() % MAX_DATETIME64_PRECISION) + ")"; - case Type::Decimal32: - return "Decimal32(" + std::to_string(rng() % MAX_DECIMAL32_PRECISION) + ")"; - case Type::Decimal64: - return "Decimal64(" + std::to_string(rng() % MAX_DECIMAL64_PRECISION) + ")"; - case Type::Decimal128: - return "Decimal128(" + std::to_string(rng() % MAX_DECIMAL128_PRECISION) + ")"; - case Type::Decimal256: - return "Decimal256(" + std::to_string(rng() % MAX_DECIMAL256_PRECISION) + ")"; - case Type::Enum8: - return "Enum8(" + generateEnumValues(column_name, rng) + ")"; - case Type::Enum16: - return "Enum16(" + generateEnumValues(column_name, rng) + ")"; - case Type::LowCardinality: - return "LowCardinality(" + 
generateLowCardinalityNestedType(rng) + ")"; - case Type::Nullable: - { - auto nested_type = generateRandomTypeImpl(column_name, rng, depth + 1); - return "Nullable(" + nested_type + ")"; - } - case Type::Array: - { - auto nested_type = generateRandomTypeImpl(column_name, rng, depth + 1); - return "Array(" + nested_type + ")"; - } - case Type::Map: - { - auto key_type = generateMapKeyType(rng); - auto value_type = generateRandomTypeImpl(column_name, rng, depth + 1); - return "Map(" + key_type + ", " + value_type + ")"; - } - case Type::Tuple: - { - size_t elements = rng() % MAX_TUPLE_ELEMENTS + 1; - bool named_tuple = rng() % 2; - String tuple_type = "Tuple("; - for (size_t i = 0; i != elements; ++i) - { - if (i != 0) - tuple_type += ", "; - - String element_name = "e" + std::to_string(i + 1); - if (named_tuple) - tuple_type += element_name + " "; - tuple_type += generateRandomTypeImpl(element_name, rng, depth + 1); - } - return tuple_type + ")"; - } - case Type::Nested: - { - size_t elements = rng() % MAX_TUPLE_ELEMENTS + 1; - String nested_type = "Nested("; - for (size_t i = 0; i != elements; ++i) - { - if (i != 0) - nested_type += ", "; - String element_name = "e" + std::to_string(i + 1); - auto element_type = generateRandomTypeImpl(element_name, rng, depth + 1); - nested_type += element_name + " " + element_type; - } - return nested_type + ")"; - } - default: - return String(magic_enum::enum_name(type)); - } - } - - template - String generateMapKeyType(pcg64 & rng) const - { - Type type; - if constexpr (allow_only_string_map_keys) - type = map_key_string_types[rng() % map_key_string_types.size()]; - else - type = map_key_types[rng() % map_key_types.size()]; - - if (type == Type::FixedString) - return "FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")"; - return String(magic_enum::enum_name(type)); - } - - String generateLowCardinalityNestedType(pcg64 & rng) const - { - /// Support only String and FixedString. 
- String nested_type; - if (rng() % 2) - nested_type = "String"; - else - nested_type = "FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")"; - return rng() % 2 ? nested_type : "Nullable(" + nested_type + ")"; - } - - String generateEnumValues(const String & column_name, pcg64 & rng) const - { - /// Don't generate big enums, because it will lead to really big strings - /// and slowness of this function, and it can lead to `Max query size exceeded` - /// while using this function with generateRandom. - ssize_t num_values = rng() % 16 + 1; - String result; - for (ssize_t i = 0; i != num_values; ++i) - { - if (i != 0) - result += ", "; - result += "'" + column_name + "V" + std::to_string(i) + "' = " + std::to_string(i); - } - return result; - } - - template - static constexpr auto getAllTypes() - { - constexpr size_t big_integer_types_size = big_integer_types.size() * allow_big_numbers; - constexpr size_t enum_types_size = enum_types.size() * allow_enums; - constexpr size_t decimal_types_size = decimal_types.size() * allow_decimals; - constexpr size_t big_decimal_types_size = big_decimal_types.size() * allow_big_numbers * allow_decimals; - constexpr size_t ip_types_size = ip_types.size() * allow_ip; - constexpr size_t complex_types_size = complex_types.size() * allow_complex_types; - - constexpr size_t result_size = simple_types.size() + big_integer_types_size + enum_types_size + decimal_types_size - + big_decimal_types_size + ip_types_size + complex_types_size; - std::array result; - size_t index = 0; - - for (size_t i = 0; i != simple_types.size(); ++i, ++index) - result[index] = simple_types[i]; - - for (size_t i = 0; i != big_integer_types_size; ++i, ++index) - result[index] = big_integer_types[i]; - - for (size_t i = 0; i != enum_types_size; ++i, ++index) - result[index] = enum_types[i]; - - for (size_t i = 0; i != decimal_types_size; ++i, ++index) - result[index] = decimal_types[i]; - - for (size_t i = 0; i != 
big_decimal_types_size; ++i, ++index) - result[index] = big_decimal_types[i]; - - for (size_t i = 0; i != ip_types_size; ++i, ++index) - result[index] = ip_types[i]; - - for (size_t i = 0; i != complex_types_size; ++i, ++index) - result[index] = complex_types[i]; - - return result; - } -}; - - -REGISTER_FUNCTION(GenerateRandomStructure) -{ - factory.registerFunction( - { - R"( -Generates a random table structure. -This function takes 4 optional constant arguments: -1) the number of column in the result structure (random by default) -2) random seed (random by default) -3) flag that indicates if big number types can be used (true by default) -4) flag that indicates if enum types can be used (true by default) -5) flag that indicates if decimal types can be used (true by default) -6) flag that indicates if ip types (IPv4, IPv6) can be used (true by default) -7) flag that indicates if map keys should be only String or FixedString (false by default) -The maximum number of columns is 128. -The function returns a value of type String. 
-)", - Documentation::Examples{ - {"random", "SELECT generateRandomStructure()"}, - {"with specified number of arguments", "SELECT generateRandomStructure(10)"}, - {"with specified seed", "SELECT generateRandomStructure(10, 42)"}, - {"without big number types", "SELECT generateRandomStructure(10, NULL, false)"}, - {"without enum types", "SELECT generateRandomStructure(10, NULL, true, false)"}, - {"without decimal types", "SELECT generateRandomStructure(10, NULL, true, true, false)"}, - {"without ip types", "SELECT generateRandomStructure(10, NULL, true, true, true, false)"}, - {"with only string mak key types", "SELECT generateRandomStructure(10, NULL, true, true, true, true, true)"}, - }, - Documentation::Categories{"Random"} - }, - FunctionFactory::CaseSensitive); -} - -} diff --git a/src/Functions/structureToFormatSchema.cpp b/src/Functions/structureToFormatSchema.cpp index 332db44e06e..8c561595504 100644 --- a/src/Functions/structureToFormatSchema.cpp +++ b/src/Functions/structureToFormatSchema.cpp @@ -105,15 +105,15 @@ private: REGISTER_FUNCTION(StructureToCapnProtoSchema) { - factory.registerFunction>( + factory.registerFunction>(FunctionDocumentation { - R"( + .description=R"( )", - Documentation::Examples{ - {"random", "SELECT structureToCapnProtoSchema()"}, + .examples{ + {"random", "SELECT structureToCapnProtoSchema('s String, x UInt32', 'MessageName')", ""}, }, - Documentation::Categories{"Other"} + .categories{"Other"} }, FunctionFactory::CaseSensitive); } @@ -121,15 +121,15 @@ REGISTER_FUNCTION(StructureToCapnProtoSchema) REGISTER_FUNCTION(StructureToProtobufSchema) { - factory.registerFunction>( + factory.registerFunction>(FunctionDocumentation { - R"( + .description=R"( )", - Documentation::Examples{ - {"random", "SELECT structureToCapnProtoSchema()"}, + .examples{ + {"random", "SELECT structureToProtobufSchema('s String, x UInt32', 'MessageName')", ""}, }, - Documentation::Categories{"Other"} + .categories{"Other"} }, FunctionFactory::CaseSensitive); } diff --git 
a/tests/queries/0_stateless/02817_structure_to_schema.reference b/tests/queries/0_stateless/02817_structure_to_schema.reference new file mode 100644 index 00000000000..a1aed3f171f --- /dev/null +++ b/tests/queries/0_stateless/02817_structure_to_schema.reference @@ -0,0 +1,488 @@ +CapnProto +Numbers +@0xfcfddd851150d4a7; + +struct Message +{ + int8 @0 : Int8; + uint8 @1 : UInt8; + int16 @2 : Int16; + uint16 @3 : UInt16; + int32 @4 : Int32; + uint32 @5 : UInt32; + int64 @6 : Int64; + uint64 @7 : UInt64; + int128 @8 : Data; + uint128 @9 : Data; + int256 @10 : Data; + uint256 @11 : Data; + float32 @12 : Float32; + float64 @13 : Float64; + decimal32 @14 : Int32; + decimal64 @15 : Int64; + decimal128 @16 : Data; + decimal256 @17 : Data; +} +Dates +@0xf6c4d408fe41b545; + +struct Message +{ + data @0 : UInt16; + date32 @1 : Int32; + datetime @2 : UInt32; + datatime64 @3 : Int64; +} +Strings +@0xaa8865faa0622d35; + +struct Message +{ + string @0 : Data; + fixedstring @1 : Data; +} +Special +@0xc4a88da5454bdc56; + +struct Message +{ + ipv4 @0 : UInt32; + ipv6 @1 : Data; + uuid @2 : Data; +} +Nullable +@0xec817ea81ba08bcf; + +struct Message +{ + struct Nullable + { + union + { + value @0 : UInt32; + null @1 : Void; + } + } + nullable @0 : Nullable; +} +Enums +@0x9d57ed1a12d87f28; + +struct Message +{ + enum Enum8 + { + v1 @0; + v2 @1; + v3 @2; + v4 @3; + } + enum8 @0 : Enum8; + enum Enum16 + { + v5 @0; + v6 @1; + v7 @2; + v8 @3; + v9 @4; + } + enum16 @1 : Enum16; +} +Arrays +@0xc10c035fde2e533a; + +struct Message +{ + arr1 @0 : List(UInt32); + arr2 @1 : List(List(List(UInt32))); +} +Tuples +@0xfbb3d48432e54b68; + +struct Message +{ + struct Tuple1 + { + e1 @0 : UInt32; + e2 @1 : Data; + e3 @2 : UInt32; + } + tuple1 @0 : Tuple1; + struct Tuple2 + { + struct E1 + { + e1 @0 : UInt32; + struct E2 + { + e1 @0 : Data; + e2 @1 : UInt32; + } + e2 @1 : E2; + e3 @2 : Data; + } + e1 @0 : E1; + struct E2 + { + e1 @0 : Data; + e2 @1 : UInt32; + } + e2 @1 : E2; + } + tuple2 @1 : Tuple2; +} 
+Maps +@0xfdb6e3129839665e; + +struct Message +{ + struct Map1 + { + struct Entry + { + key @0 : Data; + value @1 : UInt32; + } + entries @0 : List(Entry); + } + map1 @0 : Map1; + struct Map2 + { + struct Entry + { + struct Value + { + struct Entry + { + struct Value + { + struct Entry + { + key @0 : Data; + value @1 : UInt32; + } + entries @0 : List(Entry); + } + key @0 : Data; + value @1 : Value; + } + entries @0 : List(Entry); + } + key @0 : Data; + value @1 : Value; + } + entries @0 : List(Entry); + } + map2 @1 : Map2; +} +Complex +@0x8bfcfe1141c8e85c; + +struct Message +{ + struct C1 + { + struct E1 + { + struct Entry + { + struct Value + { + union + { + value @0 : UInt32; + null @1 : Void; + } + } + key @0 : Data; + value @1 : List(List(Value)); + } + entries @0 : List(Entry); + } + e1 @0 : List(E1); + struct E2 + { + struct Entry + { + struct Value + { + struct E1 + { + union + { + value @0 : Data; + null @1 : Void; + } + } + e1 @0 : List(List(E1)); + struct E2 + { + e1 @0 : UInt32; + struct E2 + { + struct E1 + { + union + { + value @0 : Data; + null @1 : Void; + } + } + e1 @0 : List(List(E1)); + e2 @1 : UInt32; + } + e2 @1 : E2; + } + e2 @1 : List(E2); + } + key @0 : Data; + value @1 : Value; + } + entries @0 : List(Entry); + } + e2 @1 : List(E2); + } + c1 @0 : C1; +} +Read/write with no schema +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +Output schema +@0xa43428a60b94a646; + +struct Message +{ + number @0 : UInt64; +} +Bad output schema path +2 +2 +Protobuf +Numbers +syntax = "proto3"; + +message Message +{ + int32 int8 = 1; + uint32 uint8 = 2; + int32 int16 = 3; + uint32 uint16 = 4; + int32 int32 = 5; + uint32 uint32 = 6; + int64 int64 = 7; + uint64 uint64 = 8; + bytes int128 = 9; + bytes uint128 = 10; + bytes int256 = 11; + bytes uint256 = 12; + float float32 = 13; + double float64 = 14; + bytes decimal32 = 15; + bytes decimal64 = 16; + bytes decimal128 = 17; + bytes decimal256 = 18; +} +Dates +syntax = "proto3"; + +message Message +{ + uint32 data = 1; + int32 
date32 = 2; + uint32 datetime = 3; + uint64 datatime64 = 4; +} +Strings +syntax = "proto3"; + +message Message +{ + bytes string = 1; + bytes fixedstring = 2; +} +Special +syntax = "proto3"; + +message Message +{ + uint32 ipv4 = 1; + bytes ipv6 = 2; + bytes uuid = 3; +} +Nullable +syntax = "proto3"; + +message Message +{ + uint32 nullable = 1; +} +Enums +syntax = "proto3"; + +message Message +{ + enum Enum8 + { + v1 = 0; + v2 = 1; + v3 = 2; + v4 = 3; + } + Enum8 enum8 = 1; + enum Enum16 + { + v5 = 0; + v6 = 1; + v7 = 2; + v8 = 3; + v9 = 4; + } + Enum16 enum16 = 2; +} +Arrays +syntax = "proto3"; + +message Message +{ + repeated uint32 arr1 = 1; + message Arr2 + { + message Arr2 + { + repeated uint32 arr2 = 1; + } + repeated Arr2 arr2 = 1; + } + repeated Arr2 arr2 = 2; +} +Tuples +syntax = "proto3"; + +message Message +{ + message Tuple1 + { + uint32 e1 = 1; + bytes e2 = 2; + uint32 e3 = 3; + } + Tuple1 tuple1 = 1; + message Tuple2 + { + message E1 + { + uint32 e1 = 1; + message E2 + { + bytes e1 = 1; + uint32 e2 = 2; + } + E2 e2 = 2; + bytes e3 = 3; + } + E1 e1 = 1; + message E2 + { + bytes e1 = 1; + uint32 e2 = 2; + } + E2 e2 = 2; + } + Tuple2 tuple2 = 2; +} +Maps +syntax = "proto3"; + +message Message +{ + map map1 = 1; + message Map2Value + { + message Map2ValueValue + { + map map2ValueValue = 1; + } + map map2Value = 1; + } + map map2 = 2; +} +Complex +syntax = "proto3"; + +message Message +{ + message C1 + { + message E1 + { + message E1Value + { + message E1Value + { + repeated uint32 e1Value = 1; + } + repeated E1Value e1Value = 1; + } + map e1 = 1; + } + repeated E1 e1 = 1; + message E2 + { + message E2Value + { + message E1 + { + repeated bytes e1 = 1; + } + repeated E1 e1 = 1; + message E2 + { + uint32 e1 = 1; + message E2 + { + message E1 + { + repeated bytes e1 = 1; + } + repeated E1 e1 = 1; + uint32 e2 = 2; + } + E2 e2 = 2; + } + repeated E2 e2 = 2; + } + map e2 = 1; + } + repeated E2 e2 = 2; + } + C1 c1 = 1; +} +Read/write with no schema +0 +1 +2 +3 +4 
+5 +6 +7 +8 +9 +Output schema +syntax = "proto3"; + +message Message +{ + uint64 number = 1; +} +Bad output schema path +2 +2 diff --git a/tests/queries/0_stateless/02817_structure_to_schema.sh b/tests/queries/0_stateless/02817_structure_to_schema.sh new file mode 100755 index 00000000000..8aaf35acb33 --- /dev/null +++ b/tests/queries/0_stateless/02817_structure_to_schema.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME-data +SCHEMA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME-schema + +function test_structure() +{ + format=$1 + ext=$2 + structure=$3 + + $CLICKHOUSE_LOCAL -q "select structureTo${format}Schema('$structure') format TSVRaw" > $SCHEMA_FILE.$ext + cat $SCHEMA_FILE.$ext + + $CLICKHOUSE_LOCAL -q "select * from generateRandom('$structure', 42) limit 10 format $format settings format_schema='$SCHEMA_FILE:Message', format_capn_proto_enum_comparising_mode='by_names'" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "select * from file('$DATA_FILE', $format, '$structure') format Null settings format_schema='$SCHEMA_FILE:Message', format_capn_proto_enum_comparising_mode='by_names'" + +} + +function test_format() +{ + format=$1 + ext=$2 + + echo $format + + echo Numbers + numbers='int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, int128 Int128, uint128 UInt128, int256 Int256, uint256 UInt256, float32 Float32, float64 Float64, decimal32 Decimal32(3), decimal64 Decimal64(10), decimal128 Decimal128(20), decimal256 Decimal256(40)' + test_structure $format $ext "$numbers" + + echo Dates + dates='data Date, date32 Date32, datetime DateTime, datatime64 DateTime64(9)' + test_structure $format $ext "$dates" + + echo Strings + strings='string String, fixedstring FixedString(42)' + test_structure $format $ext "$strings" + + echo Special + special='ipv4 IPv4, ipv6 IPv6, uuid UUID' 
+ test_structure $format $ext "$special" + + echo Nullable + nullable='nullable Nullable(UInt32)' + test_structure $format $ext "$nullable" + + echo Enums + enums="enum8 Enum8(''v1'' = -100, ''v2'' = -10, ''v3'' = 0, ''v4'' = 42), enum16 Enum16(''v5'' = -2000, ''v6'' = -1000, ''v7'' = 0, ''v8'' = 1000, ''v9'' = 2000)" + test_structure $format $ext "$enums" + + echo Arrays + arrays='arr1 Array(UInt32), arr2 Array(Array(Array(UInt32)))' + test_structure $format $ext "$arrays" + + echo Tuples + tuples='tuple1 Tuple(e1 UInt32, e2 String, e3 DateTime), tuple2 Tuple(e1 Tuple(e1 UInt32, e2 Tuple(e1 String, e2 DateTime), e3 String), e2 Tuple(e1 String, e2 UInt32))' + test_structure $format $ext "$tuples" + + echo Maps + maps='map1 Map(String, UInt32), map2 Map(String, Map(String, Map(String, UInt32)))' + test_structure $format $ext "$maps" + + echo Complex + complex='c1 Array(Tuple(e1 Map(String, Array(Array(Nullable(UInt32)))), e2 Map(String, Tuple(e1 Array(Array(Nullable(String))), e2 Nested(e1 UInt32, e2 Tuple(e1 Array(Array(Nullable(String))), e2 UInt32))))))' + test_structure $format $ext "$complex" + + echo "Read/write with no schema" + $CLICKHOUSE_LOCAL -q "select * from numbers(10) format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "select * from file('$DATA_FILE', $format, 'number UInt64')" + + echo "Output schema" + $CLICKHOUSE_LOCAL -q "select * from numbers(10) format $format settings output_format_schema='$SCHEMA_FILE.$ext'" > $DATA_FILE + cat $SCHEMA_FILE.$ext + + echo "Bad output schema path" + $CLICKHOUSE_CLIENT -q "insert into function file('$DATA_FILE', $format) select * from numbers(10) settings output_format_schema='/tmp/schema.$ext'" 2>&1 | grep "BAD_ARGUMENTS" -c + $CLICKHOUSE_CLIENT -q "insert into function file('$DATA_FILE', $format) select * from numbers(10) settings output_format_schema='../../schema.$ext'" 2>&1 | grep "BAD_ARGUMENTS" -c +} + +test_format CapnProto capnp +test_format Protobuf proto + +rm $DATA_FILE +rm $SCHEMA_FILE* +