#include #include #include #include #include #include #include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int BAD_ARGUMENTS; } namespace { const size_t MAX_NUMBER_OF_COLUMNS = 128; const size_t MAX_TUPLE_ELEMENTS = 16; const size_t MAX_DATETIME64_PRECISION = 9; const size_t MAX_DECIMAL32_PRECISION = 9; const size_t MAX_DECIMAL64_PRECISION = 18; const size_t MAX_DECIMAL128_PRECISION = 38; const size_t MAX_DECIMAL256_PRECISION = 76; const size_t MAX_DEPTH = 16; constexpr std::array simple_types { TypeIndex::Int8, TypeIndex::UInt8, TypeIndex::Int16, TypeIndex::UInt16, TypeIndex::Int32, TypeIndex::UInt32, TypeIndex::Int64, TypeIndex::UInt64, TypeIndex::Int128, TypeIndex::UInt128, TypeIndex::Int256, TypeIndex::UInt256, TypeIndex::Float32, TypeIndex::Float64, TypeIndex::Decimal32, TypeIndex::Decimal64, TypeIndex::Decimal128, TypeIndex::Decimal256, TypeIndex::Date, TypeIndex::Date32, TypeIndex::DateTime, TypeIndex::DateTime64, TypeIndex::String, TypeIndex::FixedString, TypeIndex::Enum8, TypeIndex::Enum16, TypeIndex::IPv4, TypeIndex::IPv6, TypeIndex::UUID, }; constexpr std::array complex_types { TypeIndex::Nullable, TypeIndex::LowCardinality, TypeIndex::Array, TypeIndex::Tuple, TypeIndex::Map, }; constexpr std::array map_key_types { TypeIndex::Int8, TypeIndex::UInt8, TypeIndex::Int16, TypeIndex::UInt16, TypeIndex::Int32, TypeIndex::UInt32, TypeIndex::Int64, TypeIndex::UInt64, TypeIndex::Int128, TypeIndex::UInt128, TypeIndex::Int256, TypeIndex::UInt256, TypeIndex::Date, TypeIndex::Date32, TypeIndex::DateTime, TypeIndex::String, TypeIndex::FixedString, TypeIndex::IPv4, TypeIndex::Enum8, TypeIndex::Enum16, TypeIndex::UUID, TypeIndex::LowCardinality, }; constexpr std::array suspicious_lc_types { TypeIndex::Int8, TypeIndex::UInt8, TypeIndex::Int16, TypeIndex::UInt16, TypeIndex::Int32, TypeIndex::UInt32, TypeIndex::Int64, TypeIndex::UInt64, TypeIndex::Int128, TypeIndex::UInt128, TypeIndex::Int256, TypeIndex::UInt256, TypeIndex::Float32, TypeIndex::Float64, TypeIndex::Date, TypeIndex::Date32, TypeIndex::DateTime, TypeIndex::String, TypeIndex::FixedString, TypeIndex::IPv4, TypeIndex::IPv6, TypeIndex::UUID, }; template constexpr auto getAllTypes() { constexpr size_t complex_types_size = complex_types.size() * allow_complex_types; constexpr size_t result_size = simple_types.size() + complex_types_size; std::array result; size_t index = 0; for (size_t i = 0; i != simple_types.size(); ++i, ++index) result[index] = simple_types[i]; for (size_t i = 0; i != complex_types_size; ++i, ++index) result[index] = complex_types[i]; return result; } size_t generateNumberOfColumns(pcg64 & rng) { return rng() % MAX_NUMBER_OF_COLUMNS + 1; } void writeLowCardinalityNestedType(pcg64 & rng, WriteBuffer & buf, bool allow_suspicious_lc_types) { bool make_nullable = rng() % 2; if (make_nullable) writeCString("Nullable(", buf); if (allow_suspicious_lc_types) { TypeIndex type = suspicious_lc_types[rng() % suspicious_lc_types.size()]; if (type == TypeIndex::FixedString) writeString("FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")", buf); else writeString(magic_enum::enum_name(type), buf); } else { /// Support only String and FixedString. if (rng() % 2) writeCString("String", buf); else writeString("FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")", buf); } if (make_nullable) writeChar(')', buf); } void writeEnumValues(const String & column_name, pcg64 & rng, WriteBuffer & buf, ssize_t max_value) { /// Don't generate big enums, because it will lead to really big result /// and slowness of this function, and it can lead to `Max query size exceeded` /// while using this function with generateRandom. size_t num_values = rng() % 16 + 1; std::vector values(num_values); /// Generate random numbers from range [-(max_value + 1), max_value - num_values + 1]. for (Int16 & x : values) x = rng() % (2 * max_value + 3 - num_values) - max_value - 1; /// Make all numbers unique. std::sort(values.begin(), values.end()); for (size_t i = 0; i < num_values; ++i) values[i] += i; std::shuffle(values.begin(), values.end(), rng); for (size_t i = 0; i != num_values; ++i) { if (i != 0) writeCString(", ", buf); writeString("'" + column_name + "V" + std::to_string(i) + "' = " + std::to_string(values[i]), buf); } } void writeMapKeyType(const String & column_name, pcg64 & rng, WriteBuffer & buf) { TypeIndex type = map_key_types[rng() % map_key_types.size()]; switch (type) { case TypeIndex::FixedString: writeString("FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")", buf); break; case TypeIndex::LowCardinality: writeCString("LowCardinality(", buf); /// Map key supports only String and FixedString inside LowCardinality. if (rng() % 2) writeCString("String", buf); else writeString("FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")", buf); writeChar(')', buf); break; case TypeIndex::Enum8: writeCString("Enum8(", buf); writeEnumValues(column_name, rng, buf, INT8_MAX); writeChar(')', buf); break; case TypeIndex::Enum16: writeCString("Enum16(", buf); writeEnumValues(column_name, rng, buf, INT16_MAX); writeChar(')', buf); break; default: writeString(magic_enum::enum_name(type), buf); break; } } template void writeRandomType(const String & column_name, pcg64 & rng, WriteBuffer & buf, bool allow_suspicious_lc_types, size_t depth = 0) { if (allow_complex_types && depth > MAX_DEPTH) writeRandomType(column_name, rng, buf, depth); constexpr auto all_types = getAllTypes(); auto type = all_types[rng() % all_types.size()]; switch (type) { case TypeIndex::UInt8: if (rng() % 2) writeCString("UInt8", buf); else writeCString("Bool", buf); return; case TypeIndex::FixedString: writeString("FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")", buf); return; case TypeIndex::DateTime64: writeString("DateTime64(" + std::to_string(rng() % MAX_DATETIME64_PRECISION + 1) + ")", buf); return; case TypeIndex::Decimal32: writeString("Decimal32(" + std::to_string(rng() % MAX_DECIMAL32_PRECISION + 1) + ")", buf); return; case TypeIndex::Decimal64: writeString("Decimal64(" + std::to_string(rng() % MAX_DECIMAL64_PRECISION + 1) + ")", buf); return; case TypeIndex::Decimal128: writeString("Decimal128(" + std::to_string(rng() % MAX_DECIMAL128_PRECISION + 1) + ")", buf); return; case TypeIndex::Decimal256: writeString("Decimal256(" + std::to_string(rng() % MAX_DECIMAL256_PRECISION + 1) + ")", buf); return; case TypeIndex::Enum8: writeCString("Enum8(", buf); writeEnumValues(column_name, rng, buf, INT8_MAX); writeChar(')', buf); return; case TypeIndex::Enum16: writeCString("Enum16(", buf); writeEnumValues(column_name, rng, buf, INT16_MAX); writeChar(')', buf); return; case TypeIndex::LowCardinality: writeCString("LowCardinality(", buf); writeLowCardinalityNestedType(rng, buf, allow_suspicious_lc_types); writeChar(')', buf); return; case TypeIndex::Nullable: { writeCString("Nullable(", buf); writeRandomType(column_name, rng, buf, allow_suspicious_lc_types, depth + 1); writeChar(')', buf); return; } case TypeIndex::Array: { writeCString("Array(", buf); writeRandomType(column_name, rng, buf, allow_suspicious_lc_types, depth + 1); writeChar(')', buf); return; } case TypeIndex::Map: { writeCString("Map(", buf); writeMapKeyType(column_name, rng, buf); writeCString(", ", buf); writeRandomType(column_name, rng, buf, allow_suspicious_lc_types, depth + 1); writeChar(')', buf); return; } case TypeIndex::Tuple: { size_t elements = rng() % MAX_TUPLE_ELEMENTS + 1; bool generate_nested = rng() % 2; bool generate_named_tuple = rng() % 2; if (generate_nested) writeCString("Nested(", buf); else writeCString("Tuple(", buf); for (size_t i = 0; i != elements; ++i) { if (i != 0) writeCString(", ", buf); String element_name = "e" + std::to_string(i + 1); if (generate_named_tuple || generate_nested) { writeString(element_name, buf); writeChar(' ', buf); } writeRandomType(element_name, rng, buf, allow_suspicious_lc_types, depth + 1); } writeChar(')', buf); return; } default: writeString(magic_enum::enum_name(type), buf); return; } } void writeRandomStructure(pcg64 & rng, size_t number_of_columns, WriteBuffer & buf, bool allow_suspicious_lc_types) { for (size_t i = 0; i != number_of_columns; ++i) { if (i != 0) writeCString(", ", buf); String column_name = "c" + std::to_string(i + 1); writeString(column_name, buf); writeChar(' ', buf); writeRandomType(column_name, rng, buf, allow_suspicious_lc_types); } } } DataTypePtr FunctionGenerateRandomStructure::getReturnTypeImpl(const DataTypes & arguments) const { if (arguments.size() > 2) throw Exception( ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: passed {}, expected from 0 to 2", getName(), arguments.size()); for (size_t i = 0; i != arguments.size(); ++i) { if (!isUnsignedInteger(arguments[i]) && !arguments[i]->onlyNull()) { throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of the {} argument of function {}, expected unsigned integer or Null", arguments[i]->getName(), i + 1, getName()); } } return std::make_shared(); } ColumnPtr FunctionGenerateRandomStructure::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const { size_t seed = randomSeed(); size_t number_of_columns = 0; if (!arguments.empty() && !arguments[0].column->onlyNull()) { number_of_columns = arguments[0].column->getUInt(0); if (number_of_columns > MAX_NUMBER_OF_COLUMNS) throw Exception( ErrorCodes::BAD_ARGUMENTS, "Maximum allowed number of columns is {}, got {}", MAX_NUMBER_OF_COLUMNS, number_of_columns); } if (arguments.size() > 1 && !arguments[1].column->onlyNull()) seed = arguments[1].column->getUInt(0); pcg64 rng(seed); if (number_of_columns == 0) number_of_columns = generateNumberOfColumns(rng); auto col_res = ColumnString::create(); auto & string_column = assert_cast(*col_res); auto & chars = string_column.getChars(); WriteBufferFromVector buf(chars); writeRandomStructure(rng, number_of_columns, buf, allow_suspicious_lc_types); buf.finalize(); chars.push_back(0); string_column.getOffsets().push_back(chars.size()); return ColumnConst::create(std::move(col_res), input_rows_count); } String FunctionGenerateRandomStructure::generateRandomStructure(size_t seed, const ContextPtr & context) { pcg64 rng(seed); size_t number_of_columns = generateNumberOfColumns(rng); WriteBufferFromOwnString buf; writeRandomStructure(rng, number_of_columns, buf, context->getSettingsRef().allow_suspicious_low_cardinality_types); return buf.str(); } REGISTER_FUNCTION(GenerateRandomStructure) { factory.registerFunction( { .description=R"( Generates a random table structure. This function takes 2 optional constant arguments: the number of columns in the result structure (random by default) and random seed (random by default) The maximum number of columns is 128. The function returns a value of type String. )", .examples{ {"random", "SELECT generateRandomStructure()", "c1 UInt32, c2 FixedString(25)"}, {"with specified number of columns", "SELECT generateRandomStructure(3)", "c1 String, c2 Array(Int32), c3 LowCardinality(String)"}, {"with specified seed", "SELECT generateRandomStructure(1, 42)", "c1 UInt128"}, }, .categories{"Random"} }, FunctionFactory::CaseSensitive); } }