#include #include #include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; extern const int BAD_ARGUMENTS; } ColumnsDescription readSchemaFromFormat(const String & format_name, const std::optional & format_settings, ReadBufferCreator read_buffer_creator, ContextPtr context) { NamesAndTypesList names_and_types; if (FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format_name)) { auto external_schema_reader = FormatFactory::instance().getExternalSchemaReader(format_name, context, format_settings); try { names_and_types = external_schema_reader->readSchema(); } catch (const DB::Exception & e) { throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file. Error: {}", format_name, e.message()); } } else if (FormatFactory::instance().checkIfFormatHasSchemaReader(format_name)) { auto read_buf = read_buffer_creator(); if (read_buf->eof()) throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file, file is empty", format_name); auto schema_reader = FormatFactory::instance().getSchemaReader(format_name, *read_buf, context, format_settings); try { names_and_types = schema_reader->readSchema(); } catch (const DB::Exception & e) { throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file. Error: {}", format_name, e.message()); } } else throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} file format doesn't support schema inference", format_name); return ColumnsDescription(names_and_types); } DataTypePtr generalizeDataType(DataTypePtr type) { WhichDataType which(type); if (which.isNothing()) return nullptr; if (which.isNullable()) { const auto * nullable_type = assert_cast(type.get()); return generalizeDataType(nullable_type->getNestedType()); } if (isNumber(type)) return makeNullable(std::make_shared()); if (which.isArray()) { const auto * array_type = assert_cast(type.get()); auto nested_type = generalizeDataType(array_type->getNestedType()); return nested_type ? std::make_shared(nested_type) : nullptr; } if (which.isTuple()) { const auto * tuple_type = assert_cast(type.get()); DataTypes nested_types; for (const auto & element : tuple_type->getElements()) { auto nested_type = generalizeDataType(element); if (!nested_type) return nullptr; nested_types.push_back(nested_type); } return std::make_shared(std::move(nested_types)); } if (which.isMap()) { const auto * map_type = assert_cast(type.get()); auto key_type = generalizeDataType(map_type->getKeyType()); auto value_type = generalizeDataType(map_type->getValueType()); return key_type && value_type ? std::make_shared(key_type, value_type) : nullptr; } if (which.isLowCarnality()) { const auto * lc_type = assert_cast(type.get()); auto nested_type = generalizeDataType(lc_type->getDictionaryType()); return nested_type ? std::make_shared(nested_type) : nullptr; } return makeNullable(type); } }