mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-18 20:32:43 +00:00
134 lines
4.0 KiB
C++
134 lines
4.0 KiB
C++
#include <Formats/StructureToFormatSchemaUtils.h>
|
|
#include <IO/WriteHelpers.h>
|
|
|
|
namespace DB
|
|
{
|
|
|
|
namespace ErrorCodes
|
|
{
|
|
extern const int BAD_ARGUMENTS;
|
|
}
|
|
|
|
namespace StructureToFormatSchemaUtils
|
|
{
|
|
|
|
void writeIndent(WriteBuffer & buf, size_t indent)
|
|
{
|
|
writeChar(' ', indent * 4, buf);
|
|
}
|
|
|
|
void startNested(WriteBuffer & buf, const String & nested_name, const String & nested_type, size_t indent)
|
|
{
|
|
writeIndent(buf, indent);
|
|
writeString(nested_type, buf);
|
|
if (!nested_name.empty())
|
|
{
|
|
writeChar(' ', buf);
|
|
writeString(nested_name, buf);
|
|
}
|
|
writeChar('\n', buf);
|
|
writeIndent(buf, indent);
|
|
writeCString("{\n", buf);
|
|
}
|
|
|
|
void endNested(WriteBuffer & buf, size_t indent)
|
|
{
|
|
writeIndent(buf, indent);
|
|
writeCString("}\n", buf);
|
|
}
|
|
|
|
String getSchemaFieldName(const String & column_name)
|
|
{
|
|
String result = column_name;
|
|
/// Replace all first uppercase letters to lower-case,
|
|
/// because fields in CapnProto schema must begin with a lower-case letter.
|
|
/// Don't replace all letters to lower-case to remain camelCase field names.
|
|
for (auto & symbol : result)
|
|
{
|
|
if (islower(symbol))
|
|
break;
|
|
symbol = tolower(symbol);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
String getSchemaMessageName(const String & column_name)
|
|
{
|
|
String result = column_name;
|
|
if (!column_name.empty() && isalpha(column_name[0]))
|
|
result[0] = toupper(column_name[0]);
|
|
return result;
|
|
}
|
|
|
|
namespace
|
|
{
|
|
std::pair<String, String> splitName(const String & name, bool allow_split_by_underscore)
|
|
{
|
|
const auto * begin = name.data();
|
|
const auto * end = name.data() + name.size();
|
|
const char * it = nullptr;
|
|
if (allow_split_by_underscore)
|
|
it = find_first_symbols<'_', '.'>(begin, end);
|
|
else
|
|
it = find_first_symbols<'.'>(begin, end);
|
|
String first = String(begin, it);
|
|
String second = it == end ? "" : String(it + 1, end);
|
|
return {std::move(first), std::move(second)};
|
|
}
|
|
}
|
|
|
|
NamesAndTypesList collectNested(const NamesAndTypesList & names_and_types, bool allow_split_by_underscore, const String & format_name)
|
|
{
|
|
/// Find all columns with dots '.' or underscores '_' (if allowed) and move them into a tuple.
|
|
/// For example if we have columns 'a.b UInt32, a.c UInt32, x_y String' we will
|
|
/// change it to 'a Tuple(b UInt32, c UInt32), x Tuple(y String)'
|
|
NamesAndTypesList result;
|
|
std::unordered_map<String, NamesAndTypesList> nested;
|
|
for (const auto & [name, type] : names_and_types)
|
|
{
|
|
auto [field_name, nested_name] = splitName(name, allow_split_by_underscore);
|
|
if (isdigit(field_name[0]))
|
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format {} doesn't support field names that starts with a digit: '{}'", format_name, field_name);
|
|
|
|
if (nested_name.empty())
|
|
result.emplace_back(name, type);
|
|
else
|
|
nested[field_name].emplace_back(nested_name, type);
|
|
}
|
|
|
|
/// Collect nested recursively.
|
|
for (auto & [field_name, elements] : nested)
|
|
elements = collectNested(elements, allow_split_by_underscore, format_name);
|
|
|
|
for (const auto & [field_name, elements]: nested)
|
|
result.emplace_back(field_name, std::make_shared<DataTypeTuple>(elements.getTypes(), elements.getNames()));
|
|
|
|
return result;
|
|
}
|
|
|
|
NamesAndTypesList getCollectedTupleElements(const DataTypeTuple & tuple_type, bool allow_split_by_underscore, const String & format_name)
|
|
{
|
|
const auto & nested_types = tuple_type.getElements();
|
|
Names nested_names;
|
|
if (tuple_type.haveExplicitNames())
|
|
{
|
|
nested_names = tuple_type.getElementNames();
|
|
}
|
|
else
|
|
{
|
|
nested_names.reserve(nested_types.size());
|
|
for (size_t i = 0; i != nested_types.size(); ++i)
|
|
nested_names.push_back("e" + std::to_string(i + 1));
|
|
}
|
|
|
|
NamesAndTypesList result;
|
|
for (size_t i = 0; i != nested_names.size(); ++i)
|
|
result.emplace_back(nested_names[i], nested_types[i]);
|
|
|
|
return collectNested(result, allow_split_by_underscore, format_name);
|
|
}
|
|
|
|
}
|
|
|
|
}
|