2019-01-23 19:28:13 +00:00
|
|
|
#include <Formats/FormatSchemaInfo.h>
|
|
|
|
#include <Interpreters/Context.h>
|
|
|
|
#include <Common/Exception.h>
|
2023-06-22 17:58:57 +00:00
|
|
|
#include <Common/filesystemHelpers.h>
|
|
|
|
#include <Disks/IO/WriteBufferFromTemporaryFile.h>
|
2021-05-16 18:36:55 +00:00
|
|
|
#include <filesystem>
|
2019-01-23 19:28:13 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int BAD_ARGUMENTS;
|
|
|
|
}
|
|
|
|
|
2021-05-16 18:36:55 +00:00
|
|
|
namespace fs = std::filesystem;
|
2019-03-29 14:37:08 +00:00
|
|
|
|
|
|
|
namespace
|
2019-01-23 19:28:13 +00:00
|
|
|
{
|
2019-03-29 14:37:08 +00:00
|
|
|
/// Maps a format name to the default extension of its schema files (without the leading dot).
/// Only "Protobuf" and "CapnProto" have one; every other name yields an empty string.
/// The comparison is exact and case-sensitive.
String getFormatSchemaDefaultFileExtension(const String & format)
{
    if (format == "CapnProto")
        return "capnp";

    return format == "Protobuf" ? "proto" : "";
}
|
2019-03-29 14:37:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-12-25 19:17:41 +00:00
|
|
|
/// Parses the value of the 'format_schema' setting and resolves it into
/// `schema_path`, `schema_directory` and (optionally) `message_name`.
///
/// Parameters:
///   format_schema      - the setting value; 'schema_file:message_name' when `require_message` is true,
///                        otherwise just a schema file path.
///   format             - the format name, used for error messages and to pick a default file extension.
///   require_message    - whether the ':message_name' suffix is mandatory.
///   is_server          - when true, absolute paths and paths leaving `format_schema_path` are rejected;
///                        when false they are accepted and resolved.
///   format_schema_path - the default directory for schema files; must exist (it is canonicalized).
///
/// Throws Exception(BAD_ARGUMENTS) when the setting is empty or malformed, or when the path is
/// prohibited in server mode. fs::canonical may throw if `format_schema_path` cannot be resolved.
FormatSchemaInfo::FormatSchemaInfo(const String & format_schema, const String & format, bool require_message, bool is_server, const std::string & format_schema_path)
{
    if (format_schema.empty())
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "The format {} requires a schema. The corresponding setting should be set", format);

    const String default_file_extension = getFormatSchemaDefaultFileExtension(format);

    fs::path path;
    if (require_message)
    {
        /// The setting must look like 'file:message' with both parts non-empty.
        const size_t colon_pos = format_schema.find(':');
        if ((colon_pos == String::npos) || (colon_pos == 0) || (colon_pos == format_schema.length() - 1))
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Format schema requires the 'format_schema' setting to have the 'schema_file:message_name' format{}. Got '{}'",
                (default_file_extension.empty() ? "" : ", e.g. 'schema." + default_file_extension + ":Message'"), format_schema);
        }

        path = fs::path(format_schema.substr(0, colon_pos));

        /// A path with a trailing separator has no filename; fall back to the last directory name.
        const String filename = path.has_filename() ? path.filename() : path.parent_path().filename();
        if (filename.empty())
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Format schema requires the 'format_schema' setting to have the 'schema_file:message_name' format{}. Got '{}'",
                (default_file_extension.empty() ? "" : ", e.g. 'schema." + default_file_extension + ":Message'"), format_schema);
        }

        message_name = format_schema.substr(colon_pos + 1);
    }
    else
    {
        path = fs::path(format_schema);
        if (!path.has_filename())
            path = path.parent_path() / "";
    }

    /// Append the format's default extension when the user did not specify one.
    if (!path.has_extension() && !default_file_extension.empty())
        path = path.parent_path() / (path.stem().string() + '.' + default_file_extension);

    /// Resolved on every construction. Keeping it in a function-local static would pin the directory
    /// passed by the very first caller and silently ignore a different `format_schema_path` later on.
    const fs::path default_schema_directory_path = fs::canonical(format_schema_path) / "";
    const String default_schema_directory = default_schema_directory_path;

    /// Returns true if `relative_path`, resolved against the default schema directory, ends up outside of it.
    /// Whole path components are compared: a plain string-prefix test would accept a sibling directory
    /// such as '<dir>_other' because its name merely starts with '<dir>'.
    auto leaves_default_directory = [&default_schema_directory_path](const fs::path & relative_path)
    {
        const fs::path base = fs::weakly_canonical(default_schema_directory_path);
        const fs::path target = fs::weakly_canonical(default_schema_directory_path / relative_path);
        const fs::path relation = target.lexically_relative(base);
        return relation.empty() || *relation.begin() == "..";
    };

    if (path.is_absolute())
    {
        if (is_server)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Absolute path in the 'format_schema' setting is prohibited: {}", path.string());

        schema_path = path.filename();
        schema_directory = path.parent_path() / "";
    }
    else if (path.has_parent_path() && leaves_default_directory(path))
    {
        if (is_server)
            throw Exception(
                ErrorCodes::BAD_ARGUMENTS,
                "Path in the 'format_schema' setting shouldn't go outside the 'format_schema_path' directory: {} ({} not in {})",
                default_schema_directory,
                path.string(),
                default_schema_directory);

        /// Not a server: allow it, but express the location as directory + bare file name.
        path = default_schema_directory_path / path;
        schema_path = path.filename();
        schema_directory = path.parent_path() / "";
    }
    else
    {
        /// The path stays inside the default directory: keep it relative to that directory.
        schema_path = path;
        schema_directory = default_schema_directory;
    }
}
|
|
|
|
|
2021-09-28 12:59:22 +00:00
|
|
|
/// Convenience overload: reads the schema string, the server flag and the schema directory
/// from `settings.schema` and delegates to the constructor that takes them explicitly.
FormatSchemaInfo::FormatSchemaInfo(const FormatSettings & settings, const String & format, bool require_message)
    : FormatSchemaInfo(
        settings.schema.format_schema,
        format,
        require_message,
        settings.schema.is_server,
        settings.schema.format_schema_path)
{
}
|
|
|
|
|
2023-06-22 17:58:57 +00:00
|
|
|
template <typename SchemaGenerator>
|
|
|
|
MaybeAutogeneratedFormatSchemaInfo<SchemaGenerator>::MaybeAutogeneratedFormatSchemaInfo(
|
|
|
|
const FormatSettings & settings, const String & format, const Block & header, bool use_autogenerated_schema)
|
|
|
|
{
|
|
|
|
if (!use_autogenerated_schema || !settings.schema.format_schema.empty())
|
|
|
|
{
|
|
|
|
schema_info = std::make_unique<FormatSchemaInfo>(settings, format, true);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
String schema_path;
|
|
|
|
fs::path default_schema_directory_path(fs::canonical(settings.schema.format_schema_path) / "");
|
|
|
|
fs::path path;
|
|
|
|
if (!settings.schema.output_format_schema.empty())
|
|
|
|
{
|
|
|
|
schema_path = settings.schema.output_format_schema;
|
|
|
|
path = schema_path;
|
|
|
|
if (path.is_absolute())
|
|
|
|
{
|
|
|
|
if (settings.schema.is_server)
|
|
|
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Absolute path in the 'output_format_schema' setting is prohibited: {}", path.string());
|
|
|
|
}
|
|
|
|
else if (path.has_parent_path() && !fs::weakly_canonical(default_schema_directory_path / path).string().starts_with(fs::weakly_canonical(default_schema_directory_path).string()))
|
|
|
|
{
|
|
|
|
if (settings.schema.is_server)
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::BAD_ARGUMENTS,
|
|
|
|
"Path in the 'format_schema' setting shouldn't go outside the 'format_schema_path' directory: {} ({} not in {})",
|
|
|
|
default_schema_directory_path.string(),
|
|
|
|
path.string(),
|
|
|
|
default_schema_directory_path.string());
|
|
|
|
path = default_schema_directory_path / path;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
path = default_schema_directory_path / path;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (settings.schema.is_server)
|
|
|
|
{
|
|
|
|
tmp_file_path = PocoTemporaryFile::tempName(default_schema_directory_path.string()) + '.' + getFormatSchemaDefaultFileExtension(format);
|
|
|
|
schema_path = fs::path(tmp_file_path).filename();
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
tmp_file_path = PocoTemporaryFile::tempName() + '.' + getFormatSchemaDefaultFileExtension(format);
|
|
|
|
schema_path = tmp_file_path;
|
|
|
|
}
|
|
|
|
|
|
|
|
path = tmp_file_path;
|
|
|
|
}
|
|
|
|
|
|
|
|
WriteBufferFromFile buf(path.string());
|
|
|
|
SchemaGenerator::writeSchema(buf, "Message", header.getNamesAndTypesList());
|
|
|
|
buf.finalize();
|
|
|
|
|
|
|
|
schema_info = std::make_unique<FormatSchemaInfo>(schema_path + ":Message", format, true, settings.schema.is_server, settings.schema.format_schema_path);
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename SchemaGenerator>
|
|
|
|
MaybeAutogeneratedFormatSchemaInfo<SchemaGenerator>::~MaybeAutogeneratedFormatSchemaInfo()
|
|
|
|
{
|
|
|
|
if (!tmp_file_path.empty())
|
|
|
|
{
|
|
|
|
try
|
|
|
|
{
|
|
|
|
fs::remove(tmp_file_path);
|
|
|
|
}
|
|
|
|
catch (...)
|
|
|
|
{
|
|
|
|
tryLogCurrentException("MaybeAutogeneratedFormatSchemaInfo", "Cannot delete temporary schema file");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Explicit instantiation for the StructureToCapnProtoSchema generator: the template's members are defined in this file.
template class MaybeAutogeneratedFormatSchemaInfo<StructureToCapnProtoSchema>;
|
|
|
|
/// Explicit instantiation for the StructureToProtobufSchema generator: the template's members are defined in this file.
template class MaybeAutogeneratedFormatSchemaInfo<StructureToProtobufSchema>;
|
|
|
|
|
2019-01-23 19:28:13 +00:00
|
|
|
}
|