2018-06-10 19:22:49 +00:00
|
|
|
#include <Common/Exception.h>
|
|
|
|
#include <Interpreters/Context.h>
|
2019-03-22 12:08:30 +00:00
|
|
|
#include <Core/Settings.h>
|
2018-06-10 19:22:49 +00:00
|
|
|
#include <DataStreams/MaterializingBlockOutputStream.h>
|
|
|
|
#include <Formats/FormatSettings.h>
|
|
|
|
#include <Formats/FormatFactory.h>
|
2019-02-19 18:41:18 +00:00
|
|
|
#include <Processors/Formats/IRowInputFormat.h>
|
2019-07-24 18:00:09 +00:00
|
|
|
#include <Processors/Formats/InputStreamFromInputFormat.h>
|
|
|
|
#include <Processors/Formats/OutputStreamToOutputFormat.h>
|
2018-06-10 19:22:49 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes referenced in this file; they are only declared here (extern).
namespace ErrorCodes
{
    /// Lookup and registration failures.
    extern const int UNKNOWN_FORMAT;
    extern const int LOGICAL_ERROR;

    /// The format exists but lacks the requested direction.
    extern const int FORMAT_IS_NOT_SUITABLE_FOR_INPUT;
    extern const int FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT;
}
|
|
|
|
|
2019-07-24 18:00:09 +00:00
|
|
|
//
|
|
|
|
//const FormatFactory::Creators & FormatFactory::getCreators(const String & name) const
|
|
|
|
//{
|
|
|
|
// auto it = dict.find(name);
|
|
|
|
// if (dict.end() != it)
|
|
|
|
// return it->second;
|
|
|
|
// throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT);
|
|
|
|
//}
|
2018-06-10 19:22:49 +00:00
|
|
|
|
2019-02-19 18:41:18 +00:00
|
|
|
/// Returns the entry of processors_dict stored under `name`.
/// Throws UNKNOWN_FORMAT when no entry with that name exists.
const FormatFactory::ProcessorCreators & FormatFactory::getProcessorCreators(const String & name) const
{
    const auto found = processors_dict.find(name);

    if (found == processors_dict.end())
        throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT);

    return found->second;
}
|
2018-06-10 19:22:49 +00:00
|
|
|
|
2019-02-19 18:41:18 +00:00
|
|
|
/// Builds a FormatSettings object for reading data,
/// copying the input-related options from `settings` one by one.
static FormatSettings getInputFormatSetting(const Settings & settings)
{
    FormatSettings result;

    /// CSV options.
    auto & csv = result.csv;
    csv.delimiter = settings.format_csv_delimiter;
    csv.allow_single_quotes = settings.format_csv_allow_single_quotes;
    csv.allow_double_quotes = settings.format_csv_allow_double_quotes;
    /// Taken from the generic "defaults for omitted fields" setting rather than a CSV-specific one.
    csv.empty_as_default = settings.input_format_defaults_for_omitted_fields;

    /// Values format options.
    result.values.interpret_expressions = settings.input_format_values_interpret_expressions;

    /// Options that are not tied to a single format.
    result.with_names_use_header = settings.input_format_with_names_use_header;
    result.skip_unknown_fields = settings.input_format_skip_unknown_fields;
    result.import_nested_json = settings.input_format_import_nested_json;
    result.date_time_input_format = settings.date_time_input_format;

    /// Error limits.
    result.input_allow_errors_num = settings.input_format_allow_errors_num;
    result.input_allow_errors_ratio = settings.input_format_allow_errors_ratio;

    return result;
}
|
|
|
|
|
2019-02-19 18:41:18 +00:00
|
|
|
/// Builds a FormatSettings object for writing data,
/// copying the output-related options from `settings` one by one.
static FormatSettings getOutputFormatSetting(const Settings & settings)
{
    FormatSettings result;

    /// JSON options.
    auto & json = result.json;
    json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
    json.quote_denormals = settings.output_format_json_quote_denormals;
    json.escape_forward_slashes = settings.output_format_json_escape_forward_slashes;

    /// CSV options (read from the same settings as on the input side).
    auto & csv = result.csv;
    csv.delimiter = settings.format_csv_delimiter;
    csv.allow_single_quotes = settings.format_csv_allow_single_quotes;
    csv.allow_double_quotes = settings.format_csv_allow_double_quotes;

    /// Pretty* options.
    auto & pretty = result.pretty;
    pretty.max_rows = settings.output_format_pretty_max_rows;
    pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width;
    pretty.color = settings.output_format_pretty_color;

    /// Remaining options.
    result.write_statistics = settings.output_format_write_statistics;
    result.parquet.row_group_size = settings.output_format_parquet_row_group_size;

    return result;
}
|
|
|
|
|
|
|
|
|
2019-07-08 13:00:54 +00:00
|
|
|
/// Creates the input format via getInputFormat() (forwarding every argument)
/// and wraps it into an InputStreamFromInputFormat.
BlockInputStreamPtr FormatFactory::getInput(
    const String & name,
    ReadBuffer & buf,
    const Block & sample,
    const Context & context,
    UInt64 max_block_size,
    UInt64 rows_portion_size,
    ReadCallback callback) const
{
    return std::make_shared<InputStreamFromInputFormat>(
        getInputFormat(name, buf, sample, context, max_block_size, rows_portion_size, std::move(callback)));
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Creates the output format via getOutputFormat(), adapts it to a block output stream
/// and wraps the result into a MaterializingBlockOutputStream.
BlockOutputStreamPtr FormatFactory::getOutput(const String & name, WriteBuffer & buf, const Block & sample, const Context & context) const
{
    auto output_format = getOutputFormat(name, buf, sample, context);

    /// Adapter that exposes the output format through the stream interface.
    auto format_stream = std::make_shared<OutputStreamToOutputFormat>(output_format);

    /** Materialization is needed, because formats can use the functions `IDataType`,
      *  which only work with full columns.
      */
    /// NOTE(review): the materializing stream is built from the wrapped stream alone;
    /// confirm its constructor does not also expect the header block (`sample`).
    return std::make_shared<MaterializingBlockOutputStream>(std::move(format_stream));
}
|
|
|
|
|
|
|
|
|
2019-07-24 18:00:09 +00:00
|
|
|
/// Creates the input format registered under `name`.
/// Throws UNKNOWN_FORMAT (from getProcessorCreators) if the name is not registered,
/// and FORMAT_IS_NOT_SUITABLE_FOR_INPUT if the format has no input creator.
InputFormatPtr FormatFactory::getInputFormat(
    const String & name,
    ReadBuffer & buf,
    const Block & sample,
    const Context & context,
    UInt64 max_block_size,
    UInt64 rows_portion_size,
    ReadCallback callback) const
{
    const auto & create_input = getProcessorCreators(name).first;
    if (!create_input)
        throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT);

    FormatSettings format_settings = getInputFormatSetting(context.getSettingsRef());

    /// Per-call parameters: block/portion sizes and the callback come from the arguments,
    /// the error limits come from the format settings built above.
    RowInputFormatParams row_params;
    row_params.max_block_size = max_block_size;
    row_params.rows_portion_size = rows_portion_size;
    row_params.allow_errors_num = format_settings.input_allow_errors_num;
    row_params.allow_errors_ratio = format_settings.input_allow_errors_ratio;
    row_params.callback = std::move(callback);

    return create_input(buf, sample, context, row_params, format_settings);
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Creates the output format registered under `name`.
/// Throws UNKNOWN_FORMAT (from getProcessorCreators) if the name is not registered,
/// and FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT if the format has no output creator.
OutputFormatPtr FormatFactory::getOutputFormat(const String & name, WriteBuffer & buf, const Block & sample, const Context & context) const
{
    const auto & create_output = getProcessorCreators(name).second;
    if (!create_output)
        throw Exception("Format " + name + " is not suitable for output", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT);

    FormatSettings format_settings = getOutputFormatSetting(context.getSettingsRef());

    /** TODO: Materialization is needed, because formats can use the functions `IDataType`,
      *  which only work with full columns.
      */
    return create_output(buf, sample, context, format_settings);
}
|
|
|
|
|
|
|
|
|
2019-07-24 18:00:09 +00:00
|
|
|
/// Registration entry point for input formats passed as InputCreator.
/// Currently a no-op: both arguments are ignored and nothing is stored.
/// The disabled implementation is kept here for reference:
///     auto & target = dict[name].first;
///     if (target)
///         throw Exception("FormatFactory: Input format " + name + " is already registered", ErrorCodes::LOGICAL_ERROR);
///     target = std::move(input_creator);
void FormatFactory::registerInputFormat(const String & /*name*/, InputCreator /*input_creator*/)
{
}
|
|
|
|
|
2019-07-24 18:00:09 +00:00
|
|
|
/// Registration entry point for output formats passed as OutputCreator.
/// Currently a no-op: both arguments are ignored and nothing is stored.
/// The disabled implementation is kept here for reference:
///     auto & target = dict[name].second;
///     if (target)
///         throw Exception("FormatFactory: Output format " + name + " is already registered", ErrorCodes::LOGICAL_ERROR);
///     target = std::move(output_creator);
void FormatFactory::registerOutputFormat(const String & /*name*/, OutputCreator /*output_creator*/)
{
}
|
|
|
|
|
|
|
|
void FormatFactory::registerInputFormatProcessor(const String & name, InputProcessorCreator input_creator)
|
|
|
|
{
|
|
|
|
auto & target = processors_dict[name].first;
|
|
|
|
if (target)
|
|
|
|
throw Exception("FormatFactory: Input format " + name + " is already registered", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
target = std::move(input_creator);
|
|
|
|
}
|
|
|
|
|
|
|
|
void FormatFactory::registerOutputFormatProcessor(const String & name, OutputProcessorCreator output_creator)
|
|
|
|
{
|
|
|
|
auto & target = processors_dict[name].second;
|
|
|
|
if (target)
|
|
|
|
throw Exception("FormatFactory: Output format " + name + " is already registered", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
target = std::move(output_creator);
|
2018-06-10 19:22:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Formats for both input/output.
|
|
|
|
|
|
|
|
void registerInputFormatNative(FormatFactory & factory);
|
|
|
|
void registerOutputFormatNative(FormatFactory & factory);
|
|
|
|
void registerInputFormatRowBinary(FormatFactory & factory);
|
|
|
|
void registerOutputFormatRowBinary(FormatFactory & factory);
|
|
|
|
void registerInputFormatTabSeparated(FormatFactory & factory);
|
|
|
|
void registerOutputFormatTabSeparated(FormatFactory & factory);
|
|
|
|
void registerInputFormatValues(FormatFactory & factory);
|
|
|
|
void registerOutputFormatValues(FormatFactory & factory);
|
|
|
|
void registerInputFormatCSV(FormatFactory & factory);
|
|
|
|
void registerOutputFormatCSV(FormatFactory & factory);
|
|
|
|
void registerInputFormatTSKV(FormatFactory & factory);
|
|
|
|
void registerOutputFormatTSKV(FormatFactory & factory);
|
|
|
|
void registerInputFormatJSONEachRow(FormatFactory & factory);
|
|
|
|
void registerOutputFormatJSONEachRow(FormatFactory & factory);
|
2019-02-19 20:51:44 +00:00
|
|
|
void registerInputFormatParquet(FormatFactory & factory);
|
|
|
|
void registerOutputFormatParquet(FormatFactory & factory);
|
2019-02-19 19:36:55 +00:00
|
|
|
void registerInputFormatProtobuf(FormatFactory & factory);
|
2019-01-23 19:42:20 +00:00
|
|
|
void registerOutputFormatProtobuf(FormatFactory & factory);
|
2018-06-10 19:22:49 +00:00
|
|
|
|
2019-02-19 18:41:18 +00:00
|
|
|
void registerInputFormatProcessorNative(FormatFactory & factory);
|
|
|
|
void registerOutputFormatProcessorNative(FormatFactory & factory);
|
|
|
|
void registerInputFormatProcessorRowBinary(FormatFactory & factory);
|
|
|
|
void registerOutputFormatProcessorRowBinary(FormatFactory & factory);
|
|
|
|
void registerInputFormatProcessorTabSeparated(FormatFactory & factory);
|
|
|
|
void registerOutputFormatProcessorTabSeparated(FormatFactory & factory);
|
|
|
|
void registerInputFormatProcessorValues(FormatFactory & factory);
|
|
|
|
void registerOutputFormatProcessorValues(FormatFactory & factory);
|
|
|
|
void registerInputFormatProcessorCSV(FormatFactory & factory);
|
|
|
|
void registerOutputFormatProcessorCSV(FormatFactory & factory);
|
|
|
|
void registerInputFormatProcessorTSKV(FormatFactory & factory);
|
|
|
|
void registerOutputFormatProcessorTSKV(FormatFactory & factory);
|
|
|
|
void registerInputFormatProcessorJSONEachRow(FormatFactory & factory);
|
|
|
|
void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory);
|
2019-02-21 18:36:46 +00:00
|
|
|
void registerInputFormatProcessorParquet(FormatFactory & factory);
|
|
|
|
void registerOutputFormatProcessorParquet(FormatFactory & factory);
|
|
|
|
void registerInputFormatProcessorProtobuf(FormatFactory & factory);
|
2019-02-19 18:41:18 +00:00
|
|
|
void registerOutputFormatProcessorProtobuf(FormatFactory & factory);
|
|
|
|
|
2018-06-10 19:22:49 +00:00
|
|
|
/// Output only (presentational) formats.
|
|
|
|
|
|
|
|
void registerOutputFormatPretty(FormatFactory & factory);
|
|
|
|
void registerOutputFormatPrettyCompact(FormatFactory & factory);
|
|
|
|
void registerOutputFormatPrettySpace(FormatFactory & factory);
|
|
|
|
void registerOutputFormatVertical(FormatFactory & factory);
|
|
|
|
void registerOutputFormatJSON(FormatFactory & factory);
|
|
|
|
void registerOutputFormatJSONCompact(FormatFactory & factory);
|
|
|
|
void registerOutputFormatXML(FormatFactory & factory);
|
|
|
|
void registerOutputFormatODBCDriver(FormatFactory & factory);
|
2018-08-10 01:20:10 +00:00
|
|
|
void registerOutputFormatODBCDriver2(FormatFactory & factory);
|
2018-06-10 19:22:49 +00:00
|
|
|
void registerOutputFormatNull(FormatFactory & factory);
|
2019-05-26 06:52:29 +00:00
|
|
|
void registerOutputFormatMySQLWire(FormatFactory & factory);
|
2018-06-10 19:22:49 +00:00
|
|
|
|
2019-02-19 18:41:18 +00:00
|
|
|
void registerOutputFormatProcessorPretty(FormatFactory & factory);
|
|
|
|
void registerOutputFormatProcessorPrettyCompact(FormatFactory & factory);
|
|
|
|
void registerOutputFormatProcessorPrettySpace(FormatFactory & factory);
|
|
|
|
void registerOutputFormatProcessorVertical(FormatFactory & factory);
|
|
|
|
void registerOutputFormatProcessorJSON(FormatFactory & factory);
|
|
|
|
void registerOutputFormatProcessorJSONCompact(FormatFactory & factory);
|
|
|
|
void registerOutputFormatProcessorXML(FormatFactory & factory);
|
|
|
|
void registerOutputFormatProcessorODBCDriver(FormatFactory & factory);
|
|
|
|
void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory);
|
|
|
|
void registerOutputFormatProcessorNull(FormatFactory & factory);
|
2019-07-04 18:55:20 +00:00
|
|
|
void registerOutputFormatProcessorMySQLWrite(FormatFactory & factory);
|
2019-02-19 18:41:18 +00:00
|
|
|
|
2018-06-10 19:22:49 +00:00
|
|
|
/// Input only formats.
|
|
|
|
|
|
|
|
void registerInputFormatCapnProto(FormatFactory & factory);
|
2019-02-19 18:41:18 +00:00
|
|
|
void registerInputFormatProcessorCapnProto(FormatFactory & factory);
|
2018-06-10 19:22:49 +00:00
|
|
|
|
|
|
|
|
|
|
|
/// Runs every format registration function against this factory.
/// The functions are listed in a table and invoked strictly in table order.
FormatFactory::FormatFactory()
{
    using Registrar = void (*)(FormatFactory &);

    const Registrar registrars[] =
    {
        /// Input/output formats.
        registerInputFormatNative,
        registerOutputFormatNative,
        registerInputFormatRowBinary,
        registerOutputFormatRowBinary,
        registerInputFormatTabSeparated,
        registerOutputFormatTabSeparated,
        registerInputFormatValues,
        registerOutputFormatValues,
        registerInputFormatCSV,
        registerOutputFormatCSV,
        registerInputFormatTSKV,
        registerOutputFormatTSKV,
        registerInputFormatJSONEachRow,
        registerOutputFormatJSONEachRow,
        registerInputFormatProtobuf,
        registerOutputFormatProtobuf,
        registerInputFormatCapnProto,
        registerInputFormatParquet,
        registerOutputFormatParquet,

        registerOutputFormatMySQLWire,

        /// "Processor" variants of the input/output formats.
        registerInputFormatProcessorNative,
        registerOutputFormatProcessorNative,
        registerInputFormatProcessorRowBinary,
        registerOutputFormatProcessorRowBinary,
        registerInputFormatProcessorTabSeparated,
        registerOutputFormatProcessorTabSeparated,
        registerInputFormatProcessorValues,
        registerOutputFormatProcessorValues,
        registerInputFormatProcessorCSV,
        registerOutputFormatProcessorCSV,
        registerInputFormatProcessorTSKV,
        registerOutputFormatProcessorTSKV,
        registerInputFormatProcessorJSONEachRow,
        registerOutputFormatProcessorJSONEachRow,
        registerInputFormatProcessorProtobuf,
        registerOutputFormatProcessorProtobuf,
        registerInputFormatProcessorCapnProto,
        registerInputFormatProcessorParquet,
        registerOutputFormatProcessorParquet,

        /// Output-only formats.
        registerOutputFormatPretty,
        registerOutputFormatPrettyCompact,
        registerOutputFormatPrettySpace,
        registerOutputFormatVertical,
        registerOutputFormatJSON,
        registerOutputFormatJSONCompact,
        registerOutputFormatXML,
        registerOutputFormatODBCDriver,
        registerOutputFormatODBCDriver2,
        registerOutputFormatNull,

        /// "Processor" variants of the output-only formats.
        registerOutputFormatProcessorPretty,
        registerOutputFormatProcessorPrettyCompact,
        registerOutputFormatProcessorPrettySpace,
        registerOutputFormatProcessorVertical,
        registerOutputFormatProcessorJSON,
        registerOutputFormatProcessorJSONCompact,
        registerOutputFormatProcessorXML,
        registerOutputFormatProcessorODBCDriver,
        registerOutputFormatProcessorODBCDriver2,
        registerOutputFormatProcessorNull,
        registerOutputFormatProcessorMySQLWrite,
    };

    for (const Registrar register_format : registrars)
        register_format(*this);
}
|
|
|
|
|
|
|
|
}
|