2017-10-04 00:00:22 +00:00
|
|
|
#include <Common/config.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Interpreters/Context.h>
|
|
|
|
#include <DataStreams/NativeBlockInputStream.h>
|
|
|
|
#include <DataStreams/NativeBlockOutputStream.h>
|
|
|
|
#include <DataStreams/TabSeparatedRowInputStream.h>
|
|
|
|
#include <DataStreams/TabSeparatedRowOutputStream.h>
|
|
|
|
#include <DataStreams/TabSeparatedRawRowOutputStream.h>
|
|
|
|
#include <DataStreams/BinaryRowInputStream.h>
|
|
|
|
#include <DataStreams/BinaryRowOutputStream.h>
|
|
|
|
#include <DataStreams/ValuesRowInputStream.h>
|
|
|
|
#include <DataStreams/ValuesRowOutputStream.h>
|
|
|
|
#include <DataStreams/PrettyBlockOutputStream.h>
|
|
|
|
#include <DataStreams/PrettyCompactBlockOutputStream.h>
|
|
|
|
#include <DataStreams/PrettySpaceBlockOutputStream.h>
|
|
|
|
#include <DataStreams/VerticalRowOutputStream.h>
|
|
|
|
#include <DataStreams/NullBlockOutputStream.h>
|
|
|
|
#include <DataStreams/BlockInputStreamFromRowInputStream.h>
|
|
|
|
#include <DataStreams/BlockOutputStreamFromRowOutputStream.h>
|
|
|
|
#include <DataStreams/JSONRowOutputStream.h>
|
|
|
|
#include <DataStreams/JSONCompactRowOutputStream.h>
|
|
|
|
#include <DataStreams/JSONEachRowRowOutputStream.h>
|
|
|
|
#include <DataStreams/JSONEachRowRowInputStream.h>
|
|
|
|
#include <DataStreams/XMLRowOutputStream.h>
|
|
|
|
#include <DataStreams/TSKVRowOutputStream.h>
|
|
|
|
#include <DataStreams/TSKVRowInputStream.h>
|
|
|
|
#include <DataStreams/ODBCDriverBlockOutputStream.h>
|
|
|
|
#include <DataStreams/CSVRowInputStream.h>
|
|
|
|
#include <DataStreams/CSVRowOutputStream.h>
|
|
|
|
#include <DataStreams/MaterializingBlockOutputStream.h>
|
|
|
|
#include <DataStreams/FormatFactory.h>
|
2017-07-11 21:41:37 +00:00
|
|
|
#include <DataStreams/SquashingBlockOutputStream.h>
|
2017-07-05 16:28:57 +00:00
|
|
|
#include <DataTypes/FormatSettingsJSON.h>
|
2017-10-04 00:00:22 +00:00
|
|
|
#if USE_CAPNP
|
2017-10-23 19:08:58 +00:00
|
|
|
#include <DataStreams/CapnProtoRowInputStream.h>
|
2017-10-04 00:00:22 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <boost/algorithm/string.hpp>
|
2011-10-24 12:10:59 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-01-11 21:46:36 +00:00
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
extern const int FORMAT_IS_NOT_SUITABLE_FOR_INPUT;
|
|
|
|
extern const int UNKNOWN_FORMAT;
|
2016-01-11 21:46:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-10-24 12:10:59 +00:00
|
|
|
/** Creates a block input stream that reads data in the format `name` from `buf`.
  *
  * "Native" is read by NativeBlockInputStream directly. Every other supported format is parsed
  * by a row input stream that is wrapped into BlockInputStreamFromRowInputStream together with
  * `sample`, `max_block_size` and the settings `input_format_allow_errors_num` /
  * `input_format_allow_errors_ratio`.
  *
  * Throws an Exception with code FORMAT_IS_NOT_SUITABLE_FOR_INPUT for the known output-only formats
  * and with code UNKNOWN_FORMAT for any other unrecognized name.
  */
BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & buf,
    const Block & sample, const Context & context, size_t max_block_size) const
{
    const Settings & settings = context.getSettingsRef();

    /// Wraps a row input stream into a block input stream.
    /// The argument is a forwarding reference, so it is forwarded (not unconditionally moved):
    /// temporaries are still moved, while an lvalue would be copied instead of being silently moved from.
    auto wrap_row_stream = [&](auto && row_stream)
    {
        return std::make_shared<BlockInputStreamFromRowInputStream>(
            std::forward<decltype(row_stream)>(row_stream), sample, max_block_size,
            settings.input_format_allow_errors_num, settings.input_format_allow_errors_ratio);
    };

    if (name == "Native")
        return std::make_shared<NativeBlockInputStream>(buf, sample, 0);

    if (name == "RowBinary")
        return wrap_row_stream(std::make_shared<BinaryRowInputStream>(buf, sample));

    /// TSV is a synonym/alias for the original TabSeparated format
    if (name == "TabSeparated" || name == "TSV")
        return wrap_row_stream(std::make_shared<TabSeparatedRowInputStream>(buf, sample));

    if (name == "TabSeparatedWithNames" || name == "TSVWithNames")
        return wrap_row_stream(std::make_shared<TabSeparatedRowInputStream>(buf, sample, true));

    if (name == "TabSeparatedWithNamesAndTypes" || name == "TSVWithNamesAndTypes")
        return wrap_row_stream(std::make_shared<TabSeparatedRowInputStream>(buf, sample, true, true));

    if (name == "Values")
        return wrap_row_stream(std::make_shared<ValuesRowInputStream>(buf, sample, context, settings.input_format_values_interpret_expressions));

    if (name == "CSV" || name == "CSVWithNames")
    {
        /// Both CSV variants share one parser; they differ only in the `with_names` flag.
        char csv_delimiter = settings.format_csv_delimiter;
        bool with_names = name == "CSVWithNames";

        return wrap_row_stream(std::make_shared<CSVRowInputStream>(buf, sample, csv_delimiter, with_names));
    }

    if (name == "TSKV")
        return wrap_row_stream(std::make_shared<TSKVRowInputStream>(buf, sample, settings.input_format_skip_unknown_fields));

    if (name == "JSONEachRow")
        return wrap_row_stream(std::make_shared<JSONEachRowRowInputStream>(buf, sample, settings.input_format_skip_unknown_fields));

#if USE_CAPNP
    if (name == "CapnProto")
    {
        /// The 'format_schema' setting must consist of exactly two parts separated by ':'.
        std::vector<String> tokens;
        auto schema_and_root = settings.format_schema.toString();
        boost::split(tokens, schema_and_root, boost::is_any_of(":"));

        /// NOTE(review): this exception is thrown without an explicit error code, unlike the other throws here.
        if (tokens.size() != 2)
            throw Exception("Format CapnProto requires 'format_schema' setting to have a schema_file:root_object format, e.g. 'schema.capnp:Message'");

        const String & schema_dir = context.getFormatSchemaPath();
        return wrap_row_stream(std::make_shared<CapnProtoRowInputStream>(buf, sample, schema_dir, tokens[0], tokens[1]));
    }
#endif

    /// Formats that are known but can only be written, not read.
    if (name == "TabSeparatedRaw"
        || name == "TSVRaw"
        || name == "Pretty"
        || name == "PrettyCompact"
        || name == "PrettyCompactMonoBlock"
        || name == "PrettySpace"
        || name == "PrettyNoEscapes"
        || name == "PrettyCompactNoEscapes"
        || name == "PrettySpaceNoEscapes"
        || name == "Vertical"
        || name == "VerticalRaw"
        || name == "Null"
        || name == "JSON"
        || name == "JSONCompact"
        || name == "XML"
        || name == "ODBCDriver")
    {
        throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT);
    }

    throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT);
}
|
|
|
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
/** Creates the output stream that writes data in the format `name` to `buf`.
  *
  * Row-oriented formats are wrapped into BlockOutputStreamFromRowOutputStream together with `sample`;
  * the remaining formats are constructed as block output streams directly.
  *
  * Throws an Exception with code UNKNOWN_FORMAT if `name` is not one of the formats handled below.
  */
static BlockOutputStreamPtr getOutputImpl(const String & name, WriteBuffer & buf,
    const Block & sample, const Context & context)
{
    const Settings & settings = context.getSettingsRef();
    FormatSettingsJSON json_settings(settings.output_format_json_quote_64bit_integers, settings.output_format_json_quote_denormals);

    /// Shorthand for the setting that is passed to the Pretty* and Vertical* formats.
    const auto & pretty_max_rows = settings.output_format_pretty_max_rows;

    /// Wraps a row output stream into BlockOutputStreamFromRowOutputStream together with `sample`.
    auto from_rows = [&sample](auto row_writer) -> BlockOutputStreamPtr
    {
        return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::move(row_writer), sample);
    };

    if (name == "Native")
        return std::make_shared<NativeBlockOutputStream>(buf, 0, sample);

    if (name == "RowBinary")
        return from_rows(std::make_shared<BinaryRowOutputStream>(buf));

    if (name == "TabSeparated" || name == "TSV")
        return from_rows(std::make_shared<TabSeparatedRowOutputStream>(buf, sample));

    if (name == "TabSeparatedWithNames" || name == "TSVWithNames")
        return from_rows(std::make_shared<TabSeparatedRowOutputStream>(buf, sample, true));

    if (name == "TabSeparatedWithNamesAndTypes" || name == "TSVWithNamesAndTypes")
        return from_rows(std::make_shared<TabSeparatedRowOutputStream>(buf, sample, true, true));

    if (name == "TabSeparatedRaw" || name == "TSVRaw")
        return from_rows(std::make_shared<TabSeparatedRawRowOutputStream>(buf, sample));

    if (name == "CSV" || name == "CSVWithNames")
    {
        /// Both CSV variants share one writer; they differ only in the `with_names` flag.
        char csv_delimiter = settings.format_csv_delimiter;
        bool with_names = name == "CSVWithNames";

        return from_rows(std::make_shared<CSVRowOutputStream>(buf, sample, csv_delimiter, with_names));
    }

    if (name == "Pretty")
        return std::make_shared<PrettyBlockOutputStream>(buf, sample, false, pretty_max_rows, context);

    if (name == "PrettyCompact")
        return std::make_shared<PrettyCompactBlockOutputStream>(buf, sample, false, pretty_max_rows, context);

    if (name == "PrettyCompactMonoBlock")
    {
        /// PrettyCompact output placed behind a squashing stream whose flush is disabled.
        BlockOutputStreamPtr compact_stream = std::make_shared<PrettyCompactBlockOutputStream>(buf, sample, false, pretty_max_rows, context);
        auto squashing_stream = std::make_shared<SquashingBlockOutputStream>(compact_stream, pretty_max_rows, 0);
        squashing_stream->disableFlush();
        return squashing_stream;
    }

    if (name == "PrettySpace")
        return std::make_shared<PrettySpaceBlockOutputStream>(buf, sample, false, pretty_max_rows, context);

    if (name == "PrettyNoEscapes")
        return std::make_shared<PrettyBlockOutputStream>(buf, sample, true, pretty_max_rows, context);

    if (name == "PrettyCompactNoEscapes")
        return std::make_shared<PrettyCompactBlockOutputStream>(buf, sample, true, pretty_max_rows, context);

    if (name == "PrettySpaceNoEscapes")
        return std::make_shared<PrettySpaceBlockOutputStream>(buf, sample, true, pretty_max_rows, context);

    if (name == "Vertical")
        return from_rows(std::make_shared<VerticalRowOutputStream>(buf, sample, pretty_max_rows));

    if (name == "VerticalRaw")
        return from_rows(std::make_shared<VerticalRawRowOutputStream>(buf, sample, pretty_max_rows));

    if (name == "Values")
        return from_rows(std::make_shared<ValuesRowOutputStream>(buf));

    if (name == "JSON")
        return from_rows(std::make_shared<JSONRowOutputStream>(
            buf, sample, settings.output_format_write_statistics, json_settings));

    if (name == "JSONCompact")
        return from_rows(std::make_shared<JSONCompactRowOutputStream>(
            buf, sample, settings.output_format_write_statistics, json_settings));

    if (name == "JSONEachRow")
        return from_rows(std::make_shared<JSONEachRowRowOutputStream>(buf, sample, json_settings));

    if (name == "XML")
        return from_rows(std::make_shared<XMLRowOutputStream>(buf, sample, settings.output_format_write_statistics));

    if (name == "TSKV")
        return from_rows(std::make_shared<TSKVRowOutputStream>(buf, sample));

    if (name == "ODBCDriver")
        return std::make_shared<ODBCDriverBlockOutputStream>(buf, sample);

    if (name == "Null")
        return std::make_shared<NullBlockOutputStream>(sample);

    throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT);
}
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
/** Creates the output stream for the format `name`, wrapped into MaterializingBlockOutputStream.
  *
  * The format-specific stream is built by getOutputImpl from the materialized `sample`,
  * while the wrapper itself receives the original `sample`.
  */
BlockOutputStreamPtr FormatFactory::getOutput(const String & name, WriteBuffer & buf,
    const Block & sample, const Context & context) const
{
    /// Materialization is required because formats may call `IDataType` functions
    /// that only work with full columns.
    const Block materialized_sample = materializeBlock(sample);

    BlockOutputStreamPtr format_stream = getOutputImpl(name, buf, materialized_sample, context);

    return std::make_shared<MaterializingBlockOutputStream>(std::move(format_stream), sample);
}
|
|
|
|
|
2011-10-24 12:10:59 +00:00
|
|
|
}
|