ClickHouse/dbms/src/DataStreams/FormatFactory.cpp

195 lines
9.6 KiB
C++
Raw Normal View History

#include <DB/Interpreters/Context.h>
2011-10-24 12:10:59 +00:00
#include <DB/DataStreams/NativeBlockInputStream.h>
#include <DB/DataStreams/NativeBlockOutputStream.h>
#include <DB/DataStreams/TabSeparatedRowInputStream.h>
#include <DB/DataStreams/TabSeparatedRowOutputStream.h>
2012-08-17 19:53:11 +00:00
#include <DB/DataStreams/TabSeparatedRawRowOutputStream.h>
#include <DB/DataStreams/BinaryRowInputStream.h>
#include <DB/DataStreams/BinaryRowOutputStream.h>
2011-10-30 05:19:41 +00:00
#include <DB/DataStreams/ValuesRowInputStream.h>
#include <DB/DataStreams/ValuesRowOutputStream.h>
2011-11-06 06:22:52 +00:00
#include <DB/DataStreams/TabSeparatedBlockOutputStream.h>
2011-11-07 01:15:37 +00:00
#include <DB/DataStreams/PrettyBlockOutputStream.h>
2011-11-28 04:05:53 +00:00
#include <DB/DataStreams/PrettyCompactBlockOutputStream.h>
#include <DB/DataStreams/PrettySpaceBlockOutputStream.h>
#include <DB/DataStreams/VerticalRowOutputStream.h>
2012-05-08 11:19:00 +00:00
#include <DB/DataStreams/NullBlockOutputStream.h>
2011-10-24 12:10:59 +00:00
#include <DB/DataStreams/BlockInputStreamFromRowInputStream.h>
#include <DB/DataStreams/BlockOutputStreamFromRowOutputStream.h>
#include <DB/DataStreams/JSONRowOutputStream.h>
#include <DB/DataStreams/JSONCompactRowOutputStream.h>
#include <DB/DataStreams/JSONEachRowRowOutputStream.h>
#include <DB/DataStreams/JSONEachRowRowInputStream.h>
#include <DB/DataStreams/XMLRowOutputStream.h>
2015-07-18 04:27:38 +00:00
#include <DB/DataStreams/TSKVRowOutputStream.h>
#include <DB/DataStreams/TSKVRowInputStream.h>
#include <DB/DataStreams/PrettyCompactMonoBlockOutputStream.h>
#include <DB/DataStreams/ODBCDriverBlockOutputStream.h>
#include <DB/DataStreams/CSVRowInputStream.h>
#include <DB/DataStreams/CSVRowOutputStream.h>
#include <DB/DataStreams/MaterializingBlockOutputStream.h>
2011-10-24 12:10:59 +00:00
#include <DB/DataStreams/FormatFactory.h>
namespace DB
{
namespace ErrorCodes
{
extern const int FORMAT_IS_NOT_SUITABLE_FOR_INPUT;
extern const int UNKNOWN_FORMAT;
}
2011-10-24 12:10:59 +00:00
BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & buf,
const Block & sample, const Context & context, size_t max_block_size) const
2011-10-24 12:10:59 +00:00
{
const Settings & settings = context.getSettingsRef();
auto wrap_row_stream = [&](auto && row_stream)
{
return std::make_shared<BlockInputStreamFromRowInputStream>(std::move(row_stream), sample, max_block_size,
settings.input_format_allow_errors_num, settings.input_format_allow_errors_ratio);
};
Text formats allow to skip errors (#407) * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Added test [#CLICKHOUSE-2778].
2017-01-27 04:29:47 +00:00
if (name == "Native")
{
return std::make_shared<NativeBlockInputStream>(buf);
}
else if (name == "RowBinary")
{
return wrap_row_stream(std::make_shared<BinaryRowInputStream>(buf));
}
else if (name == "TabSeparated" || name == "TSV") /// TSV is a synonym/alias for the original TabSeparated format
{
return wrap_row_stream(std::make_shared<TabSeparatedRowInputStream>(buf, sample));
}
else if (name == "TabSeparatedWithNames" || name == "TSVWithNames")
{
return wrap_row_stream(std::make_shared<TabSeparatedRowInputStream>(buf, sample, true));
}
else if (name == "TabSeparatedWithNamesAndTypes" || name == "TSVWithNamesAndTypes")
{
return wrap_row_stream(std::make_shared<TabSeparatedRowInputStream>(buf, sample, true, true));
}
else if (name == "Values")
{
return wrap_row_stream(std::make_shared<ValuesRowInputStream>(buf, context, settings.input_format_values_interpret_expressions));
}
else if (name == "CSV")
{
return wrap_row_stream(std::make_shared<CSVRowInputStream>(buf, sample, ','));
}
else if (name == "CSVWithNames")
{
return wrap_row_stream(std::make_shared<CSVRowInputStream>(buf, sample, ',', true));
}
else if (name == "TSKV")
{
return wrap_row_stream(std::make_shared<TSKVRowInputStream>(buf, sample, settings.input_format_skip_unknown_fields));
}
else if (name == "JSONEachRow")
{
return wrap_row_stream(std::make_shared<JSONEachRowRowInputStream>(buf, sample, settings.input_format_skip_unknown_fields));
}
else if (name == "TabSeparatedRaw"
|| name == "TSVRaw"
|| name == "BlockTabSeparated"
|| name == "Pretty"
|| name == "PrettyCompact"
|| name == "PrettyCompactMonoBlock"
|| name == "PrettySpace"
|| name == "PrettyNoEscapes"
|| name == "PrettyCompactNoEscapes"
|| name == "PrettySpaceNoEscapes"
|| name == "Vertical"
|| name == "VerticalRaw"
|| name == "Null"
|| name == "JSON"
|| name == "JSONCompact"
|| name == "XML"
|| name == "ODBCDriver")
{
throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT);
}
else
throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT);
2011-10-24 12:10:59 +00:00
}
static BlockOutputStreamPtr getOutputImpl(const String & name, WriteBuffer & buf,
const Block & sample, const Context & context)
2011-10-24 12:10:59 +00:00
{
const Settings & settings = context.getSettingsRef();
if (name == "Native")
return std::make_shared<NativeBlockOutputStream>(buf);
else if (name == "RowBinary")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<BinaryRowOutputStream>(buf));
else if (name == "TabSeparated" || name == "TSV")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<TabSeparatedRowOutputStream>(buf, sample));
else if (name == "TabSeparatedWithNames" || name == "TSVWithNames")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<TabSeparatedRowOutputStream>(buf, sample, true));
else if (name == "TabSeparatedWithNamesAndTypes" || name == "TSVWithNamesAndTypes")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<TabSeparatedRowOutputStream>(buf, sample, true, true));
else if (name == "TabSeparatedRaw" || name == "TSVRaw")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<TabSeparatedRawRowOutputStream>(buf, sample));
else if (name == "BlockTabSeparated")
return std::make_shared<TabSeparatedBlockOutputStream>(buf);
else if (name == "CSV")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<CSVRowOutputStream>(buf, sample));
else if (name == "CSVWithNames")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<CSVRowOutputStream>(buf, sample, true));
else if (name == "Pretty")
return std::make_shared<PrettyBlockOutputStream>(buf, false, settings.output_format_pretty_max_rows, context);
else if (name == "PrettyCompact")
return std::make_shared<PrettyCompactBlockOutputStream>(buf, false, settings.output_format_pretty_max_rows, context);
else if (name == "PrettyCompactMonoBlock")
return std::make_shared<PrettyCompactMonoBlockOutputStream>(buf, false, settings.output_format_pretty_max_rows, context);
else if (name == "PrettySpace")
return std::make_shared<PrettySpaceBlockOutputStream>(buf, false, settings.output_format_pretty_max_rows, context);
else if (name == "PrettyNoEscapes")
return std::make_shared<PrettyBlockOutputStream>(buf, true, settings.output_format_pretty_max_rows, context);
else if (name == "PrettyCompactNoEscapes")
return std::make_shared<PrettyCompactBlockOutputStream>(buf, true, settings.output_format_pretty_max_rows, context);
else if (name == "PrettySpaceNoEscapes")
return std::make_shared<PrettySpaceBlockOutputStream>(buf, true, settings.output_format_pretty_max_rows, context);
else if (name == "Vertical")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<VerticalRowOutputStream>(buf, sample, context));
else if (name == "VerticalRaw")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<VerticalRawRowOutputStream>(buf, sample, context));
else if (name == "Values")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<ValuesRowOutputStream>(buf));
else if (name == "JSON")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<JSONRowOutputStream>(buf, sample,
settings.output_format_write_statistics, settings.output_format_json_quote_64bit_integers));
else if (name == "JSONCompact")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<JSONCompactRowOutputStream>(buf, sample,
settings.output_format_write_statistics, settings.output_format_json_quote_64bit_integers));
else if (name == "JSONEachRow")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<JSONEachRowRowOutputStream>(buf, sample,
settings.output_format_json_quote_64bit_integers));
else if (name == "XML")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<XMLRowOutputStream>(buf, sample,
settings.output_format_write_statistics));
else if (name == "TSKV")
return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<TSKVRowOutputStream>(buf, sample));
else if (name == "ODBCDriver")
return std::make_shared<ODBCDriverBlockOutputStream>(buf);
else if (name == "Null")
return std::make_shared<NullBlockOutputStream>();
else
throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT);
2011-10-24 12:10:59 +00:00
}
BlockOutputStreamPtr FormatFactory::getOutput(const String & name, WriteBuffer & buf,
const Block & sample, const Context & context) const
{
/** Materialization is needed, because formats can use the functions `IDataType`,
* which only work with full columns.
*/
return std::make_shared<MaterializingBlockOutputStream>(getOutputImpl(name, buf, sample, context));
}
2011-10-24 12:10:59 +00:00
}