ClickHouse/dbms/src/DataStreams/FormatFactory.cpp

198 lines
9.6 KiB
C++
Raw Normal View History

#include <Interpreters/Context.h>
#include <DataStreams/NativeBlockInputStream.h>
#include <DataStreams/NativeBlockOutputStream.h>
#include <DataStreams/TabSeparatedRowInputStream.h>
#include <DataStreams/TabSeparatedRowOutputStream.h>
#include <DataStreams/TabSeparatedRawRowOutputStream.h>
#include <DataStreams/BinaryRowInputStream.h>
#include <DataStreams/BinaryRowOutputStream.h>
#include <DataStreams/ValuesRowInputStream.h>
#include <DataStreams/ValuesRowOutputStream.h>
#include <DataStreams/TabSeparatedBlockOutputStream.h>
#include <DataStreams/PrettyBlockOutputStream.h>
#include <DataStreams/PrettyCompactBlockOutputStream.h>
#include <DataStreams/PrettySpaceBlockOutputStream.h>
#include <DataStreams/VerticalRowOutputStream.h>
#include <DataStreams/NullBlockOutputStream.h>
#include <DataStreams/BlockInputStreamFromRowInputStream.h>
#include <DataStreams/BlockOutputStreamFromRowOutputStream.h>
#include <DataStreams/JSONRowOutputStream.h>
#include <DataStreams/JSONCompactRowOutputStream.h>
#include <DataStreams/JSONEachRowRowOutputStream.h>
#include <DataStreams/JSONEachRowRowInputStream.h>
#include <DataStreams/XMLRowOutputStream.h>
#include <DataStreams/TSKVRowOutputStream.h>
#include <DataStreams/TSKVRowInputStream.h>
#include <DataStreams/PrettyCompactMonoBlockOutputStream.h>
#include <DataStreams/ODBCDriverBlockOutputStream.h>
#include <DataStreams/CSVRowInputStream.h>
#include <DataStreams/CSVRowOutputStream.h>
#include <DataStreams/MaterializingBlockOutputStream.h>
#include <DataStreams/FormatFactory.h>
2017-07-05 16:28:57 +00:00
#include <DataTypes/FormatSettingsJSON.h>
2011-10-24 12:10:59 +00:00
namespace DB
{
/// Error codes used by the format factory functions in this file.
/// They are only declared here (`extern`); their definitions are not part of this file.
namespace ErrorCodes
{
/// Thrown by FormatFactory::getInput for format names listed there as not suitable for input.
extern const int FORMAT_IS_NOT_SUITABLE_FOR_INPUT;
/// Thrown by getInput and getOutputImpl when the format name matches none of the names they check.
extern const int UNKNOWN_FORMAT;
}
2011-10-24 12:10:59 +00:00
BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & buf,
const Block & sample, const Context & context, size_t max_block_size) const
2011-10-24 12:10:59 +00:00
{
const Settings & settings = context.getSettingsRef();
auto wrap_row_stream = [&](auto && row_stream)
{
return std::make_shared<BlockInputStreamFromRowInputStream>(std::move(row_stream), sample, max_block_size,
settings.input_format_allow_errors_num, settings.input_format_allow_errors_ratio);
};
Text formats allow to skip errors (#407) * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. https://github.com/yandex/ClickHouse/issues/134 * Allow to skip errors in text formats: added settings 'input_format_allow_errors_num' and 'input_format_allow_errors_ratio' [#CLICKHOUSE-2778]. 
https://github.com/yandex/ClickHouse/issues/134 * Added test [#CLICKHOUSE-2778].
2017-01-27 04:29:47 +00:00
if (name == "Native")
{
return std::make_shared<NativeBlockInputStream>(buf);
}
else if (name == "RowBinary")
{
return wrap_row_stream(std::make_shared<BinaryRowInputStream>(buf));
}
else if (name == "TabSeparated" || name == "TSV") /// TSV is a synonym/alias for the original TabSeparated format
{
return wrap_row_stream(std::make_shared<TabSeparatedRowInputStream>(buf, sample));
}
else if (name == "TabSeparatedWithNames" || name == "TSVWithNames")
{
return wrap_row_stream(std::make_shared<TabSeparatedRowInputStream>(buf, sample, true));
}
else if (name == "TabSeparatedWithNamesAndTypes" || name == "TSVWithNamesAndTypes")
{
return wrap_row_stream(std::make_shared<TabSeparatedRowInputStream>(buf, sample, true, true));
}
else if (name == "Values")
{
return wrap_row_stream(std::make_shared<ValuesRowInputStream>(buf, context, settings.input_format_values_interpret_expressions));
}
else if (name == "CSV")
{
return wrap_row_stream(std::make_shared<CSVRowInputStream>(buf, sample, ','));
}
else if (name == "CSVWithNames")
{
return wrap_row_stream(std::make_shared<CSVRowInputStream>(buf, sample, ',', true));
}
else if (name == "TSKV")
{
return wrap_row_stream(std::make_shared<TSKVRowInputStream>(buf, sample, settings.input_format_skip_unknown_fields));
}
else if (name == "JSONEachRow")
{
return wrap_row_stream(std::make_shared<JSONEachRowRowInputStream>(buf, sample, settings.input_format_skip_unknown_fields));
}
else if (name == "TabSeparatedRaw"
|| name == "TSVRaw"
|| name == "BlockTabSeparated"
|| name == "Pretty"
|| name == "PrettyCompact"
|| name == "PrettyCompactMonoBlock"
|| name == "PrettySpace"
|| name == "PrettyNoEscapes"
|| name == "PrettyCompactNoEscapes"
|| name == "PrettySpaceNoEscapes"
|| name == "Vertical"
|| name == "VerticalRaw"
|| name == "Null"
|| name == "JSON"
|| name == "JSONCompact"
|| name == "XML"
|| name == "ODBCDriver")
{
throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT);
}
else
throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT);
2011-10-24 12:10:59 +00:00
}
/** Builds the output stream for the format `name`, writing to `buf`.
  *
  * Row-oriented formats are adapted to the block interface with BlockOutputStreamFromRowOutputStream.
  * The values of the output_format_* settings used below are taken from `context`.
  *
  * Throws UNKNOWN_FORMAT if `name` matches none of the names checked here.
  */
static BlockOutputStreamPtr getOutputImpl(const String & name, WriteBuffer & buf,
    const Block & sample, const Context & context)
{
    /// Adapter from a row output stream to a block output stream.
    using FromRows = BlockOutputStreamFromRowOutputStream;

    const Settings & settings = context.getSettingsRef();
    const auto & pretty_max_rows = settings.output_format_pretty_max_rows;
    const auto & write_statistics = settings.output_format_write_statistics;

    FormatSettingsJSON json_settings(settings.output_format_json_quote_64bit_integers, true);

    /// Every branch returns, so the checks are written as independent guards instead of an else-if ladder.

    if (name == "Native")
        return std::make_shared<NativeBlockOutputStream>(buf);

    if (name == "RowBinary")
        return std::make_shared<FromRows>(std::make_shared<BinaryRowOutputStream>(buf));

    /// TabSeparated family; the TSV* names are accepted as aliases.
    if (name == "TabSeparated" || name == "TSV")
        return std::make_shared<FromRows>(std::make_shared<TabSeparatedRowOutputStream>(buf, sample));

    if (name == "TabSeparatedWithNames" || name == "TSVWithNames")
        return std::make_shared<FromRows>(std::make_shared<TabSeparatedRowOutputStream>(buf, sample, true));

    if (name == "TabSeparatedWithNamesAndTypes" || name == "TSVWithNamesAndTypes")
        return std::make_shared<FromRows>(std::make_shared<TabSeparatedRowOutputStream>(buf, sample, true, true));

    if (name == "TabSeparatedRaw" || name == "TSVRaw")
        return std::make_shared<FromRows>(std::make_shared<TabSeparatedRawRowOutputStream>(buf, sample));

    if (name == "BlockTabSeparated")
        return std::make_shared<TabSeparatedBlockOutputStream>(buf);

    /// CSV family.
    if (name == "CSV")
        return std::make_shared<FromRows>(std::make_shared<CSVRowOutputStream>(buf, sample));

    if (name == "CSVWithNames")
        return std::make_shared<FromRows>(std::make_shared<CSVRowOutputStream>(buf, sample, true));

    /// Pretty family; the *NoEscapes variants pass `true` as the second constructor argument.
    if (name == "Pretty")
        return std::make_shared<PrettyBlockOutputStream>(buf, false, pretty_max_rows, context);

    if (name == "PrettyCompact")
        return std::make_shared<PrettyCompactBlockOutputStream>(buf, false, pretty_max_rows, context);

    if (name == "PrettyCompactMonoBlock")
        return std::make_shared<PrettyCompactMonoBlockOutputStream>(buf, false, pretty_max_rows, context);

    if (name == "PrettySpace")
        return std::make_shared<PrettySpaceBlockOutputStream>(buf, false, pretty_max_rows, context);

    if (name == "PrettyNoEscapes")
        return std::make_shared<PrettyBlockOutputStream>(buf, true, pretty_max_rows, context);

    if (name == "PrettyCompactNoEscapes")
        return std::make_shared<PrettyCompactBlockOutputStream>(buf, true, pretty_max_rows, context);

    if (name == "PrettySpaceNoEscapes")
        return std::make_shared<PrettySpaceBlockOutputStream>(buf, true, pretty_max_rows, context);

    /// Vertical formats use the same row limit setting as the Pretty formats.
    if (name == "Vertical")
        return std::make_shared<FromRows>(
            std::make_shared<VerticalRowOutputStream>(buf, sample, pretty_max_rows, context));

    if (name == "VerticalRaw")
        return std::make_shared<FromRows>(
            std::make_shared<VerticalRawRowOutputStream>(buf, sample, pretty_max_rows, context));

    if (name == "Values")
        return std::make_shared<FromRows>(std::make_shared<ValuesRowOutputStream>(buf));

    /// JSON family shares one FormatSettingsJSON object.
    if (name == "JSON")
        return std::make_shared<FromRows>(
            std::make_shared<JSONRowOutputStream>(buf, sample, write_statistics, json_settings));

    if (name == "JSONCompact")
        return std::make_shared<FromRows>(
            std::make_shared<JSONCompactRowOutputStream>(buf, sample, write_statistics, json_settings));

    if (name == "JSONEachRow")
        return std::make_shared<FromRows>(
            std::make_shared<JSONEachRowRowOutputStream>(buf, sample, json_settings));

    if (name == "XML")
        return std::make_shared<FromRows>(
            std::make_shared<XMLRowOutputStream>(buf, sample, write_statistics));

    if (name == "TSKV")
        return std::make_shared<FromRows>(std::make_shared<TSKVRowOutputStream>(buf, sample));

    if (name == "ODBCDriver")
        return std::make_shared<ODBCDriverBlockOutputStream>(buf, sample);

    if (name == "Null")
        return std::make_shared<NullBlockOutputStream>();

    throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT);
}
/** Creates the output stream for the format `name`, wrapped into a MaterializingBlockOutputStream.
  * All arguments are passed unchanged to getOutputImpl, which selects the format-specific stream.
  */
BlockOutputStreamPtr FormatFactory::getOutput(const String & name, WriteBuffer & buf,
    const Block & sample, const Context & context) const
{
    BlockOutputStreamPtr format_stream = getOutputImpl(name, buf, sample, context);

    /// The wrapper is required because formats may call `IDataType` functions
    ///  that only work with full columns.
    return std::make_shared<MaterializingBlockOutputStream>(std::move(format_stream));
}
2011-10-24 12:10:59 +00:00
}