2018-06-05 21:39:01 +00:00
|
|
|
#pragma once
|
|
|
|
|
2021-10-02 07:13:14 +00:00
|
|
|
#include <base/types.h>
|
2018-06-08 01:51:55 +00:00
|
|
|
|
|
|
|
|
2018-06-05 21:39:01 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2020-11-07 08:53:39 +00:00
|
|
|
/**
|
|
|
|
* Various tweaks for input/output formats. Text serialization/deserialization
|
|
|
|
* of data types also depend on some of these settings. It is different from
|
|
|
|
* FormatFactorySettings in that it has all necessary user-provided settings
|
|
|
|
* combined with information from context etc, that we can use directly during
|
|
|
|
* serialization. In contrast, FormatFactorySettings' job is to reflect the
|
|
|
|
* changes made to user-visible format settings, such as when tweaking
* the format for File engine.
|
|
|
|
* NOTE Parameters for unrelated formats and unrelated data types are collected
|
|
|
|
* in this struct - it prevents modularity, but they are difficult to separate.
|
2018-06-08 01:51:55 +00:00
|
|
|
*/
|
2018-06-05 21:39:01 +00:00
|
|
|
struct FormatSettings
|
|
|
|
{
|
2020-04-27 15:21:53 +00:00
|
|
|
/// Format will be used for streaming. Not every formats support it
|
|
|
|
/// Option means that each chunk of data need to be formatted independently. Also each chunk will be flushed at the end of processing.
|
2020-04-26 13:44:11 +00:00
|
|
|
bool enable_streaming = false;
|
|
|
|
|
2018-06-08 01:51:55 +00:00
|
|
|
bool skip_unknown_fields = false;
|
2019-04-22 13:31:17 +00:00
|
|
|
bool with_names_use_header = false;
|
2021-10-14 10:32:49 +00:00
|
|
|
bool with_types_use_header = false;
|
2018-06-08 01:51:55 +00:00
|
|
|
bool write_statistics = true;
|
2018-09-14 13:43:57 +00:00
|
|
|
bool import_nested_json = false;
|
2021-04-02 21:05:40 +00:00
|
|
|
bool null_as_default = true;
|
2021-08-16 08:03:23 +00:00
|
|
|
bool decimal_trailing_zeros = false;
|
2021-10-14 10:32:49 +00:00
|
|
|
bool defaults_for_omitted_fields = true;
|
2018-06-08 01:51:55 +00:00
|
|
|
|
2021-10-31 19:53:24 +00:00
|
|
|
bool seekable_read = true;
|
|
|
|
|
2018-06-05 21:39:01 +00:00
|
|
|
enum class DateTimeInputFormat
|
|
|
|
{
|
|
|
|
Basic, /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp.
|
|
|
|
BestEffort /// Use sophisticated rules to parse whatever possible.
|
|
|
|
};
|
|
|
|
|
|
|
|
DateTimeInputFormat date_time_input_format = DateTimeInputFormat::Basic;
|
2018-06-10 19:22:49 +00:00
|
|
|
|
2020-10-13 10:59:43 +00:00
|
|
|
enum class DateTimeOutputFormat
|
|
|
|
{
|
|
|
|
Simple,
|
|
|
|
ISO,
|
|
|
|
UnixTimestamp
|
|
|
|
};
|
|
|
|
|
2021-11-09 13:14:07 +00:00
|
|
|
enum class EscapingRule
|
|
|
|
{
|
|
|
|
None,
|
|
|
|
Escaped,
|
|
|
|
Quoted,
|
|
|
|
CSV,
|
|
|
|
JSON,
|
|
|
|
XML,
|
|
|
|
Raw
|
|
|
|
};
|
|
|
|
|
2020-10-13 10:59:43 +00:00
|
|
|
DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple;
|
|
|
|
|
2018-06-10 19:22:49 +00:00
|
|
|
UInt64 input_allow_errors_num = 0;
|
2019-01-07 19:56:53 +00:00
|
|
|
Float32 input_allow_errors_ratio = 0;
|
2019-02-19 20:51:44 +00:00
|
|
|
|
2020-11-02 07:50:38 +00:00
|
|
|
struct
|
2020-05-03 12:26:39 +00:00
|
|
|
{
|
|
|
|
UInt64 row_group_size = 1000000;
|
2021-05-25 12:01:28 +00:00
|
|
|
bool low_cardinality_as_dictionary = false;
|
2021-07-01 17:59:28 +00:00
|
|
|
bool import_nested = false;
|
2020-05-03 12:26:39 +00:00
|
|
|
} arrow;
|
|
|
|
|
2020-11-02 07:50:38 +00:00
|
|
|
struct
|
2019-02-19 20:51:44 +00:00
|
|
|
{
|
2020-11-02 07:50:38 +00:00
|
|
|
String schema_registry_url;
|
|
|
|
String output_codec;
|
|
|
|
UInt64 output_sync_interval = 16 * 1024;
|
|
|
|
bool allow_missing_fields = false;
|
2021-07-09 16:18:22 +00:00
|
|
|
String string_column_pattern;
|
2021-11-02 11:06:10 +00:00
|
|
|
UInt64 output_rows_in_file = 1;
|
2020-11-02 07:50:38 +00:00
|
|
|
} avro;
|
2019-02-19 20:51:44 +00:00
|
|
|
|
2021-11-19 05:22:44 +00:00
|
|
|
String bool_true_representation = "true";
|
|
|
|
String bool_false_representation = "false";
|
2021-11-10 08:08:24 +00:00
|
|
|
|
2020-11-02 07:50:38 +00:00
|
|
|
struct CSV
|
2019-12-25 19:17:41 +00:00
|
|
|
{
|
2020-11-02 07:50:38 +00:00
|
|
|
char delimiter = ',';
|
|
|
|
bool allow_single_quotes = true;
|
|
|
|
bool allow_double_quotes = true;
|
|
|
|
bool empty_as_default = false;
|
|
|
|
bool crlf_end_of_line = false;
|
|
|
|
bool input_format_enum_as_number = false;
|
2020-12-20 10:26:08 +00:00
|
|
|
bool input_format_arrays_as_nested_csv = false;
|
2021-09-17 02:18:01 +00:00
|
|
|
String null_representation = "\\N";
|
2021-12-20 16:25:54 +00:00
|
|
|
char tuple_delimiter = ',';
|
2020-11-02 07:50:38 +00:00
|
|
|
} csv;
|
2019-12-25 19:17:41 +00:00
|
|
|
|
|
|
|
struct Custom
|
|
|
|
{
|
|
|
|
std::string result_before_delimiter;
|
|
|
|
std::string result_after_delimiter;
|
|
|
|
std::string row_before_delimiter;
|
|
|
|
std::string row_after_delimiter;
|
|
|
|
std::string row_between_delimiter;
|
|
|
|
std::string field_delimiter;
|
2021-11-09 13:14:07 +00:00
|
|
|
EscapingRule escaping_rule = EscapingRule::Escaped;
|
2020-11-02 07:50:38 +00:00
|
|
|
} custom;
|
2019-12-25 19:17:41 +00:00
|
|
|
|
2020-11-02 07:50:38 +00:00
|
|
|
struct
|
|
|
|
{
|
2020-11-17 19:50:47 +00:00
|
|
|
bool array_of_rows = false;
|
2020-11-02 07:50:38 +00:00
|
|
|
bool quote_64bit_integers = true;
|
|
|
|
bool quote_denormals = true;
|
|
|
|
bool escape_forward_slashes = true;
|
2020-11-18 10:38:30 +00:00
|
|
|
bool named_tuples_as_objects = false;
|
2020-11-02 07:50:38 +00:00
|
|
|
bool serialize_as_strings = false;
|
|
|
|
} json;
|
2020-01-08 09:13:12 +00:00
|
|
|
|
2020-11-02 07:50:38 +00:00
|
|
|
struct
|
2020-01-08 09:13:12 +00:00
|
|
|
{
|
2020-11-02 07:50:38 +00:00
|
|
|
UInt64 row_group_size = 1000000;
|
2021-07-01 17:59:28 +00:00
|
|
|
bool import_nested = false;
|
2020-11-02 07:50:38 +00:00
|
|
|
} parquet;
|
|
|
|
|
|
|
|
struct Pretty
|
|
|
|
{
|
|
|
|
UInt64 max_rows = 10000;
|
|
|
|
UInt64 max_column_pad_width = 250;
|
|
|
|
UInt64 max_value_width = 10000;
|
|
|
|
bool color = true;
|
|
|
|
|
|
|
|
bool output_format_pretty_row_numbers = false;
|
|
|
|
|
|
|
|
enum class Charset
|
|
|
|
{
|
|
|
|
UTF8,
|
|
|
|
ASCII,
|
|
|
|
};
|
2020-01-11 07:01:20 +00:00
|
|
|
|
2020-11-02 07:50:38 +00:00
|
|
|
Charset charset = Charset::UTF8;
|
|
|
|
} pretty;
|
2020-01-08 09:13:12 +00:00
|
|
|
|
2020-11-02 07:50:38 +00:00
|
|
|
struct
|
|
|
|
{
|
|
|
|
/**
|
2020-11-07 08:53:39 +00:00
|
|
|
* Some buffers (kafka / rabbit) split the rows internally using callback,
|
|
|
|
* and always send one row per message, so we can push there formats
|
|
|
|
* without framing / delimiters (like ProtobufSingle). In other cases,
|
|
|
|
* we have to enforce exporting at most one row in the format output,
|
|
|
|
* because Protobuf without delimiters is not generally useful.
|
2020-11-02 07:50:38 +00:00
|
|
|
*/
|
2021-01-11 01:50:30 +00:00
|
|
|
bool allow_multiple_rows_without_delimiter = false;
|
2020-11-02 07:50:38 +00:00
|
|
|
} protobuf;
|
|
|
|
|
2021-07-16 10:10:56 +00:00
|
|
|
struct
|
|
|
|
{
|
|
|
|
uint32_t client_capabilities = 0;
|
|
|
|
size_t max_packet_size = 0;
|
|
|
|
uint8_t * sequence_id = nullptr; /// Not null if it's MySQLWire output format used to handle MySQL protocol connections.
|
|
|
|
} mysql_wire;
|
|
|
|
|
2020-11-02 07:50:38 +00:00
|
|
|
struct
|
2020-02-14 19:48:45 +00:00
|
|
|
{
|
|
|
|
std::string regexp;
|
2021-11-09 13:14:07 +00:00
|
|
|
EscapingRule escaping_rule = EscapingRule::Raw;
|
2020-02-14 19:48:45 +00:00
|
|
|
bool skip_unmatched = false;
|
2020-11-02 07:50:38 +00:00
|
|
|
} regexp;
|
|
|
|
|
|
|
|
struct
|
|
|
|
{
|
|
|
|
std::string format_schema;
|
|
|
|
std::string format_schema_path;
|
|
|
|
bool is_server = false;
|
|
|
|
} schema;
|
|
|
|
|
|
|
|
struct
|
|
|
|
{
|
|
|
|
String resultset_format;
|
|
|
|
String row_format;
|
|
|
|
String row_between_delimiter;
|
|
|
|
} template_settings;
|
2020-02-14 19:48:45 +00:00
|
|
|
|
2020-11-02 07:50:38 +00:00
|
|
|
struct
|
|
|
|
{
|
|
|
|
bool empty_as_default = false;
|
|
|
|
bool crlf_end_of_line = false;
|
|
|
|
String null_representation = "\\N";
|
|
|
|
bool input_format_enum_as_number = false;
|
|
|
|
} tsv;
|
2020-02-14 19:48:45 +00:00
|
|
|
|
2020-11-02 07:50:38 +00:00
|
|
|
struct
|
|
|
|
{
|
|
|
|
bool interpret_expressions = true;
|
|
|
|
bool deduce_templates_of_expressions = true;
|
|
|
|
bool accurate_types_of_literals = true;
|
|
|
|
} values;
|
2021-07-01 17:59:28 +00:00
|
|
|
|
|
|
|
struct
|
|
|
|
{
|
|
|
|
bool import_nested = false;
|
2021-12-18 09:25:25 +00:00
|
|
|
int64_t row_batch_size = 100'000;
|
2021-07-01 17:59:28 +00:00
|
|
|
} orc;
|
2021-09-28 12:59:22 +00:00
|
|
|
|
2021-09-28 13:07:00 +00:00
|
|
|
/// For capnProto format we should determine how to
|
2021-09-28 12:59:22 +00:00
|
|
|
/// compare ClickHouse Enum and Enum from schema.
|
|
|
|
enum class EnumComparingMode
|
|
|
|
{
|
|
|
|
BY_NAMES, // Names in enums should be the same, values can be different.
|
|
|
|
BY_NAMES_CASE_INSENSITIVE, // Case-insensitive name comparison.
|
|
|
|
BY_VALUES, // Values should be the same, names can be different.
|
|
|
|
};
|
|
|
|
|
|
|
|
struct
|
|
|
|
{
|
|
|
|
EnumComparingMode enum_comparing_mode = EnumComparingMode::BY_VALUES;
|
|
|
|
} capn_proto;
|
2018-06-05 21:39:01 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|