From 9bb68bc6de40d2eb84329ec2b642c714d3ef661d Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 27 Jun 2022 18:31:57 +0000 Subject: [PATCH 1/5] Add SQLInsert output format --- docs/en/interfaces/formats.md | 152 +++++++++++------- docs/en/operations/settings/settings.md | 26 +++ src/Core/Settings.h | 5 + src/Formats/FormatFactory.cpp | 4 + src/Formats/FormatSettings.h | 8 + src/Formats/registerFormats.cpp | 2 + .../Formats/Impl/SQLInsertRowOutputFormat.cpp | 86 ++++++++++ .../Formats/Impl/SQLInsertRowOutputFormat.h | 40 +++++ .../02322_sql_insert_format.reference | 17 ++ .../0_stateless/02322_sql_insert_format.sql | 8 + 10 files changed, 286 insertions(+), 62 deletions(-) create mode 100644 src/Processors/Formats/Impl/SQLInsertRowOutputFormat.cpp create mode 100644 src/Processors/Formats/Impl/SQLInsertRowOutputFormat.h create mode 100644 tests/queries/0_stateless/02322_sql_insert_format.reference create mode 100644 tests/queries/0_stateless/02322_sql_insert_format.sql diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 00fa382fd4d..27f42985fb3 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -11,68 +11,69 @@ results of a `SELECT`, and to perform `INSERT`s into a file-backed table. 
The supported formats are: | Format | Input | Output | -|-------------------------------------------------------------------------------------------|-------|--------| -| [TabSeparated](#tabseparated) | ✔ | ✔ | -| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | -| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | -| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | -| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ | -| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ | -| [Template](#format-template) | ✔ | ✔ | -| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ | -| [CSV](#csv) | ✔ | ✔ | -| [CSVWithNames](#csvwithnames) | ✔ | ✔ | -| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ | -| [CustomSeparated](#format-customseparated) | ✔ | ✔ | -| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ | -| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ | -| [Values](#data-format-values) | ✔ | ✔ | -| [Vertical](#vertical) | ✗ | ✔ | -| [JSON](#json) | ✗ | ✔ | -| [JSONAsString](#jsonasstring) | ✔ | ✗ | -| [JSONStrings](#jsonstrings) | ✗ | ✔ | -| [JSONColumns](#jsoncolumns) | ✔ | ✔ | -| [JSONColumnsWithMetadata](#jsoncolumnswithmetadata) | ✗ | ✔ | -| [JSONCompact](#jsoncompact) | ✗ | ✔ | -| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ | -| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ | -| [JSONEachRow](#jsoneachrow) | ✔ | ✔ | -| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | -| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | -| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ | -| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | -| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ | -| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ | -| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ | -| 
[JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ | -| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ | -| [TSKV](#tskv) | ✔ | ✔ | -| [Pretty](#pretty) | ✗ | ✔ | -| [PrettyCompact](#prettycompact) | ✗ | ✔ | -| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ | -| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | -| [PrettySpace](#prettyspace) | ✗ | ✔ | -| [Prometheus](#prometheus) | ✗ | ✔ | -| [Protobuf](#protobuf) | ✔ | ✔ | -| [ProtobufSingle](#protobufsingle) | ✔ | ✔ | -| [Avro](#data-format-avro) | ✔ | ✔ | -| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | -| [Parquet](#data-format-parquet) | ✔ | ✔ | -| [Arrow](#data-format-arrow) | ✔ | ✔ | -| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | -| [ORC](#data-format-orc) | ✔ | ✔ | -| [RowBinary](#rowbinary) | ✔ | ✔ | -| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ | -| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | -| [Native](#native) | ✔ | ✔ | -| [Null](#null) | ✗ | ✔ | -| [XML](#xml) | ✗ | ✔ | -| [CapnProto](#capnproto) | ✔ | ✔ | -| [LineAsString](#lineasstring) | ✔ | ✗ | -| [Regexp](#data-format-regexp) | ✔ | ✗ | -| [RawBLOB](#rawblob) | ✔ | ✔ | -| [MsgPack](#msgpack) | ✔ | ✔ | -| [MySQLDump](#mysqldump) | ✔ | ✗ | +|-------------------------------------------------------------------------------------------|------|--------| +| [TabSeparated](#tabseparated) | ✔ | ✔ | +| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | +| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | +| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | +| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ | +| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ | +| [Template](#format-template) | ✔ | ✔ | +| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ | +| [CSV](#csv) | ✔ | ✔ | +| [CSVWithNames](#csvwithnames) | ✔ | ✔ | +| 
[CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ | +| [CustomSeparated](#format-customseparated) | ✔ | ✔ | +| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ | +| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ | +| [SQLInsert](#sqlinsert) | ✗ | ✔ | +| [Values](#data-format-values) | ✔ | ✔ | +| [Vertical](#vertical) | ✗ | ✔ | +| [JSON](#json) | ✗ | ✔ | +| [JSONAsString](#jsonasstring) | ✔ | ✗ | +| [JSONStrings](#jsonstrings) | ✗ | ✔ | +| [JSONColumns](#jsoncolumns) | ✔ | ✔ | +| [JSONColumnsWithMetadata](#jsoncolumnswithmetadata) | ✗ | ✔ | +| [JSONCompact](#jsoncompact) | ✗ | ✔ | +| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ | +| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ | +| [JSONEachRow](#jsoneachrow) | ✔ | ✔ | +| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | +| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | +| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ | +| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | +| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ | +| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ | +| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ | +| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ | +| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ | +| [TSKV](#tskv) | ✔ | ✔ | +| [Pretty](#pretty) | ✗ | ✔ | +| [PrettyCompact](#prettycompact) | ✗ | ✔ | +| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ | +| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | +| [PrettySpace](#prettyspace) | ✗ | ✔ | +| [Prometheus](#prometheus) | ✗ | ✔ | +| [Protobuf](#protobuf) | ✔ | ✔ | +| [ProtobufSingle](#protobufsingle) | ✔ | ✔ | +| [Avro](#data-format-avro) | ✔ | ✔ | +| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | +| [Parquet](#data-format-parquet) | ✔ | ✔ | +| 
[Arrow](#data-format-arrow) | ✔ | ✔ | +| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | +| [ORC](#data-format-orc) | ✔ | ✔ | +| [RowBinary](#rowbinary) | ✔ | ✔ | +| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ | +| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | +| [Native](#native) | ✔ | ✔ | +| [Null](#null) | ✗ | ✔ | +| [XML](#xml) | ✗ | ✔ | +| [CapnProto](#capnproto) | ✔ | ✔ | +| [LineAsString](#lineasstring) | ✔ | ✗ | +| [Regexp](#data-format-regexp) | ✔ | ✗ | +| [RawBLOB](#rawblob) | ✔ | ✔ | +| [MsgPack](#msgpack) | ✔ | ✔ | +| [MySQLDump](#mysqldump) | ✔ | ✗ | You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](../operations/settings/settings.md) section. @@ -468,6 +469,33 @@ Also prints the header row with column names, similar to [TabSeparatedWithNames] Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). +## SQLInsert {#sqlinsert} + +Outputs data as a sequence of `INSERT INTO table (columns...) VALUES (...), (...) ...;` statements. + +Example: + +```sql +SELECT number AS x, number + 1 AS y, 'Hello' AS z FROM numbers(10) FORMAT SQLInsert SETTINGS output_format_sql_insert_max_batch_size = 2 +``` + +```sql +INSERT INTO table (x, y, z) VALUES (0, 1, 'Hello'), (1, 2, 'Hello'); +INSERT INTO table (x, y, z) VALUES (2, 3, 'Hello'), (3, 4, 'Hello'); +INSERT INTO table (x, y, z) VALUES (4, 5, 'Hello'), (5, 6, 'Hello'); +INSERT INTO table (x, y, z) VALUES (6, 7, 'Hello'), (7, 8, 'Hello'); +INSERT INTO table (x, y, z) VALUES (8, 9, 'Hello'), (9, 10, 'Hello'); +``` + +To read data output by this format you can use [MySQLDump](#mysqldump) input format. + +### SQLInsert format settings {#sqlinsert-format-settings} + +- [output_format_sql_insert_max_batch_size](../operations/settings/settings.md#output_format_sql_insert_max_batch_size) - The maximum number of rows in one INSERT statement. 
Default value - `65505`. +- [output_format_sql_insert_table_name](../operations/settings/settings.md#output_format_sql_insert_table_name) - The name of table in the output INSERT query. Default value - `'table'`. +- [output_format_sql_insert_include_column_names](../operations/settings/settings.md#output_format_sql_insert_include_column_names) - Include column names in INSERT query. Default value - `true`. +- [output_format_sql_insert_use_replace](../operations/settings/settings.md#output_format_sql_insert_use_replace) - Use REPLACE statement instead of INSERT. Default value - `false`. + ## JSON {#json} Outputs data in JSON format. Besides data tables, it also outputs column names and types, along with some additional information: the total number of output rows, and the number of rows that could have been output if there weren’t a LIMIT. Example: diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 85265448c03..2fc124f9bab 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4637,3 +4637,29 @@ Possible values: - 1 — Enabled. Default value: 1. + +## SQLInsert format settings {#sqlinsert-format-settings} + +### output_format_sql_insert_max_batch_size {#output_format_sql_insert_max_batch_size} + +The maximum number of rows in one INSERT statement. + +Default value: `65505`. + +### output_format_sql_insert_table_name {#output_format_sql_insert_table_name} + +The name of table that will be used in the output INSERT statement. + +Default value: `'table'`. + +### output_format_sql_insert_include_column_names {#output_format_sql_insert_include_column_names} + +Include column names in INSERT statement. + +Default value: `true`. + +### output_format_sql_insert_use_replace {#output_format_sql_insert_use_replace} + +Use REPLACE keyword instead of INSERT. + +Default value: `false`. 
diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f1fd9d20f00..e16ad65880b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -759,6 +759,11 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) \ M(String, input_format_mysql_dump_table_name, "", "Name of the table in MySQL dump from which to read data", 0) \ M(Bool, input_format_mysql_dump_map_column_names, true, "Match columns from table in MySQL dump and columns from ClickHouse table by names", 0) \ + \ + M(UInt64, output_format_sql_insert_max_batch_size, DEFAULT_BLOCK_SIZE, "The maximum number of rows in one INSERT statement.", 0) \ + M(String, output_format_sql_insert_table_name, "table", "The name of table in the output INSERT query", 0) \ + M(Bool, output_format_sql_insert_include_column_names, true, "Include column names in INSERT query", 0) \ + M(Bool, output_format_sql_insert_use_replace, false, "Use REPLACE statement instead of INSERT", 0) \ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. 
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index dc6344137d2..c1714279a9c 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -158,6 +158,10 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference; format_settings.mysql_dump.table_name = settings.input_format_mysql_dump_table_name; format_settings.mysql_dump.map_column_names = settings.input_format_mysql_dump_map_column_names; + format_settings.sql_insert.max_batch_size = settings.output_format_sql_insert_max_batch_size; + format_settings.sql_insert.include_column_names = settings.output_format_sql_insert_include_column_names; + format_settings.sql_insert.table_name = settings.output_format_sql_insert_table_name; + format_settings.sql_insert.use_replace = settings.output_format_sql_insert_use_replace; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context if (format_settings.schema.is_server) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 7e0ce001405..2b587ab0c04 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -274,6 +274,14 @@ struct FormatSettings String table_name; bool map_column_names = true; } mysql_dump; + + struct + { + UInt64 max_batch_size = 65505; + String table_name = "table"; + bool include_column_names = true; + bool use_replace = false; + } sql_insert; }; } diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 8493c84173d..0953572fab9 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -82,6 +82,7 @@ void registerOutputFormatMySQLWire(FormatFactory & factory); void registerOutputFormatMarkdown(FormatFactory & factory); void registerOutputFormatPostgreSQLWire(FormatFactory & factory); void registerOutputFormatPrometheus(FormatFactory & factory); +void 
registerOutputFormatSQLInsert(FormatFactory & factory); /// Input only formats. @@ -205,6 +206,7 @@ void registerFormats() registerOutputFormatPostgreSQLWire(factory); registerOutputFormatCapnProto(factory); registerOutputFormatPrometheus(factory); + registerOutputFormatSQLInsert(factory); registerInputFormatRegexp(factory); registerInputFormatJSONAsString(factory); diff --git a/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.cpp b/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.cpp new file mode 100644 index 00000000000..aa714af2716 --- /dev/null +++ b/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.cpp @@ -0,0 +1,86 @@ +#include +#include + + +namespace DB +{ + +SQLInsertRowOutputFormat::SQLInsertRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) + : IRowOutputFormat(header_, out_, params_), column_names(header_.getNames()), format_settings(format_settings_) +{ +} + +void SQLInsertRowOutputFormat::writeRowStartDelimiter() +{ + if (rows_in_line == 0) + { + if (format_settings.sql_insert.use_replace) + writeCString("REPLACE INTO ", out); + else + writeCString("INSERT INTO ", out); + writeString(format_settings.sql_insert.table_name, out); + if (format_settings.sql_insert.include_column_names) + { + writeCString(" (", out); + for (size_t i = 0; i != column_names.size(); ++i) + { + writeString(column_names[i], out); + if (i + 1 != column_names.size()) + writeCString(", ", out); + } + writeChar(')', out); + } + writeCString(" VALUES ", out); + } + writeChar('(', out); +} + +void SQLInsertRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) +{ + serialization.serializeTextQuoted(column, row_num, out, format_settings); +} + +void SQLInsertRowOutputFormat::writeFieldDelimiter() +{ + writeCString(", ", out); +} + +void SQLInsertRowOutputFormat::writeRowEndDelimiter() +{ + writeChar(')', out); + ++rows_in_line; +} + 
+void SQLInsertRowOutputFormat::writeRowBetweenDelimiter() +{ + if (rows_in_line >= format_settings.sql_insert.max_batch_size) + { + writeCString(";\n", out); + rows_in_line = 0; + } + else + { + writeCString(", ", out); + } +} + +void SQLInsertRowOutputFormat::writeSuffix() +{ + writeCString(";\n", out); +} + + +void registerOutputFormatSQLInsert(FormatFactory & factory) +{ + factory.registerOutputFormat("SQLInsert", []( + WriteBuffer & buf, + const Block & sample, + const RowOutputFormatParams & params, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, params, settings); + }); +} + + +} diff --git a/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.h b/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.h new file mode 100644 index 00000000000..de39c82abac --- /dev/null +++ b/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.h @@ -0,0 +1,40 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class WriteBuffer; + +class SQLInsertRowOutputFormat : public IRowOutputFormat +{ +public: + SQLInsertRowOutputFormat( + WriteBuffer & out_, + const Block & header_, + const RowOutputFormatParams & params_, + const FormatSettings & format_settings_); + + String getName() const override { return "SQLInsertRowOutputFormat"; } + + /// https://www.iana.org/assignments/media-types/text/tab-separated-values + String getContentType() const override { return "text/tab-separated-values; charset=UTF-8"; } + +protected: + void writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) override; + virtual void writeFieldDelimiter() override; + virtual void writeRowStartDelimiter() override; + virtual void writeRowEndDelimiter() override; + virtual void writeRowBetweenDelimiter() override; + virtual void writeSuffix() override; + + size_t rows_in_line = 0; + Names column_names; + const FormatSettings format_settings; +}; + +} diff --git a/tests/queries/0_stateless/02322_sql_insert_format.reference 
b/tests/queries/0_stateless/02322_sql_insert_format.reference new file mode 100644 index 00000000000..220ee09e140 --- /dev/null +++ b/tests/queries/0_stateless/02322_sql_insert_format.reference @@ -0,0 +1,17 @@ +INSERT INTO table (x, y, z) VALUES (0, 0, 'Hello'), (1, 1, 'Hello'), (2, 2, 'Hello'), (3, 0, 'Hello'), (4, 1, 'Hello'); +INSERT INTO table (x, y, z) VALUES (0, 0, 'Hello'); +INSERT INTO table (x, y, z) VALUES (1, 1, 'Hello'); +INSERT INTO table (x, y, z) VALUES (2, 2, 'Hello'); +INSERT INTO table (x, y, z) VALUES (3, 0, 'Hello'); +INSERT INTO table (x, y, z) VALUES (4, 1, 'Hello'); +INSERT INTO table (x, y, z) VALUES (0, 0, 'Hello'), (1, 1, 'Hello'); +INSERT INTO table (x, y, z) VALUES (2, 2, 'Hello'), (3, 0, 'Hello'); +INSERT INTO table (x, y, z) VALUES (4, 1, 'Hello'); +INSERT INTO table VALUES (0, 0, 'Hello'), (1, 1, 'Hello'), (2, 2, 'Hello'), (3, 0, 'Hello'), (4, 1, 'Hello'); +REPLACE INTO table (x, y, z) VALUES (0, 0, 'Hello'), (1, 1, 'Hello'), (2, 2, 'Hello'), (3, 0, 'Hello'), (4, 1, 'Hello'); +INSERT INTO test (x, y, z) VALUES (0, 0, 'Hello'), (1, 1, 'Hello'), (2, 2, 'Hello'), (3, 0, 'Hello'), (4, 1, 'Hello'); +0 0 Hello +1 1 Hello +2 2 Hello +3 0 Hello +4 1 Hello diff --git a/tests/queries/0_stateless/02322_sql_insert_format.sql b/tests/queries/0_stateless/02322_sql_insert_format.sql new file mode 100644 index 00000000000..c9de12ba28c --- /dev/null +++ b/tests/queries/0_stateless/02322_sql_insert_format.sql @@ -0,0 +1,8 @@ +select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert; +select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_max_batch_size=1; +select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_max_batch_size=2; +select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_include_column_names=0; +select number as x, number % 3 as y, 
'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_use_replace=1; +select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_table_name='test'; +insert into function file(02322_data.sql, 'SQLInsert') select number as x, number % 3 as y, 'Hello' as z from numbers(5) settings output_format_sql_insert_max_batch_size=2, engine_file_truncate_on_insert=1; +select * from file(02322_data.sql, 'MySQLDump'); From 3b4ecc93bcf2bb85c3d253cee9dd6ba3753c362a Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 28 Jun 2022 10:48:20 +0000 Subject: [PATCH 2/5] Fix test and style --- tests/queries/0_stateless/02322_sql_insert_format.sql | 2 ++ utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02322_sql_insert_format.sql b/tests/queries/0_stateless/02322_sql_insert_format.sql index c9de12ba28c..e00f1cd300b 100644 --- a/tests/queries/0_stateless/02322_sql_insert_format.sql +++ b/tests/queries/0_stateless/02322_sql_insert_format.sql @@ -1,3 +1,5 @@ +-- Tags: no-parallel + select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert; select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_max_batch_size=1; select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_max_batch_size=2; diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 2bfa98b80c7..a7d16d08b2b 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -121,6 +121,7 @@ SATA SERIALIZABLE SIMD SMALLINT +SQLInsert SQLSTATE SSSE Schemas @@ -411,6 +412,7 @@ simdjson skippingerrors sparsehash sql +sqlinsert src stacktraces statbox From 5b0fd31c64885b51870785489bc2375a318b098c Mon Sep 17 00:00:00 
2001 From: avogar Date: Thu, 30 Jun 2022 16:14:30 +0000 Subject: [PATCH 3/5] Put column names in quotes --- src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 4 +- .../Formats/Impl/SQLInsertRowOutputFormat.cpp | 54 ++++++++++++------- .../Formats/Impl/SQLInsertRowOutputFormat.h | 3 ++ .../02322_sql_insert_format.reference | 26 +++++---- .../0_stateless/02322_sql_insert_format.sql | 5 +- 7 files changed, 63 insertions(+), 31 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 50495700236..a7074f13bb0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -767,6 +767,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(String, output_format_sql_insert_table_name, "table", "The name of table in the output INSERT query", 0) \ M(Bool, output_format_sql_insert_include_column_names, true, "Include column names in INSERT query", 0) \ M(Bool, output_format_sql_insert_use_replace, false, "Use REPLACE statement instead of INSERT", 0) \ + M(Bool, output_format_sql_insert_quote_names, true, "Quote column names with '`' characters", 0) \ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. 
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index c1714279a9c..756b33d3eb2 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -162,6 +162,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.sql_insert.include_column_names = settings.output_format_sql_insert_include_column_names; format_settings.sql_insert.table_name = settings.output_format_sql_insert_table_name; format_settings.sql_insert.use_replace = settings.output_format_sql_insert_use_replace; + format_settings.sql_insert.quote_names = settings.output_format_sql_insert_quote_names; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context if (format_settings.schema.is_server) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 44e305e9eb4..70bf8979383 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -277,10 +278,11 @@ struct FormatSettings struct { - UInt64 max_batch_size = 65505; + UInt64 max_batch_size = DEFAULT_BLOCK_SIZE; String table_name = "table"; bool include_column_names = true; bool use_replace = false; + bool quote_names = true; } sql_insert; }; diff --git a/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.cpp b/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.cpp index aa714af2716..749b4b40984 100644 --- a/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.cpp @@ -13,28 +13,44 @@ SQLInsertRowOutputFormat::SQLInsertRowOutputFormat(WriteBuffer & out_, const Blo void SQLInsertRowOutputFormat::writeRowStartDelimiter() { if (rows_in_line == 0) - { - if (format_settings.sql_insert.use_replace) - writeCString("REPLACE INTO ", out); - else - writeCString("INSERT INTO ", out); - writeString(format_settings.sql_insert.table_name, out); - if 
(format_settings.sql_insert.include_column_names) - { - writeCString(" (", out); - for (size_t i = 0; i != column_names.size(); ++i) - { - writeString(column_names[i], out); - if (i + 1 != column_names.size()) - writeCString(", ", out); - } - writeChar(')', out); - } - writeCString(" VALUES ", out); - } + printLineStart(); writeChar('(', out); } +void SQLInsertRowOutputFormat::printLineStart() +{ + if (format_settings.sql_insert.use_replace) + writeCString("REPLACE INTO ", out); + else + writeCString("INSERT INTO ", out); + + writeString(format_settings.sql_insert.table_name, out); + + if (format_settings.sql_insert.include_column_names) + printColumnNames(); + + writeCString(" VALUES ", out); +} + +void SQLInsertRowOutputFormat::printColumnNames() +{ + writeCString(" (", out); + for (size_t i = 0; i != column_names.size(); ++i) + { + if (format_settings.sql_insert.quote_names) + writeChar('`', out); + + writeString(column_names[i], out); + + if (format_settings.sql_insert.quote_names) + writeChar('`', out); + + if (i + 1 != column_names.size()) + writeCString(", ", out); + } + writeChar(')', out); +} + void SQLInsertRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) { serialization.serializeTextQuoted(column, row_num, out, format_settings); diff --git a/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.h b/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.h index de39c82abac..aaaf39a9e4d 100644 --- a/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.h +++ b/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.h @@ -32,6 +32,9 @@ protected: virtual void writeRowBetweenDelimiter() override; virtual void writeSuffix() override; + void printLineStart(); + void printColumnNames(); + size_t rows_in_line = 0; Names column_names; const FormatSettings format_settings; diff --git a/tests/queries/0_stateless/02322_sql_insert_format.reference b/tests/queries/0_stateless/02322_sql_insert_format.reference index 
220ee09e140..e64ef587fa7 100644 --- a/tests/queries/0_stateless/02322_sql_insert_format.reference +++ b/tests/queries/0_stateless/02322_sql_insert_format.reference @@ -1,17 +1,23 @@ -INSERT INTO table (x, y, z) VALUES (0, 0, 'Hello'), (1, 1, 'Hello'), (2, 2, 'Hello'), (3, 0, 'Hello'), (4, 1, 'Hello'); -INSERT INTO table (x, y, z) VALUES (0, 0, 'Hello'); -INSERT INTO table (x, y, z) VALUES (1, 1, 'Hello'); -INSERT INTO table (x, y, z) VALUES (2, 2, 'Hello'); -INSERT INTO table (x, y, z) VALUES (3, 0, 'Hello'); -INSERT INTO table (x, y, z) VALUES (4, 1, 'Hello'); -INSERT INTO table (x, y, z) VALUES (0, 0, 'Hello'), (1, 1, 'Hello'); -INSERT INTO table (x, y, z) VALUES (2, 2, 'Hello'), (3, 0, 'Hello'); -INSERT INTO table (x, y, z) VALUES (4, 1, 'Hello'); +INSERT INTO table (`x`, `y`, `z`) VALUES (0, 0, 'Hello'), (1, 1, 'Hello'), (2, 2, 'Hello'), (3, 0, 'Hello'), (4, 1, 'Hello'); +INSERT INTO table (`x`, `y`, `z`) VALUES (0, 0, 'Hello'); +INSERT INTO table (`x`, `y`, `z`) VALUES (1, 1, 'Hello'); +INSERT INTO table (`x`, `y`, `z`) VALUES (2, 2, 'Hello'); +INSERT INTO table (`x`, `y`, `z`) VALUES (3, 0, 'Hello'); +INSERT INTO table (`x`, `y`, `z`) VALUES (4, 1, 'Hello'); +INSERT INTO table (`x`, `y`, `z`) VALUES (0, 0, 'Hello'), (1, 1, 'Hello'); +INSERT INTO table (`x`, `y`, `z`) VALUES (2, 2, 'Hello'), (3, 0, 'Hello'); +INSERT INTO table (`x`, `y`, `z`) VALUES (4, 1, 'Hello'); INSERT INTO table VALUES (0, 0, 'Hello'), (1, 1, 'Hello'), (2, 2, 'Hello'), (3, 0, 'Hello'), (4, 1, 'Hello'); -REPLACE INTO table (x, y, z) VALUES (0, 0, 'Hello'), (1, 1, 'Hello'), (2, 2, 'Hello'), (3, 0, 'Hello'), (4, 1, 'Hello'); +REPLACE INTO table (`x`, `y`, `z`) VALUES (0, 0, 'Hello'), (1, 1, 'Hello'), (2, 2, 'Hello'), (3, 0, 'Hello'), (4, 1, 'Hello'); +INSERT INTO test (`x`, `y`, `z`) VALUES (0, 0, 'Hello'), (1, 1, 'Hello'), (2, 2, 'Hello'), (3, 0, 'Hello'), (4, 1, 'Hello'); INSERT INTO test (x, y, z) VALUES (0, 0, 'Hello'), (1, 1, 'Hello'), (2, 2, 'Hello'), (3, 0, 'Hello'), (4, 1, 'Hello'); 
0 0 Hello 1 1 Hello 2 2 Hello 3 0 Hello 4 1 Hello +0 0 Hello +1 1 Hello +2 2 Hello +3 0 Hello +4 1 Hello diff --git a/tests/queries/0_stateless/02322_sql_insert_format.sql b/tests/queries/0_stateless/02322_sql_insert_format.sql index e00f1cd300b..adc28c1d01e 100644 --- a/tests/queries/0_stateless/02322_sql_insert_format.sql +++ b/tests/queries/0_stateless/02322_sql_insert_format.sql @@ -6,5 +6,8 @@ select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInse select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_include_column_names=0; select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_use_replace=1; select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_table_name='test'; -insert into function file(02322_data.sql, 'SQLInsert') select number as x, number % 3 as y, 'Hello' as z from numbers(5) settings output_format_sql_insert_max_batch_size=2, engine_file_truncate_on_insert=1; +select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_table_name='test', output_format_sql_quote_names=0; +insert into function file(02322_data.sql, 'SQLInsert') select number as x, number % 3 as y, 'Hello' as z from numbers(5) settings output_format_sql_insert_max_batch_size=2, output_format_sql_quote_names=0, engine_file_truncate_on_insert=1; +select * from file(02322_data.sql, 'MySQLDump'); +insert into function file(02322_data.sql, 'SQLInsert') select number, number % 3, 'Hello' from numbers(5) settings output_format_sql_insert_max_batch_size=2, engine_file_truncate_on_insert=1; select * from file(02322_data.sql, 'MySQLDump'); From 34a92383f18db527e376b9875d58d84f58d4f54f Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 30 Jun 2022 16:17:14 +0000 Subject: [PATCH 4/5] Update docs --- docs/en/interfaces/formats.md | 1 + 
docs/en/operations/settings/settings.md | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 27f42985fb3..5d8ed9cdacd 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -495,6 +495,7 @@ To read data output by this format ypu can use [MySQLDump](#mysqldump) input for - [output_format_sql_insert_table_name](../operations/settings/settings.md#output_format_sql_insert_table_name) - The name of table in the output INSERT query. Default value - `'table'`. - [output_format_sql_insert_include_column_names](../operations/settings/settings.md#output_format_sql_insert_include_column_names) - Include column names in INSERT query. Default value - `true`. - [output_format_sql_insert_use_replace](../operations/settings/settings.md#output_format_sql_insert_use_replace) - Use REPLACE statement instead of INSERT. Default value - `false`. +- [output_format_sql_insert_quote_names](../operations/settings/settings.md#output_format_sql_insert_quote_names) - Quote column names with "\`" characters. Default value - `true`. ## JSON {#json} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 2fc124f9bab..75c2aa57b32 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4663,3 +4663,9 @@ Default value: `true`. Use REPLACE keyword instead of INSERT. Default value: `false`. + +### output_format_sql_insert_quote_names {#output_format_sql_insert_quote_names} + +Quote column names with "\`" characters. + +Default value: `true`. 
From 800baf5299aa029816821c42a9274b8389f8c879 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 1 Jul 2022 19:54:43 +0200 Subject: [PATCH 5/5] Fix test --- tests/queries/0_stateless/02322_sql_insert_format.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02322_sql_insert_format.sql b/tests/queries/0_stateless/02322_sql_insert_format.sql index adc28c1d01e..34cde1e56b6 100644 --- a/tests/queries/0_stateless/02322_sql_insert_format.sql +++ b/tests/queries/0_stateless/02322_sql_insert_format.sql @@ -6,8 +6,8 @@ select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInse select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_include_column_names=0; select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_use_replace=1; select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_table_name='test'; -select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_table_name='test', output_format_sql_quote_names=0; -insert into function file(02322_data.sql, 'SQLInsert') select number as x, number % 3 as y, 'Hello' as z from numbers(5) settings output_format_sql_insert_max_batch_size=2, output_format_sql_quote_names=0, engine_file_truncate_on_insert=1; +select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_table_name='test', output_format_sql_insert_quote_names=0; +insert into function file(02322_data.sql, 'SQLInsert') select number as x, number % 3 as y, 'Hello' as z from numbers(5) settings output_format_sql_insert_max_batch_size=2, output_format_sql_insert_quote_names=0, engine_file_truncate_on_insert=1; select * from file(02322_data.sql, 'MySQLDump'); insert into 
function file(02322_data.sql, 'SQLInsert') select number, number % 3, 'Hello' from numbers(5) settings output_format_sql_insert_max_batch_size=2, engine_file_truncate_on_insert=1; select * from file(02322_data.sql, 'MySQLDump');