mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Add input format One
This commit is contained in:
parent
3ffffb0b5e
commit
01a7c7560f
@ -11,82 +11,83 @@ results of a `SELECT`, and to perform `INSERT`s into a file-backed table.
|
||||
The supported formats are:
|
||||
|
||||
| Format | Input | Output |
|
||||
|-------------------------------------------------------------------------------------------|------|--------|
|
||||
| [TabSeparated](#tabseparated) | ✔ | ✔ |
|
||||
| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
|
||||
| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
|
||||
| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
|
||||
| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ |
|
||||
| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ |
|
||||
| [Template](#format-template) | ✔ | ✔ |
|
||||
| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ |
|
||||
| [CSV](#csv) | ✔ | ✔ |
|
||||
| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
|
||||
| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ |
|
||||
| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
|
||||
| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ |
|
||||
| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ |
|
||||
| [SQLInsert](#sqlinsert) | ✗ | ✔ |
|
||||
| [Values](#data-format-values) | ✔ | ✔ |
|
||||
| [Vertical](#vertical) | ✗ | ✔ |
|
||||
| [JSON](#json) | ✔ | ✔ |
|
||||
| [JSONAsString](#jsonasstring) | ✔ | ✗ |
|
||||
| [JSONStrings](#jsonstrings) | ✔ | ✔ |
|
||||
| [JSONColumns](#jsoncolumns) | ✔ | ✔ |
|
||||
| [JSONColumnsWithMetadata](#jsoncolumnsmonoblock)) | ✔ | ✔ |
|
||||
| [JSONCompact](#jsoncompact) | ✔ | ✔ |
|
||||
| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ |
|
||||
| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ |
|
||||
| [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
|
||||
| [PrettyJSONEachRow](#prettyjsoneachrow) | ✗ | ✔ |
|
||||
| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ |
|
||||
| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ |
|
||||
| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ |
|
||||
| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ |
|
||||
| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ |
|
||||
| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ |
|
||||
| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ |
|
||||
| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ |
|
||||
| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ |
|
||||
| [JSONObjectEachRow](#jsonobjecteachrow) | ✔ | ✔ |
|
||||
| [BSONEachRow](#bsoneachrow) | ✔ | ✔ |
|
||||
| [TSKV](#tskv) | ✔ | ✔ |
|
||||
| [Pretty](#pretty) | ✗ | ✔ |
|
||||
| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
|
||||
| [PrettyMonoBlock](#prettymonoblock) | ✗ | ✔ |
|
||||
| [PrettyNoEscapesMonoBlock](#prettynoescapesmonoblock) | ✗ | ✔ |
|
||||
| [PrettyCompact](#prettycompact) | ✗ | ✔ |
|
||||
| [PrettyCompactNoEscapes](#prettycompactnoescapes) | ✗ | ✔ |
|
||||
| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ |
|
||||
| [PrettyCompactNoEscapesMonoBlock](#prettycompactnoescapesmonoblock) | ✗ | ✔ |
|
||||
| [PrettySpace](#prettyspace) | ✗ | ✔ |
|
||||
| [PrettySpaceNoEscapes](#prettyspacenoescapes) | ✗ | ✔ |
|
||||
| [PrettySpaceMonoBlock](#prettyspacemonoblock) | ✗ | ✔ |
|
||||
| [PrettySpaceNoEscapesMonoBlock](#prettyspacenoescapesmonoblock) | ✗ | ✔ |
|
||||
| [Prometheus](#prometheus) | ✗ | ✔ |
|
||||
| [Protobuf](#protobuf) | ✔ | ✔ |
|
||||
| [ProtobufSingle](#protobufsingle) | ✔ | ✔ |
|
||||
| [Avro](#data-format-avro) | ✔ | ✔ |
|
||||
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
|
||||
| [Parquet](#data-format-parquet) | ✔ | ✔ |
|
||||
| [ParquetMetadata](#data-format-parquet-metadata) | ✔ | ✗ |
|
||||
| [Arrow](#data-format-arrow) | ✔ | ✔ |
|
||||
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
|
||||
| [ORC](#data-format-orc) | ✔ | ✔ |
|
||||
| [RowBinary](#rowbinary) | ✔ | ✔ |
|
||||
| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ |
|
||||
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
|
||||
| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✔ |
|
||||
| [Native](#native) | ✔ | ✔ |
|
||||
| [Null](#null) | ✗ | ✔ |
|
||||
| [XML](#xml) | ✗ | ✔ |
|
||||
| [CapnProto](#capnproto) | ✔ | ✔ |
|
||||
| [LineAsString](#lineasstring) | ✔ | ✔ |
|
||||
| [Regexp](#data-format-regexp) | ✔ | ✗ |
|
||||
| [RawBLOB](#rawblob) | ✔ | ✔ |
|
||||
| [MsgPack](#msgpack) | ✔ | ✔ |
|
||||
| [MySQLDump](#mysqldump) | ✔ | ✗ |
|
||||
| [Markdown](#markdown) | ✗ | ✔ |
|
||||
|-------------------------------------------------------------------------------------------|------|-------|
|
||||
| [TabSeparated](#tabseparated) | ✔ | ✔ |
|
||||
| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
|
||||
| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
|
||||
| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
|
||||
| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ |
|
||||
| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ |
|
||||
| [Template](#format-template) | ✔ | ✔ |
|
||||
| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ |
|
||||
| [CSV](#csv) | ✔ | ✔ |
|
||||
| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
|
||||
| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ |
|
||||
| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
|
||||
| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ |
|
||||
| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ |
|
||||
| [SQLInsert](#sqlinsert) | ✗ | ✔ |
|
||||
| [Values](#data-format-values) | ✔ | ✔ |
|
||||
| [Vertical](#vertical) | ✗ | ✔ |
|
||||
| [JSON](#json) | ✔ | ✔ |
|
||||
| [JSONAsString](#jsonasstring) | ✔ | ✗ |
|
||||
| [JSONStrings](#jsonstrings) | ✔ | ✔ |
|
||||
| [JSONColumns](#jsoncolumns) | ✔ | ✔ |
|
||||
| [JSONColumnsWithMetadata](#jsoncolumnsmonoblock)) | ✔ | ✔ |
|
||||
| [JSONCompact](#jsoncompact) | ✔ | ✔ |
|
||||
| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ |
|
||||
| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ |
|
||||
| [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
|
||||
| [PrettyJSONEachRow](#prettyjsoneachrow) | ✗ | ✔ |
|
||||
| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ |
|
||||
| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ |
|
||||
| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ |
|
||||
| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ |
|
||||
| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ |
|
||||
| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ |
|
||||
| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ |
|
||||
| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ |
|
||||
| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ |
|
||||
| [JSONObjectEachRow](#jsonobjecteachrow) | ✔ | ✔ |
|
||||
| [BSONEachRow](#bsoneachrow) | ✔ | ✔ |
|
||||
| [TSKV](#tskv) | ✔ | ✔ |
|
||||
| [Pretty](#pretty) | ✗ | ✔ |
|
||||
| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
|
||||
| [PrettyMonoBlock](#prettymonoblock) | ✗ | ✔ |
|
||||
| [PrettyNoEscapesMonoBlock](#prettynoescapesmonoblock) | ✗ | ✔ |
|
||||
| [PrettyCompact](#prettycompact) | ✗ | ✔ |
|
||||
| [PrettyCompactNoEscapes](#prettycompactnoescapes) | ✗ | ✔ |
|
||||
| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ |
|
||||
| [PrettyCompactNoEscapesMonoBlock](#prettycompactnoescapesmonoblock) | ✗ | ✔ |
|
||||
| [PrettySpace](#prettyspace) | ✗ | ✔ |
|
||||
| [PrettySpaceNoEscapes](#prettyspacenoescapes) | ✗ | ✔ |
|
||||
| [PrettySpaceMonoBlock](#prettyspacemonoblock) | ✗ | ✔ |
|
||||
| [PrettySpaceNoEscapesMonoBlock](#prettyspacenoescapesmonoblock) | ✗ | ✔ |
|
||||
| [Prometheus](#prometheus) | ✗ | ✔ |
|
||||
| [Protobuf](#protobuf) | ✔ | ✔ |
|
||||
| [ProtobufSingle](#protobufsingle) | ✔ | ✔ |
|
||||
| [Avro](#data-format-avro) | ✔ | ✔ |
|
||||
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
|
||||
| [Parquet](#data-format-parquet) | ✔ | ✔ |
|
||||
| [ParquetMetadata](#data-format-parquet-metadata) | ✔ | ✗ |
|
||||
| [Arrow](#data-format-arrow) | ✔ | ✔ |
|
||||
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
|
||||
| [ORC](#data-format-orc) | ✔ | ✔ |
|
||||
| [One](#data-format-one) | ✔ | ✗ |
|
||||
| [RowBinary](#rowbinary) | ✔ | ✔ |
|
||||
| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ |
|
||||
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
|
||||
| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✔ |
|
||||
| [Native](#native) | ✔ | ✔ |
|
||||
| [Null](#null) | ✗ | ✔ |
|
||||
| [XML](#xml) | ✗ | ✔ |
|
||||
| [CapnProto](#capnproto) | ✔ | ✔ |
|
||||
| [LineAsString](#lineasstring) | ✔ | ✔ |
|
||||
| [Regexp](#data-format-regexp) | ✔ | ✗ |
|
||||
| [RawBLOB](#rawblob) | ✔ | ✔ |
|
||||
| [MsgPack](#msgpack) | ✔ | ✔ |
|
||||
| [MySQLDump](#mysqldump) | ✔ | ✗ |
|
||||
| [Markdown](#markdown) | ✗ | ✔ |
|
||||
|
||||
|
||||
You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](/docs/en/operations/settings/settings-formats.md) section.
|
||||
@ -2131,6 +2132,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
|
||||
|
||||
- [output_format_parquet_row_group_size](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`.
|
||||
- [output_format_parquet_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`.
|
||||
- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) table in Parquet input format. Default value - `false`.
|
||||
- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`.
|
||||
- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`.
|
||||
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`.
|
||||
@ -2335,6 +2337,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
|
||||
|
||||
- [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`.
|
||||
- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`.
|
||||
- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`.
|
||||
- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`.
|
||||
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
|
||||
- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`.
|
||||
@ -2400,6 +2403,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.
|
||||
|
||||
- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`.
|
||||
- [output_format_orc_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_orc_compression_method) - compression method used in output ORC format. Default value - `none`.
|
||||
- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`.
|
||||
- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`.
|
||||
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
|
||||
- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`.
|
||||
@ -2407,6 +2411,34 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.
|
||||
|
||||
To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/table-engines/integrations/hdfs.md).
|
||||
|
||||
## One {#data-format-one}
|
||||
|
||||
Special input format that doesn't read any data from file and returns only one row with column of type `UInt8`, name `dummy` and value `0` (like `system.one` table).
|
||||
Can be used with virtual columns `_file/_path` to list all files without reading actual data.
|
||||
|
||||
Example:
|
||||
|
||||
Query:
|
||||
```sql
|
||||
SELECT _file FROM file('path/to/files/data*', One);
|
||||
```
|
||||
|
||||
Result:
|
||||
```text
|
||||
┌─_file────┐
|
||||
│ data.csv │
|
||||
└──────────┘
|
||||
┌─_file──────┐
|
||||
│ data.jsonl │
|
||||
└────────────┘
|
||||
┌─_file────┐
|
||||
│ data.tsv │
|
||||
└──────────┘
|
||||
┌─_file────────┐
|
||||
│ data.parquet │
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
## LineAsString {#lineasstring}
|
||||
|
||||
In this format, every line of input data is interpreted as a single string value. This format can only be parsed for table with a single field of type [String](/docs/en/sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](/docs/en/sql-reference/statements/create/table.md/#default) or [MATERIALIZED](/docs/en/sql-reference/statements/create/table.md/#materialized), or omitted.
|
||||
|
@ -101,6 +101,7 @@ void registerInputFormatJSONAsObject(FormatFactory & factory);
|
||||
void registerInputFormatLineAsString(FormatFactory & factory);
|
||||
void registerInputFormatMySQLDump(FormatFactory & factory);
|
||||
void registerInputFormatParquetMetadata(FormatFactory & factory);
|
||||
void registerInputFormatOne(FormatFactory & factory);
|
||||
|
||||
#if USE_HIVE
|
||||
void registerInputFormatHiveText(FormatFactory & factory);
|
||||
@ -142,6 +143,7 @@ void registerTemplateSchemaReader(FormatFactory & factory);
|
||||
void registerMySQLSchemaReader(FormatFactory & factory);
|
||||
void registerBSONEachRowSchemaReader(FormatFactory & factory);
|
||||
void registerParquetMetadataSchemaReader(FormatFactory & factory);
|
||||
void registerOneSchemaReader(FormatFactory & factory);
|
||||
|
||||
void registerFileExtensions(FormatFactory & factory);
|
||||
|
||||
@ -243,6 +245,7 @@ void registerFormats()
|
||||
registerInputFormatMySQLDump(factory);
|
||||
|
||||
registerInputFormatParquetMetadata(factory);
|
||||
registerInputFormatOne(factory);
|
||||
|
||||
registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(factory);
|
||||
registerNonTrivialPrefixAndSuffixCheckerJSONAsString(factory);
|
||||
@ -279,6 +282,7 @@ void registerFormats()
|
||||
registerMySQLSchemaReader(factory);
|
||||
registerBSONEachRowSchemaReader(factory);
|
||||
registerParquetMetadataSchemaReader(factory);
|
||||
registerOneSchemaReader(factory);
|
||||
}
|
||||
|
||||
}
|
||||
|
60
src/Processors/Formats/Impl/OneFormat.cpp
Normal file
60
src/Processors/Formats/Impl/OneFormat.cpp
Normal file
@ -0,0 +1,60 @@
|
||||
#include <Processors/Formats/Impl/OneFormat.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
OneInputFormat::OneInputFormat(const Block & header, ReadBuffer & in_) : IInputFormat(header, &in_)
|
||||
{
|
||||
if (header.columns() != 1)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"One input format is only suitable for tables with a single column of type UInt8 but the number of columns is {}",
|
||||
header.columns());
|
||||
|
||||
if (!WhichDataType(header.getByPosition(0).type).isUInt8())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"One input format is only suitable for tables with a single column of type String but the column type is {}",
|
||||
header.getByPosition(0).type->getName());
|
||||
}
|
||||
|
||||
Chunk OneInputFormat::generate()
|
||||
{
|
||||
if (done)
|
||||
return {};
|
||||
|
||||
done = true;
|
||||
auto column = ColumnUInt8::create();
|
||||
column->insertDefault();
|
||||
return Chunk(Columns{std::move(column)}, 1);
|
||||
}
|
||||
|
||||
void registerInputFormatOne(FormatFactory & factory)
|
||||
{
|
||||
factory.registerInputFormat("One", [](
|
||||
ReadBuffer & buf,
|
||||
const Block & sample,
|
||||
const RowInputFormatParams &,
|
||||
const FormatSettings &)
|
||||
{
|
||||
return std::make_shared<OneInputFormat>(sample, buf);
|
||||
});
|
||||
}
|
||||
|
||||
void registerOneSchemaReader(FormatFactory & factory)
|
||||
{
|
||||
factory.registerExternalSchemaReader("One", [](const FormatSettings &)
|
||||
{
|
||||
return std::make_shared<OneSchemaReader>();
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
32
src/Processors/Formats/Impl/OneFormat.h
Normal file
32
src/Processors/Formats/Impl/OneFormat.h
Normal file
@ -0,0 +1,32 @@
|
||||
#pragma once
|
||||
#include <Processors/Formats/IInputFormat.h>
|
||||
#include <Processors/Formats/ISchemaReader.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class OneInputFormat final : public IInputFormat
|
||||
{
|
||||
public:
|
||||
OneInputFormat(const Block & header, ReadBuffer & in_);
|
||||
|
||||
String getName() const override { return "One"; }
|
||||
|
||||
protected:
|
||||
Chunk generate() override;
|
||||
|
||||
private:
|
||||
bool done = false;
|
||||
};
|
||||
|
||||
class OneSchemaReader: public IExternalSchemaReader
|
||||
{
|
||||
public:
|
||||
NamesAndTypesList readSchema() override
|
||||
{
|
||||
return {{"dummy", std::make_shared<DataTypeUInt8>()}};
|
||||
}
|
||||
};
|
||||
|
||||
}
|
12
tests/queries/0_stateless/02842_one_input_format.reference
Normal file
12
tests/queries/0_stateless/02842_one_input_format.reference
Normal file
@ -0,0 +1,12 @@
|
||||
dummy UInt8
|
||||
0
|
||||
0
|
||||
0
|
||||
data.csv
|
||||
data.jsonl
|
||||
data.parquet
|
||||
0
|
||||
0
|
||||
0
|
||||
1
|
||||
1
|
22
tests/queries/0_stateless/02842_one_input_format.sh
Executable file
22
tests/queries/0_stateless/02842_one_input_format.sh
Executable file
@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
FILE_DIR=$CLICKHOUSE_TEST_UNIQUE_NAME
|
||||
mkdir -p $FILE_DIR
|
||||
|
||||
$CLICKHOUSE_LOCAL -q "select * from numbers(100000) format Parquet" > $FILE_DIR/data.parquet
|
||||
$CLICKHOUSE_LOCAL -q "select * from numbers(100000) format CSV" > $FILE_DIR/data.csv
|
||||
$CLICKHOUSE_LOCAL -q "select * from numbers(100000) format JSONEachRow" > $FILE_DIR/data.jsonl
|
||||
|
||||
$CLICKHOUSE_LOCAL -q "desc file('$FILE_DIR/*', One)"
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$FILE_DIR/*', One)"
|
||||
$CLICKHOUSE_LOCAL -q "select _file from file('$FILE_DIR/*', One)"
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$FILE_DIR/*', One, 'x UInt8')"
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$FILE_DIR/*', One, 'x UInt64')" 2>&1 | grep "BAD_ARGUMENTS" -c
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$FILE_DIR/*', One, 'x UInt8, y UInt8')" 2>&1 | grep "BAD_ARGUMENTS" -c
|
||||
|
||||
rm -rf $FILE_DIR
|
||||
|
Loading…
Reference in New Issue
Block a user