mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Merge pull request #58047 from Avogar/variant-data-type
Implement Variant data type
This commit is contained in:
commit
6858d2f4ca
@ -5176,6 +5176,95 @@ When set to `false` than all attempts are made with identical timeouts.
|
||||
|
||||
Default value: `true`.
|
||||
|
||||
## allow_experimental_variant_type {#allow_experimental_variant_type}
|
||||
|
||||
Allows creation of experimental [Variant](../../sql-reference/data-types/variant.md).
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
## use_variant_as_common_type {#use_variant_as_common_type}
|
||||
|
||||
Allows using the `Variant` type as a result type for [if](../../sql-reference/functions/conditional-functions.md/#if)/[multiIf](../../sql-reference/functions/conditional-functions.md/#multiif)/[array](../../sql-reference/functions/array-functions.md)/[map](../../sql-reference/functions/tuple-map-functions.md) functions when there is no common type for the argument types.
|
||||
|
||||
Example:
|
||||
|
||||
```sql
|
||||
SET use_variant_as_common_type = 1;
|
||||
SELECT toTypeName(if(number % 2, number, range(number))) as variant_type FROM numbers(1);
|
||||
SELECT if(number % 2, number, range(number)) as variant FROM numbers(5);
|
||||
```
|
||||
|
||||
```text
|
||||
┌─variant_type───────────────────┐
|
||||
│ Variant(Array(UInt64), UInt64) │
|
||||
└────────────────────────────────┘
|
||||
┌─variant───┐
|
||||
│ [] │
|
||||
│ 1 │
|
||||
│ [0,1] │
|
||||
│ 3 │
|
||||
│ [0,1,2,3] │
|
||||
└───────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SET use_variant_as_common_type = 1;
|
||||
SELECT toTypeName(multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL)) AS variant_type FROM numbers(1);
|
||||
SELECT multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL) AS variant FROM numbers(4);
|
||||
```
|
||||
|
||||
```text
|
||||
┌─variant_type─────────────────────────┐
|
||||
│ Variant(Array(UInt8), String, UInt8) │
|
||||
└──────────────────────────────────────┘
|
||||
|
||||
┌─variant───────┐
|
||||
│ 42 │
|
||||
│ [1,2,3] │
|
||||
│ Hello, World! │
|
||||
│ ᴺᵁᴸᴸ │
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SET use_variant_as_common_type = 1;
|
||||
SELECT toTypeName(array(range(number), number, 'str_' || toString(number))) as array_of_variants_type from numbers(1);
|
||||
SELECT array(range(number), number, 'str_' || toString(number)) as array_of_variants FROM numbers(3);
|
||||
```
|
||||
|
||||
```text
|
||||
┌─array_of_variants_type────────────────────────┐
|
||||
│ Array(Variant(Array(UInt64), String, UInt64)) │
|
||||
└───────────────────────────────────────────────┘
|
||||
|
||||
┌─array_of_variants─┐
|
||||
│ [[],0,'str_0'] │
|
||||
│ [[0],1,'str_1'] │
|
||||
│ [[0,1],2,'str_2'] │
|
||||
└───────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SET use_variant_as_common_type = 1;
|
||||
SELECT toTypeName(map('a', range(number), 'b', number, 'c', 'str_' || toString(number))) as map_of_variants_type from numbers(1);
|
||||
SELECT map('a', range(number), 'b', number, 'c', 'str_' || toString(number)) as map_of_variants FROM numbers(3);
|
||||
```
|
||||
|
||||
```text
|
||||
┌─map_of_variants_type────────────────────────────────┐
|
||||
│ Map(String, Variant(Array(UInt64), String, UInt64)) │
|
||||
└─────────────────────────────────────────────────────┘
|
||||
|
||||
┌─map_of_variants───────────────┐
|
||||
│ {'a':[],'b':0,'c':'str_0'} │
|
||||
│ {'a':[0],'b':1,'c':'str_1'} │
|
||||
│ {'a':[0,1],'b':2,'c':'str_2'} │
|
||||
└───────────────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
## max_partition_size_to_drop
|
||||
|
||||
Restriction on dropping partitions in query time.
|
||||
|
245
docs/en/sql-reference/data-types/variant.md
Normal file
245
docs/en/sql-reference/data-types/variant.md
Normal file
@ -0,0 +1,245 @@
|
||||
---
|
||||
slug: /en/sql-reference/data-types/variant
|
||||
sidebar_position: 55
|
||||
sidebar_label: Variant
|
||||
---
|
||||
|
||||
# Variant(T1, T2, T3, ...)
|
||||
|
||||
This type represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type
|
||||
has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value).
|
||||
|
||||
The order of nested types doesn't matter: Variant(T1, T2) = Variant(T2, T1).
|
||||
Nested types can be arbitrary types except Nullable(...), LowCardinality(Nullable(...)) and Variant(...) types.
|
||||
|
||||
:::note
|
||||
The Variant data type is an experimental feature. To use it, set `allow_experimental_variant_type = 1`.
|
||||
:::
|
||||
|
||||
## Creating Variant
|
||||
|
||||
Using `Variant` type in table column definition:
|
||||
|
||||
```sql
|
||||
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
|
||||
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
|
||||
SELECT v FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─v─────────────┐
|
||||
│ ᴺᵁᴸᴸ │
|
||||
│ 42 │
|
||||
│ Hello, World! │
|
||||
│ [1,2,3] │
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
Using CAST from ordinary columns:
|
||||
|
||||
```sql
|
||||
SELECT toTypeName(variant) as type_name, 'Hello, World!'::Variant(UInt64, String, Array(UInt64)) as variant;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─type_name──────────────────────────────┬─variant───────┐
|
||||
│ Variant(Array(UInt64), String, UInt64) │ Hello, World! │
|
||||
└────────────────────────────────────────┴───────────────┘
|
||||
```
|
||||
|
||||
Using functions `if/multiIf` when arguments don't have common type (setting `use_variant_as_common_type` should be enabled for it):
|
||||
|
||||
```sql
|
||||
SET use_variant_as_common_type = 1;
|
||||
SELECT if(number % 2, number, range(number)) as variant FROM numbers(5);
|
||||
```
|
||||
|
||||
```text
|
||||
┌─variant───┐
|
||||
│ [] │
|
||||
│ 1 │
|
||||
│ [0,1] │
|
||||
│ 3 │
|
||||
│ [0,1,2,3] │
|
||||
└───────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SET use_variant_as_common_type = 1;
|
||||
SELECT multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL) AS variant FROM numbers(4);
|
||||
```
|
||||
|
||||
```text
|
||||
┌─variant───────┐
|
||||
│ 42 │
|
||||
│ [1,2,3] │
|
||||
│ Hello, World! │
|
||||
│ ᴺᵁᴸᴸ │
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
Using functions `array/map` if array elements/map values don't have common type (setting `use_variant_as_common_type` should be enabled for it):
|
||||
|
||||
```sql
|
||||
SET use_variant_as_common_type = 1;
|
||||
SELECT array(range(number), number, 'str_' || toString(number)) as array_of_variants FROM numbers(3);
|
||||
```
|
||||
|
||||
```text
|
||||
┌─array_of_variants─┐
|
||||
│ [[],0,'str_0'] │
|
||||
│ [[0],1,'str_1'] │
|
||||
│ [[0,1],2,'str_2'] │
|
||||
└───────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SET use_variant_as_common_type = 1;
|
||||
SELECT map('a', range(number), 'b', number, 'c', 'str_' || toString(number)) as map_of_variants FROM numbers(3);
|
||||
```
|
||||
|
||||
```text
|
||||
┌─map_of_variants───────────────┐
|
||||
│ {'a':[],'b':0,'c':'str_0'} │
|
||||
│ {'a':[0],'b':1,'c':'str_1'} │
|
||||
│ {'a':[0,1],'b':2,'c':'str_2'} │
|
||||
└───────────────────────────────┘
|
||||
```
|
||||
|
||||
## Reading Variant nested types as subcolumns
|
||||
|
||||
Variant type supports reading a single nested type from a Variant column using the type name as a subcolumn.
|
||||
So, if you have column `variant Variant(T1, T2, T3)` you can read a subcolumn of type `T2` using syntax `variant.T2`,
|
||||
this subcolumn will have type `Nullable(T2)` if `T2` can be inside `Nullable` and `T2` otherwise. This subcolumn will
|
||||
be the same size as original `Variant` column and will contain `NULL` values (or empty values if `T2` cannot be inside `Nullable`)
|
||||
in all rows in which original `Variant` column doesn't have type `T2`.
|
||||
|
||||
Variant subcolumns can also be read using function `variantElement(variant_column, type_name)`.
|
||||
|
||||
Examples:
|
||||
|
||||
```sql
|
||||
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
|
||||
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
|
||||
SELECT v, v.String, v.UInt64, v.`Array(UInt64)` FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─v─────────────┬─v.String──────┬─v.UInt64─┬─v.Array(UInt64)─┐
|
||||
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
|
||||
│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │
|
||||
│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │
|
||||
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
|
||||
└───────────────┴───────────────┴──────────┴─────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT toTypeName(v.String), toTypeName(v.UInt64), toTypeName(v.`Array(UInt64)`) FROM test LIMIT 1;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─toTypeName(v.String)─┬─toTypeName(v.UInt64)─┬─toTypeName(v.Array(UInt64))─┐
|
||||
│ Nullable(String) │ Nullable(UInt64) │ Array(UInt64) │
|
||||
└──────────────────────┴──────────────────────┴─────────────────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantElement(v, 'Array(UInt64)') FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─v─────────────┬─variantElement(v, 'String')─┬─variantElement(v, 'UInt64')─┬─variantElement(v, 'Array(UInt64)')─┐
|
||||
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
|
||||
│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │
|
||||
│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │
|
||||
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
|
||||
└───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Conversion between Variant column and other columns
|
||||
|
||||
There are 3 possible conversions that can be performed with Variant column.
|
||||
|
||||
### Converting an ordinary column to a Variant column
|
||||
|
||||
It is possible to convert an ordinary column with type `T` to a `Variant` column containing this type:
|
||||
|
||||
```sql
|
||||
SELECT toTypeName(variant) as type_name, 'Hello, World!'::Variant(UInt64, String, Array(UInt64)) as variant;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─type_name──────────────────────────────┬─variant───────┐
|
||||
│ Variant(Array(UInt64), String, UInt64) │ Hello, World! │
|
||||
└────────────────────────────────────────┴───────────────┘
|
||||
```
|
||||
|
||||
### Converting a Variant column to an ordinary column
|
||||
|
||||
It is possible to convert a `Variant` column to an ordinary column. In this case all nested variants will be converted to a destination type:
|
||||
|
||||
```sql
|
||||
CREATE TABLE test (v Variant(UInt64, String)) ENGINE = Memory;
|
||||
INSERT INTO test VALUES (NULL), (42), ('42.42');
|
||||
SELECT v::Nullable(Float64) FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─CAST(v, 'Nullable(Float64)')─┐
|
||||
│ ᴺᵁᴸᴸ │
|
||||
│ 42 │
|
||||
│ 42.42 │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
### Converting a Variant to another Variant
|
||||
|
||||
It is possible to convert a `Variant` column to another `Variant` column, but only if the destination `Variant` column contains all nested types from the original `Variant`:
|
||||
|
||||
```sql
|
||||
CREATE TABLE test (v Variant(UInt64, String)) ENGINE = Memory;
|
||||
INSERT INTO test VALUES (NULL), (42), ('String');
|
||||
SELECT v::Variant(UInt64, String, Array(UInt64)) FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─CAST(v, 'Variant(UInt64, String, Array(UInt64))')─┐
|
||||
│ ᴺᵁᴸᴸ │
|
||||
│ 42 │
|
||||
│ String │
|
||||
└───────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## Reading Variant type from the data
|
||||
|
||||
All text formats (TSV, CSV, CustomSeparated, Values, JSONEachRow, etc) support reading `Variant` type. During data parsing ClickHouse tries to insert value into the most appropriate variant type.
|
||||
|
||||
Example:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
v,
|
||||
variantElement(v, 'String') AS str,
|
||||
variantElement(v, 'UInt64') AS num,
|
||||
variantElement(v, 'Float64') AS float,
|
||||
variantElement(v, 'DateTime') AS date,
|
||||
variantElement(v, 'Array(UInt64)') AS arr
|
||||
FROM format(JSONEachRow, 'v Variant(String, UInt64, Float64, DateTime, Array(UInt64))', $$
|
||||
{"v" : "Hello, World!"},
|
||||
{"v" : 42},
|
||||
{"v" : 42.42},
|
||||
{"v" : "2020-01-01 00:00:00"},
|
||||
{"v" : [1, 2, 3]}
|
||||
$$)
|
||||
```
|
||||
|
||||
```text
|
||||
┌─v───────────────────┬─str───────────┬──num─┬─float─┬────────────────date─┬─arr─────┐
|
||||
│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
|
||||
│ 42 │ ᴺᵁᴸᴸ │ 42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
|
||||
│ 42.42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 42.42 │ ᴺᵁᴸᴸ │ [] │
|
||||
│ 2020-01-01 00:00:00 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2020-01-01 00:00:00 │ [] │
|
||||
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
|
||||
└─────────────────────┴───────────────┴──────┴───────┴─────────────────────┴─────────┘
|
||||
```
|
@ -2832,6 +2832,43 @@ Result:
|
||||
└─────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## variantElement
|
||||
|
||||
Extracts a column with specified type from a `Variant` column.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
variantElement(variant, type_name[, default_value])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `variant` — Variant column. [Variant](../../sql-reference/data-types/variant.md).
|
||||
- `type_name` — The name of the variant type to extract. [String](../../sql-reference/data-types/string.md).
|
||||
- `default_value` — The default value that will be used if the variant doesn't have a variant with the specified type. Can be any type. Optional.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Subcolumn of a `Variant` column with specified type.
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
|
||||
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
|
||||
SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantElement(v, 'Array(UInt64)') FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─v─────────────┬─variantElement(v, 'String')─┬─variantElement(v, 'UInt64')─┬─variantElement(v, 'Array(UInt64)')─┐
|
||||
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
|
||||
│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │
|
||||
│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │
|
||||
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
|
||||
└───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## minSampleSizeConversion
|
||||
|
||||
Calculates minimum required sample size for an A/B test comparing conversions (proportions) in two samples.
|
||||
|
@ -181,6 +181,23 @@ public:
|
||||
|
||||
node = std::make_shared<ColumnNode>(column, column_source);
|
||||
}
|
||||
else if (function_name == "variantElement" && isVariant(column_type) && second_argument_constant_node)
|
||||
{
|
||||
/// Replace `variantElement(variant_argument, type_name)` with `variant_argument.type_name`.
|
||||
const auto & variant_element_constant_value = second_argument_constant_node->getValue();
|
||||
String subcolumn_name;
|
||||
|
||||
if (variant_element_constant_value.getType() != Field::Types::String)
|
||||
return;
|
||||
|
||||
subcolumn_name = variant_element_constant_value.get<const String &>();
|
||||
|
||||
column.name += '.';
|
||||
column.name += subcolumn_name;
|
||||
column.type = function_node->getResultType();
|
||||
|
||||
node = std::make_shared<ColumnNode>(column, column_source);
|
||||
}
|
||||
else if (function_name == "mapContains" && column_type.isMap())
|
||||
{
|
||||
const auto & data_type_map = assert_cast<const DataTypeMap &>(*column.type);
|
||||
|
@ -159,4 +159,26 @@ void ColumnConst::compareColumn(
|
||||
std::fill(compare_results.begin(), compare_results.end(), res);
|
||||
}
|
||||
|
||||
/// Creates a constant column of size 1 whose single value is `value`.
/// The value is inserted into an empty clone of `column`, which then becomes
/// the data column of the resulting ColumnConst.
ColumnConst::Ptr createColumnConst(const ColumnPtr & column, Field value)
{
    auto single_value_column = column->cloneEmpty();
    single_value_column->insert(value);
    return ColumnConst::create(std::move(single_value_column), 1);
}
|
||||
|
||||
/// Creates a constant column of size 1 whose single value is copied from
/// row `const_value_index` of `column`.
/// The row is inserted into an empty clone of `column`, which then becomes
/// the data column of the resulting ColumnConst.
ColumnConst::Ptr createColumnConst(const ColumnPtr & column, size_t const_value_index)
{
    auto single_value_column = column->cloneEmpty();
    single_value_column->insertFrom(*column, const_value_index);
    return ColumnConst::create(std::move(single_value_column), 1);
}
|
||||
|
||||
/// Creates a constant column of size 1 holding the default value of `column`.
/// The default is produced by calling insertDefault() on an empty clone of `column`.
ColumnConst::Ptr createColumnConstWithDefaultValue(const ColumnPtr & column)
{
    auto default_value_column = column->cloneEmpty();
    default_value_column->insertDefault();
    return ColumnConst::create(std::move(default_value_column), 1);
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -292,4 +292,9 @@ public:
|
||||
bool isCollationSupported() const override { return data->isCollationSupported(); }
|
||||
};
|
||||
|
||||
ColumnConst::Ptr createColumnConst(const ColumnPtr & column, Field value);
|
||||
ColumnConst::Ptr createColumnConst(const ColumnPtr & column, size_t const_value_index);
|
||||
ColumnConst::Ptr createColumnConstWithDefaultValue(const ColumnPtr &column);
|
||||
|
||||
|
||||
}
|
||||
|
@ -141,6 +141,11 @@ void ColumnMap::updateHashFast(SipHash & hash) const
|
||||
nested->updateHashFast(hash);
|
||||
}
|
||||
|
||||
void ColumnMap::insertFrom(const IColumn & src, size_t n)
|
||||
{
|
||||
nested->insertFrom(assert_cast<const ColumnMap &>(src).getNestedColumn(), n);
|
||||
}
|
||||
|
||||
void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length)
|
||||
{
|
||||
nested->insertRangeFrom(
|
||||
|
@ -64,6 +64,7 @@ public:
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
void insertFrom(const IColumn & src_, size_t n) override;
|
||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
||||
void expand(const Filter & mask, bool inverted) override;
|
||||
|
@ -833,24 +833,22 @@ void ColumnNullable::checkConsistency() const
|
||||
"Logical error: Sizes of nested column and null map of Nullable column are not equal");
|
||||
}
|
||||
|
||||
ColumnPtr ColumnNullable::createWithOffsets(const IColumn::Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const
|
||||
ColumnPtr ColumnNullable::createWithOffsets(const IColumn::Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const
|
||||
{
|
||||
ColumnPtr new_values;
|
||||
ColumnPtr new_null_map;
|
||||
|
||||
if (default_field.getType() == Field::Types::Null)
|
||||
const ColumnNullable & nullable_column_with_default_value = assert_cast<const ColumnNullable &>(column_with_default_value.getDataColumn());
|
||||
if (nullable_column_with_default_value.isNullAt(0))
|
||||
{
|
||||
auto default_column = nested_column->cloneEmpty();
|
||||
default_column->insertDefault();
|
||||
|
||||
/// Value in main column, when null map is 1 is implementation defined. So, take any value.
|
||||
new_values = nested_column->createWithOffsets(offsets, (*default_column)[0], total_rows, shift);
|
||||
new_null_map = null_map->createWithOffsets(offsets, Field(1u), total_rows, shift);
|
||||
new_values = nested_column->createWithOffsets(offsets, *createColumnConstWithDefaultValue(nested_column), total_rows, shift);
|
||||
new_null_map = null_map->createWithOffsets(offsets, *createColumnConst(null_map, Field(1u)), total_rows, shift);
|
||||
}
|
||||
else
|
||||
{
|
||||
new_values = nested_column->createWithOffsets(offsets, default_field, total_rows, shift);
|
||||
new_null_map = null_map->createWithOffsets(offsets, Field(0u), total_rows, shift);
|
||||
new_values = nested_column->createWithOffsets(offsets, *ColumnConst::create(nullable_column_with_default_value.getNestedColumnPtr(), 1), total_rows, shift);
|
||||
new_null_map = null_map->createWithOffsets(offsets, *createColumnConst(null_map, Field(0u)), total_rows, shift);
|
||||
}
|
||||
|
||||
return ColumnNullable::create(new_values, new_null_map);
|
||||
@ -896,10 +894,7 @@ ColumnPtr makeNullable(const ColumnPtr & column)
|
||||
|
||||
ColumnPtr makeNullableOrLowCardinalityNullable(const ColumnPtr & column)
|
||||
{
|
||||
if (isColumnNullable(*column))
|
||||
return column;
|
||||
|
||||
if (isColumnLowCardinalityNullable(*column))
|
||||
if (isColumnNullableOrLowCardinalityNullable(*column))
|
||||
return column;
|
||||
|
||||
if (isColumnConst(*column))
|
||||
@ -925,4 +920,21 @@ ColumnPtr makeNullableSafe(const ColumnPtr & column)
|
||||
return column;
|
||||
}
|
||||
|
||||
/// Returns a Nullable / LowCardinality(Nullable) version of `column` when one can be built,
/// and `column` itself otherwise:
///  - a column that is already Nullable or LowCardinality(Nullable) is returned as is;
///  - for a constant column the conversion is applied to its data column and the result
///    is wrapped into a new ColumnConst of the same size;
///  - a low cardinality column is converted with cloneNullable();
///  - any other column is wrapped with makeNullable() only if canBeInsideNullable() is true.
ColumnPtr makeNullableOrLowCardinalityNullableSafe(const ColumnPtr & column)
{
    if (isColumnNullableOrLowCardinalityNullable(*column))
        return column;

    if (isColumnConst(*column))
    {
        const auto & const_column = assert_cast<const ColumnConst &>(*column);
        auto converted_data = makeNullableOrLowCardinalityNullableSafe(const_column.getDataColumnPtr());
        return ColumnConst::create(converted_data, column->size());
    }

    if (column->lowCardinality())
    {
        const auto & low_cardinality_column = assert_cast<const ColumnLowCardinality &>(*column);
        return low_cardinality_column.cloneNullable();
    }

    /// Columns that cannot be placed inside Nullable are left untouched.
    if (!column->canBeInsideNullable())
        return column;

    return makeNullable(column);
}
|
||||
|
||||
}
|
||||
|
@ -168,7 +168,7 @@ public:
|
||||
getIndicesOfNonDefaultRowsImpl<ColumnNullable>(indices, from, limit);
|
||||
}
|
||||
|
||||
ColumnPtr createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const override;
|
||||
ColumnPtr createWithOffsets(const Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const override;
|
||||
|
||||
bool isNullable() const override { return true; }
|
||||
bool isFixedAndContiguous() const override { return false; }
|
||||
@ -232,5 +232,6 @@ private:
|
||||
ColumnPtr makeNullable(const ColumnPtr & column);
|
||||
ColumnPtr makeNullableSafe(const ColumnPtr & column);
|
||||
ColumnPtr makeNullableOrLowCardinalityNullable(const ColumnPtr & column);
|
||||
ColumnPtr makeNullableOrLowCardinalityNullableSafe(const ColumnPtr & column);
|
||||
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <Columns/ColumnObject.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Common/iota.h>
|
||||
#include <DataTypes/ObjectUtils.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
@ -475,7 +476,7 @@ void ColumnObject::Subcolumn::finalize()
|
||||
{
|
||||
auto values = part->index(*offsets, offsets->size());
|
||||
values = castColumn({values, from_type, ""}, to_type);
|
||||
part = values->createWithOffsets(offsets_data, to_type->getDefault(), part_size, /*shift=*/ 0);
|
||||
part = values->createWithOffsets(offsets_data, *createColumnConstWithDefaultValue(result_column->getPtr()), part_size, /*shift=*/ 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Columns/ColumnCompressed.h>
|
||||
#include <Columns/ColumnSparse.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <Processors/Transforms/ColumnGathererTransform.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
@ -130,7 +131,7 @@ StringRef ColumnSparse::getDataAt(size_t n) const
|
||||
|
||||
/// Builds the result by calling `values->createWithOffsets` with this column's offsets
/// (`getOffsetsData()`), `_size` as the total number of rows and a shift of 1.
/// The default passed to createWithOffsets is taken from `values` at index 0.
/// NOTE(review): two return statements are visible here; they look like the removed and
/// added lines of a diff hunk (Field-based default vs. ColumnConst-based default) — confirm
/// which one is the current code. As written, only the first one is reachable.
ColumnPtr ColumnSparse::convertToFullColumnIfSparse() const
|
||||
{
|
||||
return values->createWithOffsets(getOffsetsData(), (*values)[0], _size, /*shift=*/ 1);
|
||||
return values->createWithOffsets(getOffsetsData(), *createColumnConst(values, 0), _size, /*shift=*/ 1);
|
||||
}
|
||||
|
||||
void ColumnSparse::insertSingleValue(const Inserter & inserter)
|
||||
|
1348
src/Columns/ColumnVariant.cpp
Normal file
1348
src/Columns/ColumnVariant.cpp
Normal file
File diff suppressed because it is too large
Load Diff
307
src/Columns/ColumnVariant.h
Normal file
307
src/Columns/ColumnVariant.h
Normal file
@ -0,0 +1,307 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
/**
|
||||
* Column for storing Variant(...) type values.
|
||||
* Variant type represents a union of other data types.
|
||||
* For example, type Variant(T1, T2, ..., TN) means that each row of this type
|
||||
* has a value of either type T1 or T2 or ... or TN or none of them (NULL value)
|
||||
*
|
||||
* ColumnVariant stores:
|
||||
* - The discriminators column, which determines which variant is stored in each row.
|
||||
* - The offsets column, which determines the offset in the corresponding variant column in each row.
|
||||
* - The list of variant columns with only real values (so the sizes of variant columns can be different).
|
||||
* Discriminator is an index of a variant in the variants list, it also has special value called NULL_DISCRIMINATOR
|
||||
* that indicates that the value in the row is NULL.
|
||||
*
|
||||
* We want to be able to extend Variant column for free without rewriting the data, but as we don't care about the
|
||||
* order of variants during Variant creation (we want Variant(T1, T2) to be the same as Variant(T2, T1)), we support
|
||||
* some global order of nested types inside Variant during type creation, so after extension the order of variant types
|
||||
* (and so their discriminators) can change. For example: Variant(T1, T3) -> Variant(T1, T2, T3).
|
||||
* To avoid full rewrite of discriminators column on Variant extension, we differentiate local order of variants
|
||||
* inside a column and global order of variants created during type creation. So, ColumnVariant stores only local
|
||||
* discriminators and additionally stores the mapping between global and local discriminators.
|
||||
* So, when we need to extend Variant column with new variant, we can just append it to a list of variant columns
|
||||
* with new local discriminator and update mapping from global to local orders.
|
||||
*
|
||||
* Note that two instances of ColumnVariant can have different local orders, so we should always use global
|
||||
* discriminators during inter-column interactions.
|
||||
*
|
||||
* Let's take an example with type Variant(UInt32, String, Array(UInt32)):
|
||||
* During type creation we will sort types by their names and get the global order: Array(UInt32), String, UInt32.
|
||||
* So, type Array(UInt32) will have global discriminator 0, String - 1 and UInt32 - 2.
|
||||
* Let's say we have a column with local order (String, UInt32, Array(UInt32)) and values:
|
||||
* 'Hello', 42, NULL, 'World', 43, [1, 2, 3], NULL, 44
|
||||
*
|
||||
* Let's see how these values will be stored in ColumnVariant:
|
||||
*
|
||||
* local_to_global_discriminators: {0 : 1, 1 : 2, 2 : 0}
|
||||
* global_to_local_discriminators: {0 : 2, 1 : 0, 2 : 1}
|
||||
* local_discriminators offsets String UInt32 Array(UInt32)
|
||||
* 0 0 'Hello' 42 [1, 2, 3]
|
||||
* 1 0 'World' 43
|
||||
* NULL_DISCRIMINATOR 0 44
|
||||
* 0 1
|
||||
* 1 1
|
||||
* 2 0
|
||||
* NULL_DISCRIMINATOR 0
|
||||
* 1 2
|
||||
*
|
||||
*/
|
||||
class ColumnVariant final : public COWHelper<IColumn, ColumnVariant>
|
||||
{
|
||||
public:
|
||||
using Discriminator = UInt8;
|
||||
using Discriminators = PaddedPODArray<Discriminator>;
|
||||
using ColumnDiscriminators = ColumnVector<Discriminator>;
|
||||
using ColumnOffsets = ColumnVector<Offset>;
|
||||
|
||||
static constexpr UInt8 NULL_DISCRIMINATOR = std::numeric_limits<Discriminator>::max(); /// 255
|
||||
static constexpr size_t MAX_NESTED_COLUMNS = std::numeric_limits<Discriminator>::max(); /// 255
|
||||
|
||||
private:
|
||||
friend class COWHelper<IColumn, ColumnVariant>;
|
||||
|
||||
using NestedColumns = std::vector<WrappedPtr>;
|
||||
|
||||
/// Create an empty column with provided variants.
|
||||
/// Variants are in global order.
|
||||
explicit ColumnVariant(MutableColumns && variants_);
|
||||
/// Variants are in local order according to provided mapping.
|
||||
explicit ColumnVariant(MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
|
||||
|
||||
/// Create column from discriminators column and list of variant columns.
|
||||
/// Offsets column should be constructed according to the discriminators.
|
||||
/// Variants are in global order.
|
||||
ColumnVariant(MutableColumnPtr local_discriminators_, MutableColumns && variants_);
|
||||
/// Variants are in local order according to provided mapping.
|
||||
ColumnVariant(MutableColumnPtr local_discriminators_, MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
|
||||
|
||||
/// Create column from discriminators column, offsets column and list of variant columns.
|
||||
/// Variants are in global order.
|
||||
ColumnVariant(MutableColumnPtr local_discriminators_, MutableColumnPtr offsets_, MutableColumns && variants_);
|
||||
/// Variants are in local order according to provided mapping.
|
||||
ColumnVariant(MutableColumnPtr local_discriminators_, MutableColumnPtr offsets_, MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
|
||||
|
||||
ColumnVariant(const ColumnVariant &) = default;
|
||||
|
||||
public:
|
||||
/** Create immutable column using immutable arguments. These arguments may be shared with other variants.
|
||||
* Use IColumn::mutate in order to make mutable column and mutate shared nested variants.
|
||||
*/
|
||||
using Base = COWHelper<IColumn, ColumnVariant>;
|
||||
static Ptr create(const Columns & variants_) { return create(variants_, {}); }
|
||||
static Ptr create(const Columns & variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
|
||||
static Ptr create(const ColumnPtr & local_discriminators_, const Columns & variants_) { return create(local_discriminators_, variants_, {}); }
|
||||
static Ptr create(const ColumnPtr & local_discriminators_, const Columns & variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
|
||||
static Ptr create(const ColumnPtr & local_discriminators_, const DB::ColumnPtr & offsets_, const Columns & variants_) { return create(local_discriminators_, offsets_, variants_, {}); }
|
||||
static Ptr create(const ColumnPtr & local_discriminators_, const DB::ColumnPtr & offsets_, const Columns & variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
|
||||
|
||||
/// Factory overloads that take ownership of mutable columns.
/// Each one moves its column arguments into Base::create and passes the
/// optional local_to_global_discriminators_ mapping through unchanged.
static MutablePtr create(MutableColumns && variants_)
|
||||
{
|
||||
return Base::create(std::move(variants_));
|
||||
}
|
||||
|
||||
static MutablePtr create(MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_)
|
||||
{
|
||||
return Base::create(std::move(variants_), local_to_global_discriminators_);
|
||||
}
|
||||
|
||||
/// Variants plus an explicit column of local discriminators.
static MutablePtr create(MutableColumnPtr local_discriminators_, MutableColumns && variants_)
|
||||
{
|
||||
return Base::create(std::move(local_discriminators_), std::move(variants_));
|
||||
}
|
||||
|
||||
static MutablePtr create(MutableColumnPtr local_discriminators_, MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_)
|
||||
{
|
||||
return Base::create(std::move(local_discriminators_), std::move(variants_), local_to_global_discriminators_);
|
||||
}
|
||||
|
||||
/// Variants plus explicit local discriminators and offsets columns.
static MutablePtr create(MutableColumnPtr local_discriminators_, MutableColumnPtr offsets_, MutableColumns && variants_)
|
||||
{
|
||||
return Base::create(std::move(local_discriminators_), std::move(offsets_), std::move(variants_));
|
||||
}
|
||||
|
||||
static MutablePtr create(MutableColumnPtr local_discriminators_, MutableColumnPtr offsets_, MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_)
|
||||
{
|
||||
return Base::create(std::move(local_discriminators_), std::move(offsets_), std::move(variants_), local_to_global_discriminators_);
|
||||
}
|
||||
|
||||
/// Type identification. The family name is the fixed string "Variant";
/// getName() is defined out of line.
std::string getName() const override;
|
||||
const char * getFamilyName() const override { return "Variant"; }
|
||||
TypeIndex getDataType() const override { return TypeIndex::Variant; }
|
||||
|
||||
MutableColumnPtr cloneEmpty() const override;
|
||||
MutableColumnPtr cloneResized(size_t size) const override;
|
||||
|
||||
/// Per-row accessors: the offset stored for row i, the local discriminator stored for row i,
/// and that discriminator translated to the global order via globalDiscriminatorByLocal().
/// None of them performs a bounds check on i.
size_t ALWAYS_INLINE offsetAt(size_t i) const { return getOffsets()[i]; }
|
||||
Discriminator ALWAYS_INLINE localDiscriminatorAt(size_t i) const { return getLocalDiscriminators()[i]; }
|
||||
Discriminator ALWAYS_INLINE globalDiscriminatorAt(size_t i) const { return globalDiscriminatorByLocal(getLocalDiscriminators()[i]); }
|
||||
|
||||
/// Translates a local discriminator into the corresponding global one.
Discriminator ALWAYS_INLINE globalDiscriminatorByLocal(Discriminator local_discr) const
{
    /// NULL_DISCRIMINATOR has the same value in both orders, so it is returned as is
    /// and never used as an index into the mapping.
    if (local_discr == NULL_DISCRIMINATOR)
        return NULL_DISCRIMINATOR;

    return local_to_global_discriminators[local_discr];
}
|
||||
|
||||
/// Translates a global discriminator into the corresponding local one.
Discriminator ALWAYS_INLINE localDiscriminatorByGlobal(Discriminator global_discr) const
{
    /// NULL_DISCRIMINATOR has the same value in both orders, so it is returned as is
    /// and never used as an index into the mapping.
    if (global_discr == NULL_DISCRIMINATOR)
        return NULL_DISCRIMINATOR;

    return global_to_local_discriminators[global_discr];
}
|
||||
|
||||
/// Number of rows in the column; taken from the size of the offsets column.
size_t size() const override
|
||||
{
|
||||
return offsets->size();
|
||||
}
|
||||
|
||||
/// Element access. All members marked `override` below implement the virtual
/// interface of the base column class and are defined out of line.
Field operator[](size_t n) const override;
|
||||
void get(size_t n, Field & res) const override;
|
||||
|
||||
bool isDefaultAt(size_t n) const override;
|
||||
bool isNullAt(size_t n) const override;
|
||||
StringRef getDataAt(size_t n) const override;
|
||||
/// Insertion.
void insertData(const char * pos, size_t length) override;
|
||||
void insert(const Field & x) override;
|
||||
/// Not part of the base interface (no `override`). Takes a *global* discriminator;
/// presumably inserts x into the variant it identifies — confirm against the definition.
void insertIntoVariant(const Field & x, Discriminator global_discr);
|
||||
void insertFrom(const IColumn & src_, size_t n) override;
|
||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
|
||||
void insertDefault() override;
|
||||
void insertManyDefaults(size_t length) override;
|
||||
void popBack(size_t n) override;
|
||||
/// Arena (de)serialization and hashing.
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
|
||||
const char * deserializeAndInsertFromArena(const char * pos) override;
|
||||
const char * skipSerializedInArena(const char * pos) const override;
|
||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||
void updateWeakHash32(WeakHash32 & hash) const override;
|
||||
void updateHashFast(SipHash & hash) const override;
|
||||
/// Row selection and rearrangement.
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
||||
void expand(const Filter & mask, bool inverted) override;
|
||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
||||
/// Typed counterpart of index(), templated on the element type of the index array
/// (presumably the implementation index() dispatches to — confirm).
template <typename Type>
|
||||
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
|
||||
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
|
||||
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
|
||||
void gather(ColumnGathererStream & gatherer_stream) override;
|
||||
|
||||
/// Variant type is not comparable.
/// compareAt ignores all of its arguments and always returns 0,
/// i.e. any two rows are reported as equal.
|
||||
int compareAt(size_t, size_t, const IColumn &, int) const override
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Bulk comparison is not supported: unconditionally throws NOT_IMPLEMENTED.
void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method compareColumn is not supported for ColumnVariant");
|
||||
}
|
||||
|
||||
/// Remaining overrides of the base column interface; all are defined out of line.
bool hasEqualValues() const override;
|
||||
void getExtremes(Field & min, Field & max) const override;
|
||||
/// Sorting-related overrides.
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
|
||||
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
||||
size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override;
|
||||
|
||||
/// Memory management and size accounting.
void reserve(size_t n) override;
|
||||
void ensureOwnership() override;
|
||||
size_t byteSize() const override;
|
||||
size_t byteSizeAt(size_t n) const override;
|
||||
size_t allocatedBytes() const override;
|
||||
void protect() override;
|
||||
/// Subcolumn traversal and structural operations.
void forEachSubcolumn(MutableColumnCallback callback) override;
|
||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
|
||||
bool structureEquals(const IColumn & rhs) const override;
|
||||
ColumnPtr compress() const override;
|
||||
/// Statistics about default rows.
double getRatioOfDefaultRows(double sample_ratio) const override;
|
||||
UInt64 getNumberOfDefaultRows() const override;
|
||||
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
|
||||
void finalize() override;
|
||||
bool isFinalized() const override;
|
||||
|
||||
/// Access to the nested variant columns.
/// The *ByLocalDiscriminator accessors index `variants` directly without a bounds check.
/// The *ByGlobalDiscriminator accessors first translate the discriminator through
/// global_to_local_discriminators.at(), which is bounds-checked by std::vector::at.
const IColumn & getVariantByLocalDiscriminator(size_t discr) const { return *variants[discr]; }
|
||||
const IColumn & getVariantByGlobalDiscriminator(size_t discr) const { return *variants[global_to_local_discriminators.at(discr)]; }
|
||||
IColumn & getVariantByLocalDiscriminator(size_t discr) { return *variants[discr]; }
|
||||
IColumn & getVariantByGlobalDiscriminator(size_t discr) { return *variants[global_to_local_discriminators.at(discr)]; }
|
||||
|
||||
/// Same as above, but returning the owning pointer instead of a reference to the column.
const ColumnPtr & getVariantPtrByLocalDiscriminator(size_t discr) const { return variants[discr]; }
|
||||
const ColumnPtr & getVariantPtrByGlobalDiscriminator(size_t discr) const { return variants[global_to_local_discriminators.at(discr)]; }
|
||||
ColumnPtr & getVariantPtrByLocalDiscriminator(size_t discr) { return variants[discr]; }
|
||||
ColumnPtr & getVariantPtrByGlobalDiscriminator(size_t discr) { return variants[global_to_local_discriminators.at(discr)]; }
|
||||
|
||||
/// Local discriminators: as a generic column, as a pointer, and as the raw data container.
const IColumn & getLocalDiscriminatorsColumn() const { return *local_discriminators; }
|
||||
IColumn & getLocalDiscriminatorsColumn() { return *local_discriminators; }
|
||||
|
||||
const ColumnPtr & getLocalDiscriminatorsPtr() const { return local_discriminators; }
|
||||
ColumnPtr & getLocalDiscriminatorsPtr() { return local_discriminators; }
|
||||
|
||||
/// The discriminators column is cast to ColumnDiscriminators to reach its data.
const Discriminators & ALWAYS_INLINE getLocalDiscriminators() const { return assert_cast<const ColumnDiscriminators &>(*local_discriminators).getData(); }
|
||||
Discriminators & ALWAYS_INLINE getLocalDiscriminators() { return assert_cast<ColumnDiscriminators &>(*local_discriminators).getData(); }
|
||||
|
||||
/// Offsets: as a generic column, as a pointer, and as the raw data container.
const IColumn & getOffsetsColumn() const { return *offsets; }
|
||||
IColumn & getOffsetsColumn() { return *offsets; }
|
||||
|
||||
const ColumnPtr & getOffsetsPtr() const { return offsets; }
|
||||
ColumnPtr & getOffsetsPtr() { return offsets; }
|
||||
|
||||
/// The offsets column is cast to ColumnOffsets to reach its data.
const Offsets & ALWAYS_INLINE getOffsets() const { return assert_cast<const ColumnOffsets &>(*offsets).getData(); }
|
||||
Offsets & ALWAYS_INLINE getOffsets() { return assert_cast<ColumnOffsets &>(*offsets).getData(); }
|
||||
|
||||
/// Number of nested variant columns.
size_t getNumVariants() const { return variants.size(); }
|
||||
|
||||
bool hasOnlyNulls() const
|
||||
{
|
||||
/// If all variants are empty, we have only NULL values.
|
||||
return std::all_of(variants.begin(), variants.end(), [](const WrappedPtr & v){ return v->empty(); });
|
||||
}
|
||||
|
||||
/// Check if local and global order is the same.
|
||||
bool hasGlobalVariantsOrder() const
|
||||
{
|
||||
for (size_t i = 0; i != local_to_global_discriminators.size(); ++i)
|
||||
{
|
||||
if (local_to_global_discriminators[i] != i)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Check if we have only 1 non-empty variant and no NULL values,
|
||||
/// and if so, return the discriminator of this non-empty column.
/// The returned discriminator is in the local order (per the function name);
/// std::nullopt is presumably returned otherwise — confirm against the definition.
|
||||
std::optional<Discriminator> getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls() const;
|
||||
|
||||
/// Apply null map to a Variant column.
|
||||
/// Replace corresponding discriminators with NULL_DISCRIMINATOR
|
||||
/// and filter out rows in variants if needed.
|
||||
void applyNullMap(const ColumnVector<UInt8>::Container & null_map);
|
||||
/// Counterpart of applyNullMap for a negated map
/// (presumably rows where the map is zero become NULL — confirm against applyNullMapImpl).
void applyNegatedNullMap(const ColumnVector<UInt8>::Container & null_map);
|
||||
|
||||
private:
|
||||
/// Judging by the name, fills global_to_local_discriminators with the identity mapping (defined out of line).
void initIdentityGlobalToLocalDiscriminatorsMapping();
|
||||
|
||||
/// Shared implementation behind applyNullMap / applyNegatedNullMap, selected by `inverted`
/// (presumably false for applyNullMap and true for applyNegatedNullMap — confirm).
template <bool inverted>
|
||||
void applyNullMapImpl(const ColumnVector<UInt8>::Container & null_map);
|
||||
|
||||
/// Per-row local discriminators; accessed as ColumnDiscriminators by getLocalDiscriminators().
WrappedPtr local_discriminators;
|
||||
/// Per-row offsets; accessed as ColumnOffsets by getOffsets(). Its size defines size().
WrappedPtr offsets;
|
||||
/// Nested variant columns, indexed by local discriminator.
NestedColumns variants;
|
||||
|
||||
/// Mapping indexed by global discriminator, yielding the local discriminator.
std::vector<Discriminator> global_to_local_discriminators;
|
||||
/// Mapping indexed by local discriminator, yielding the global discriminator.
std::vector<Discriminator> local_to_global_discriminators;
|
||||
};
|
||||
|
||||
|
||||
}
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <Columns/ColumnCompressed.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/MaskOperations.h>
|
||||
#include <Columns/RadixSortHelper.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
@ -940,7 +941,7 @@ ColumnPtr ColumnVector<T>::compress() const
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ColumnPtr ColumnVector<T>::createWithOffsets(const IColumn::Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const
|
||||
ColumnPtr ColumnVector<T>::createWithOffsets(const IColumn::Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const
|
||||
{
|
||||
if (offsets.size() + shift != size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
@ -949,7 +950,7 @@ ColumnPtr ColumnVector<T>::createWithOffsets(const IColumn::Offsets & offsets, c
|
||||
auto res = this->create();
|
||||
auto & res_data = res->getData();
|
||||
|
||||
T default_value = static_cast<T>(default_field.safeGet<T>());
|
||||
T default_value = assert_cast<const ColumnVector<T> &>(column_with_default_value.getDataColumn()).getElement(0);
|
||||
res_data.resize_fill(total_rows, default_value);
|
||||
for (size_t i = 0; i < offsets.size(); ++i)
|
||||
res_data[offsets[i]] = data[i + shift];
|
||||
|
@ -300,7 +300,7 @@ public:
|
||||
return this->template getIndicesOfNonDefaultRowsImpl<Self>(indices, from, limit);
|
||||
}
|
||||
|
||||
ColumnPtr createWithOffsets(const IColumn::Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const override;
|
||||
ColumnPtr createWithOffsets(const IColumn::Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const override;
|
||||
|
||||
ColumnPtr compress() const override;
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <IO/Operators.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Core/Field.h>
|
||||
#include <DataTypes/Serializations/SerializationInfo.h>
|
||||
@ -34,7 +35,7 @@ void IColumn::insertFrom(const IColumn & src, size_t n)
|
||||
insert(src[n]);
|
||||
}
|
||||
|
||||
ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const
|
||||
ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const
|
||||
{
|
||||
if (offsets.size() + shift != size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
@ -50,14 +51,14 @@ ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const Field & defa
|
||||
current_offset = offsets[i];
|
||||
|
||||
if (offsets_diff > 1)
|
||||
res->insertMany(default_field, offsets_diff - 1);
|
||||
res->insertManyFrom(column_with_default_value.getDataColumn(), 0, offsets_diff - 1);
|
||||
|
||||
res->insertFrom(*this, i + shift);
|
||||
}
|
||||
|
||||
ssize_t offsets_diff = static_cast<ssize_t>(total_rows) - current_offset;
|
||||
if (offsets_diff > 1)
|
||||
res->insertMany(default_field, offsets_diff - 1);
|
||||
res->insertManyFrom(column_with_default_value.getDataColumn(), 0, offsets_diff - 1);
|
||||
|
||||
return res;
|
||||
}
|
||||
@ -83,6 +84,11 @@ bool isColumnNullable(const IColumn & column)
|
||||
return checkColumn<ColumnNullable>(column);
|
||||
}
|
||||
|
||||
bool isColumnNullableOrLowCardinalityNullable(const IColumn & column)
|
||||
{
|
||||
return isColumnNullable(column) || isColumnLowCardinalityNullable(column);
|
||||
}
|
||||
|
||||
bool isColumnConst(const IColumn & column)
|
||||
{
|
||||
return checkColumn<ColumnConst>(column);
|
||||
|
@ -34,6 +34,7 @@ class Arena;
|
||||
class ColumnGathererStream;
|
||||
class Field;
|
||||
class WeakHash32;
|
||||
class ColumnConst;
|
||||
|
||||
/*
|
||||
* Represents a set of equal ranges in previous column to perform sorting in current column.
|
||||
@ -459,10 +460,10 @@ public:
|
||||
|
||||
/// Returns column with @total_rows elements.
|
||||
/// In result column values from current column are at positions from @offsets.
|
||||
/// Other values are filled by @default_value.
|
||||
/// Other values are filled by value from @column_with_default_value.
|
||||
/// @shift means how much rows to skip from the beginning of current column.
|
||||
/// Used to create full column from sparse.
|
||||
[[nodiscard]] virtual Ptr createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const;
|
||||
[[nodiscard]] virtual Ptr createWithOffsets(const Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const;
|
||||
|
||||
/// Compress column in memory to some representation that allows to decompress it back.
|
||||
/// Return itself if compression is not applicable for this column type.
|
||||
@ -659,4 +660,7 @@ bool isColumnConst(const IColumn & column);
|
||||
/// True if column is a ColumnNullable instance. It's just syntactic sugar for a type check.
|
||||
bool isColumnNullable(const IColumn & column);
|
||||
|
||||
/// True if column is ColumnNullable or ColumnLowCardinality with nullable nested column.
|
||||
bool isColumnNullableOrLowCardinalityNullable(const IColumn & column);
|
||||
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverted)
|
||||
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverted, T default_value)
|
||||
{
|
||||
if (mask.size() < data.size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Mask size should be no less than data size.");
|
||||
@ -38,7 +38,7 @@ void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & ma
|
||||
--from;
|
||||
}
|
||||
else
|
||||
data[index] = T();
|
||||
data[index] = default_value;
|
||||
|
||||
--index;
|
||||
}
|
||||
@ -49,7 +49,7 @@ void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & ma
|
||||
|
||||
/// Explicit instantiations - not to place the implementation of the function above in the header file.
|
||||
#define INSTANTIATE(TYPE) \
|
||||
template void expandDataByMask<TYPE>(PaddedPODArray<TYPE> &, const PaddedPODArray<UInt8> &, bool);
|
||||
template void expandDataByMask<TYPE>(PaddedPODArray<TYPE> &, const PaddedPODArray<UInt8> &, bool, TYPE);
|
||||
|
||||
INSTANTIATE(UInt8)
|
||||
INSTANTIATE(UInt16)
|
||||
|
@ -13,7 +13,7 @@ namespace DB
|
||||
/// If inverted is true, we will work with inverted mask. This function is used in implementations of
|
||||
/// expand() method in IColumn interface.
|
||||
template <typename T>
|
||||
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverted);
|
||||
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverted, T default_value = T());
|
||||
|
||||
struct MaskInfo
|
||||
{
|
||||
|
692
src/Columns/tests/gtest_column_variant.cpp
Normal file
692
src/Columns/tests/gtest_column_variant.cpp
Normal file
@ -0,0 +1,692 @@
|
||||
#include <Columns/ColumnVariant.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
using namespace DB;
|
||||
|
||||
/// A variant column built from empty nested columns must be empty itself,
/// and so must its discriminators and offsets.
TEST(ColumnVariant, CreateFromEmptyColumns)
{
    MutableColumns variants;
    variants.push_back(ColumnUInt32::create());
    variants.push_back(ColumnString::create());

    auto variant_column = ColumnVariant::create(std::move(variants));

    ASSERT_TRUE(variant_column->empty());
    ASSERT_TRUE(variant_column->getLocalDiscriminators().empty());
    ASSERT_TRUE(variant_column->getOffsets().empty());
}
|
||||
|
||||
/// Same as CreateFromEmptyColumns, but a swapped local -> global order is supplied.
TEST(ColumnVariant, CreateFromEmptyColumnsWithLocalOrder)
{
    MutableColumns variants;
    variants.push_back(ColumnUInt32::create());
    variants.push_back(ColumnString::create());

    const std::vector<ColumnVariant::Discriminator> swapped_order{1, 0};
    auto variant_column = ColumnVariant::create(std::move(variants), swapped_order);

    ASSERT_TRUE(variant_column->empty());
    ASSERT_TRUE(variant_column->getLocalDiscriminators().empty());
    ASSERT_TRUE(variant_column->getOffsets().empty());

    /// NOTE(review): the expectations below are the identity mapping although {1, 0} was passed;
    /// presumably the column falls back to the global order when it is empty — confirm against the constructor.
    for (ColumnVariant::Discriminator discr = 0; discr != 2; ++discr)
        ASSERT_EQ(variant_column->localDiscriminatorByGlobal(discr), discr);
    for (ColumnVariant::Discriminator discr = 0; discr != 2; ++discr)
        ASSERT_EQ(variant_column->globalDiscriminatorByLocal(discr), discr);
}
|
||||
|
||||
/// Builds three variants: UInt64 {42}, String {"Hello", "World"} and an empty UInt32 column.
MutableColumns createColumns1()
{
    auto numbers = ColumnUInt64::create();
    numbers->insertValue(42);

    auto strings = ColumnString::create();
    strings->insertData("Hello", 5);
    strings->insertData("World", 5);

    auto empty_numbers = ColumnUInt32::create();

    MutableColumns result;
    result.push_back(std::move(numbers));
    result.push_back(std::move(strings));
    result.push_back(std::move(empty_numbers));
    return result;
}
|
||||
|
||||
/// Builds the discriminators {0, 1, NULL, 1, NULL} that match createColumns1().
MutableColumnPtr createDiscriminators1()
{
    const ColumnVariant::Discriminator row_discriminators[] = {
        0,
        1,
        ColumnVariant::NULL_DISCRIMINATOR,
        1,
        ColumnVariant::NULL_DISCRIMINATOR,
    };

    auto result = ColumnVariant::ColumnDiscriminators::create();
    for (const auto discr : row_discriminators)
        result->insertValue(discr);
    return result;
}
|
||||
|
||||
/// Rearranges `columns` (given in global order) into local order:
/// position i of the result receives columns[local_to_global_order[i]].
void reorderColumns(const std::vector<ColumnVariant::Discriminator> & local_to_global_order, MutableColumns & columns)
{
    MutableColumns reordered;
    for (size_t local = 0; local != local_to_global_order.size(); ++local)
    {
        const auto global = local_to_global_order[local];
        reordered.push_back(std::move(columns[global]));
    }
    columns = std::move(reordered);
}
|
||||
|
||||
template <typename Ptr>
|
||||
void reorderDiscriminators(const std::vector<ColumnVariant::Discriminator> & local_to_global_order, Ptr & discriminators)
|
||||
{
|
||||
std::vector<ColumnVariant::Discriminator> global_to_local_order(local_to_global_order.size());
|
||||
for (size_t i = 0; i != local_to_global_order.size(); ++i)
|
||||
global_to_local_order[local_to_global_order[i]] = i;
|
||||
|
||||
auto & discriminators_data = assert_cast<ColumnVariant::ColumnDiscriminators *>(discriminators.get())->getData();
|
||||
for (auto & discr : discriminators_data)
|
||||
{
|
||||
if (discr != ColumnVariant::NULL_DISCRIMINATOR)
|
||||
discr = global_to_local_order[discr];
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the offsets {0, 0, 0, 1, 0} that match createDiscriminators1().
MutableColumnPtr createOffsets1()
{
    auto result = ColumnVariant::ColumnOffsets::create();
    for (const int offset : {0, 0, 0, 1, 0})
        result->insertValue(offset);
    return result;
}
|
||||
|
||||
std::vector<ColumnVariant::Discriminator> createLocalToGlobalOrder1()
|
||||
{
|
||||
std::vector<ColumnVariant::Discriminator> local_to_global_discriminators;
|
||||
local_to_global_discriminators.push_back(1);
|
||||
local_to_global_discriminators.push_back(2);
|
||||
local_to_global_discriminators.push_back(0);
|
||||
return local_to_global_discriminators;
|
||||
}
|
||||
|
||||
/// Verifies the reference column built from createColumns1()/createDiscriminators1():
/// its rows are {42, "Hello", NULL, "World", NULL}.
void checkColumnVariant1(ColumnVariant * column)
{
    const auto & offsets = column->getOffsets();
    ASSERT_EQ(column->size(), 5);

    /// Only the offsets of non-NULL rows are checked.
    ASSERT_EQ(offsets[0], 0);
    ASSERT_EQ(offsets[1], 0);
    ASSERT_EQ(offsets[3], 1);
    ASSERT_TRUE(column->isDefaultAt(2) && column->isDefaultAt(4));

    /// The numeric variant is a ColumnUInt64 (see createColumns1), so it is read as UInt64,
    /// consistently with the other tests in this file.
    ASSERT_EQ((*column)[0].get<UInt64>(), 42);
    ASSERT_EQ((*column)[1].get<String>(), "Hello");
    ASSERT_TRUE((*column)[2].isNull());
    ASSERT_EQ((*column)[3].get<String>(), "World");
    ASSERT_TRUE((*column)[4].isNull());
}
|
||||
|
||||
/// Verifies the discriminator mappings and per-row discriminators of the reference column
/// when it was created with the local -> global order {1, 2, 0}.
void checkColumnVariant1Order(ColumnVariant * column)
{
    using Discriminator = ColumnVariant::Discriminator;
    const Discriminator null_discr = ColumnVariant::NULL_DISCRIMINATOR;

    /// Mapping checks: global -> local is {2, 0, 1}, local -> global is {1, 2, 0}.
    const std::vector<Discriminator> expected_local_by_global = {2, 0, 1};
    for (Discriminator global = 0; global != 3; ++global)
        ASSERT_EQ(column->localDiscriminatorByGlobal(global), expected_local_by_global[global]);

    const std::vector<Discriminator> expected_global_by_local = {1, 2, 0};
    for (Discriminator local = 0; local != 3; ++local)
        ASSERT_EQ(column->globalDiscriminatorByLocal(local), expected_global_by_local[local]);

    /// Per-row checks: first the stored local discriminators, then their global translation.
    const std::vector<Discriminator> expected_local_at = {2, 0, null_discr, 0, null_discr};
    for (size_t row = 0; row != expected_local_at.size(); ++row)
        ASSERT_EQ(column->localDiscriminatorAt(row), expected_local_at[row]);

    const std::vector<Discriminator> expected_global_at = {0, 1, null_discr, 1, null_discr};
    for (size_t row = 0; row != expected_global_at.size(); ++row)
        ASSERT_EQ(column->globalDiscriminatorAt(row), expected_global_at[row]);
}
|
||||
|
||||
/// Creating from discriminators and variants only: offsets are not supplied by the caller.
TEST(ColumnVariant, CreateFromDiscriminatorsAndColumns)
{
    auto variants = createColumns1();
    auto local_discriminators = createDiscriminators1();

    auto variant_column = ColumnVariant::create(std::move(local_discriminators), std::move(variants));

    checkColumnVariant1(variant_column.get());
}
|
||||
|
||||
/// Same as CreateFromDiscriminatorsAndColumns, but variants and discriminators are first
/// rearranged into a non-identity local order.
TEST(ColumnVariant, CreateFromDiscriminatorsAndColumnsWithLocalOrder)
{
    const auto order = createLocalToGlobalOrder1();

    auto variants = createColumns1();
    reorderColumns(order, variants);

    auto local_discriminators = createDiscriminators1();
    reorderDiscriminators(order, local_discriminators);

    auto variant_column = ColumnVariant::create(std::move(local_discriminators), std::move(variants), createLocalToGlobalOrder1());

    checkColumnVariant1(variant_column.get());
    checkColumnVariant1Order(variant_column.get());
}
|
||||
|
||||
/// Creating from explicitly supplied discriminators, offsets and variants.
TEST(ColumnVariant, CreateFromDiscriminatorsOffsetsAndColumns)
{
    auto variants = createColumns1();
    auto local_discriminators = createDiscriminators1();
    auto row_offsets = createOffsets1();

    auto variant_column = ColumnVariant::create(std::move(local_discriminators), std::move(row_offsets), std::move(variants));

    checkColumnVariant1(variant_column.get());
}
|
||||
|
||||
/// Same as CreateFromDiscriminatorsOffsetsAndColumns, but with a non-identity local order.
TEST(ColumnVariant, CreateFromDiscriminatorsOffsetsAndColumnsWithLocalOrder)
{
    const auto order = createLocalToGlobalOrder1();

    auto variants = createColumns1();
    reorderColumns(order, variants);

    auto local_discriminators = createDiscriminators1();
    reorderDiscriminators(order, local_discriminators);

    auto row_offsets = createOffsets1();

    auto variant_column = ColumnVariant::create(
        std::move(local_discriminators), std::move(row_offsets), std::move(variants), createLocalToGlobalOrder1());

    checkColumnVariant1(variant_column.get());
    checkColumnVariant1Order(variant_column.get());
}
|
||||
|
||||
/// Builds a variant column of `size` rows in which every row belongs to the UInt64 variant
/// (values 0 .. size-1); the String and UInt32 variants stay empty and there are no NULLs.
/// With change_order the variants and discriminators are rearranged into the local order
/// given by createLocalToGlobalOrder1().
ColumnVariant::MutablePtr createVariantWithOneFullColumNoNulls(size_t size, bool change_order)
{
    auto full_column = ColumnUInt64::create();
    for (size_t row = 0; row != size; ++row)
        full_column->insertValue(row);

    MutableColumns columns;
    columns.push_back(std::move(full_column));
    columns.push_back(ColumnString::create());
    columns.push_back(ColumnUInt32::create());

    /// Every row points at the first variant (global discriminator 0).
    auto discriminators_column = ColumnVariant::ColumnDiscriminators::create();
    for (size_t row = 0; row != size; ++row)
        discriminators_column->insertValue(0);

    if (!change_order)
        return ColumnVariant::create(std::move(discriminators_column), std::move(columns));

    const auto local_to_global_order = createLocalToGlobalOrder1();
    reorderColumns(local_to_global_order, columns);
    reorderDiscriminators(local_to_global_order, discriminators_column);
    return ColumnVariant::create(std::move(discriminators_column), std::move(columns), createLocalToGlobalOrder1());
}
|
||||
|
||||
/// With a single full variant and no NULLs, row i must have offset i and value i.
TEST(ColumnVariant, CreateFromDiscriminatorsAndOneFullColumnNoNulls)
{
    constexpr size_t num_rows = 3;
    auto variant_column = createVariantWithOneFullColumNoNulls(num_rows, false);
    const auto & row_offsets = variant_column->getOffsets();

    ASSERT_EQ(variant_column->size(), num_rows);
    for (size_t row = 0; row != num_rows; ++row)
        ASSERT_EQ(row_offsets[row], row);
    for (size_t row = 0; row != num_rows; ++row)
        ASSERT_EQ((*variant_column)[row].get<UInt64>(), row);
}
|
||||
|
||||
/// Same scenario as CreateFromDiscriminatorsAndOneFullColumnNoNulls, but with the local order {1, 2, 0}:
/// the full UInt64 variant keeps global discriminator 0 and gets local discriminator 2.
TEST(ColumnVariant, CreateFromDiscriminatorsAndOneFullColumnNoNullsWithLocalOrder)
{
    auto column = createVariantWithOneFullColumNoNulls(3, true);
    const auto & offsets = column->getOffsets();
    ASSERT_EQ(column->size(), 3);
    ASSERT_EQ(offsets[0], 0);
    ASSERT_EQ(offsets[1], 1);
    ASSERT_EQ(offsets[2], 2);
    ASSERT_EQ((*column)[0].get<UInt64>(), 0);
    ASSERT_EQ((*column)[1].get<UInt64>(), 1);
    ASSERT_EQ((*column)[2].get<UInt64>(), 2);
    ASSERT_EQ(column->localDiscriminatorAt(0), 2);
    ASSERT_EQ(column->localDiscriminatorAt(1), 2);
    ASSERT_EQ(column->localDiscriminatorAt(2), 2);
    /// Check the global discriminator of every row, not only of row 0.
    ASSERT_EQ(column->globalDiscriminatorAt(0), 0);
    ASSERT_EQ(column->globalDiscriminatorAt(1), 0);
    ASSERT_EQ(column->globalDiscriminatorAt(2), 0);
}
|
||||
|
||||
/// Resizing the reference column down to zero rows yields an empty column.
TEST(ColumnVariant, CloneResizedToEmpty)
{
    auto source = ColumnVariant::create(createDiscriminators1(), createOffsets1(), createColumns1());

    auto shrunk = source->cloneResized(0);

    ASSERT_TRUE(shrunk->empty());
}
|
||||
|
||||
/// Growing the 5-row reference column to 7 rows: the extra rows must be NULLs with offset 0,
/// and the nested variants must keep their original sizes.
TEST(ColumnVariant, CloneResizedToLarge)
{
    auto source = ColumnVariant::create(createDiscriminators1(), createOffsets1(), createColumns1());
    auto enlarged = source->cloneResized(7);
    const auto * enlarged_variant = assert_cast<const ColumnVariant *>(enlarged.get());
    ASSERT_EQ(enlarged_variant->size(), 7);

    /// Row 3 ("World") is the only row with a non-zero offset.
    const auto & row_offsets = enlarged_variant->getOffsets();
    for (size_t row = 0; row != 7; ++row)
    {
        if (row != 3)
            ASSERT_EQ(row_offsets[row], 0);
        else
            ASSERT_EQ(row_offsets[row], 1);
    }

    /// The original NULL rows (2, 4) and the appended rows (5, 6).
    const auto & row_discriminators = enlarged_variant->getLocalDiscriminators();
    const size_t null_rows[] = {2, 4, 5, 6};
    for (const size_t row : null_rows)
        ASSERT_EQ(row_discriminators[row], ColumnVariant::NULL_DISCRIMINATOR);

    ASSERT_EQ(enlarged_variant->getVariantByLocalDiscriminator(0).size(), 1);
    ASSERT_EQ(enlarged_variant->getVariantByLocalDiscriminator(1).size(), 2);
}
|
||||
|
||||
/// Shrinking a column with a single full variant from 5 to 3 rows keeps the first 3 rows
/// of that variant and leaves the other variants empty.
TEST(ColumnVariant, CloneResizedWithOneFullColumnNoNulls)
{
    constexpr size_t new_size = 3;
    auto source = createVariantWithOneFullColumNoNulls(5, false);
    auto shrunk = source->cloneResized(new_size);
    const auto * shrunk_variant = assert_cast<const ColumnVariant *>(shrunk.get());
    ASSERT_EQ(shrunk_variant->size(), new_size);

    const auto & row_offsets = shrunk_variant->getOffsets();
    for (size_t row = 0; row != new_size; ++row)
        ASSERT_EQ(row_offsets[row], row);

    const auto & row_discriminators = shrunk_variant->getLocalDiscriminators();
    for (size_t row = 0; row != new_size; ++row)
        ASSERT_EQ(row_discriminators[row], 0);

    ASSERT_EQ(shrunk_variant->getVariantByLocalDiscriminator(0).size(), 3);
    ASSERT_EQ(shrunk_variant->getVariantByLocalDiscriminator(1).size(), 0);
    ASSERT_EQ(shrunk_variant->getVariantByLocalDiscriminator(2).size(), 0);
}
|
||||
|
||||
/// Builds three variants: UInt64 {42, 43, 44}, String {"Hello", "World"} and an empty UInt8 column.
MutableColumns createColumns2()
{
    auto numbers = ColumnUInt64::create();
    for (const int value : {42, 43, 44})
        numbers->insertValue(value);

    auto strings = ColumnString::create();
    strings->insertData("Hello", 5);
    strings->insertData("World", 5);

    auto empty_numbers = ColumnUInt8::create();

    MutableColumns result;
    result.push_back(std::move(numbers));
    result.push_back(std::move(strings));
    result.push_back(std::move(empty_numbers));
    return result;
}
|
||||
|
||||
/// Shrinks a 7-row column to 4 rows and checks which values remain in each variant.
TEST(ColumnVariant, CloneResizedGeneral1)
{
    /// Source layout (D = discriminator per row, c1..c3 = contents of the variants):
    /// D     c1  c2     c3
    /// 0     42  Hello
    /// 1     43  World
    /// NULL  44
    /// 0
    /// 1
    /// NULL
    /// 0
    const ColumnVariant::Discriminator source_discriminators[] = {
        0,
        1,
        ColumnVariant::NULL_DISCRIMINATOR,
        0,
        1,
        ColumnVariant::NULL_DISCRIMINATOR,
        0,
    };
    auto discriminators_column = ColumnVariant::ColumnDiscriminators::create();
    for (const auto discr : source_discriminators)
        discriminators_column->insertValue(discr);

    auto source = ColumnVariant::create(std::move(discriminators_column), createColumns2());
    auto shrunk = source->cloneResized(4);
    const auto * shrunk_variant = assert_cast<const ColumnVariant *>(shrunk.get());
    ASSERT_EQ(shrunk_variant->size(), 4);

    /// The first 4 rows reference two UInt64 values and one String value.
    ASSERT_EQ(shrunk_variant->getVariantByLocalDiscriminator(0).size(), 2);
    ASSERT_EQ(shrunk_variant->getVariantByLocalDiscriminator(1).size(), 1);
    ASSERT_EQ(shrunk_variant->getVariantByLocalDiscriminator(2).size(), 0);

    const auto & row_discriminators = shrunk_variant->getLocalDiscriminators();
    for (size_t row = 0; row != 4; ++row)
        ASSERT_EQ(row_discriminators[row], source_discriminators[row]);

    /// Row 2 is NULL, so its offset is not checked.
    const auto & row_offsets = shrunk_variant->getOffsets();
    ASSERT_EQ(row_offsets[0], 0);
    ASSERT_EQ(row_offsets[1], 0);
    ASSERT_EQ(row_offsets[3], 1);

    ASSERT_EQ((*shrunk_variant)[0].get<UInt64>(), 42);
    ASSERT_EQ((*shrunk_variant)[1].get<String>(), "Hello");
    ASSERT_EQ((*shrunk_variant)[3].get<UInt64>(), 43);
}
|
||||
|
||||
/// Shrinks a 7-row column to 3 rows where only the first kept row is non-NULL.
TEST(ColumnVariant, CloneResizedGeneral2)
{
    /// Source layout (D = discriminator per row, c1..c3 = contents of the variants):
    /// D     c1  c2     c3
    /// 0     42  Hello
    /// NULL  43  World
    /// NULL  44
    /// 0
    /// 1
    /// 1
    /// 0
    const ColumnVariant::Discriminator source_discriminators[] = {
        0,
        ColumnVariant::NULL_DISCRIMINATOR,
        ColumnVariant::NULL_DISCRIMINATOR,
        0,
        1,
        1,
        0,
    };
    auto discriminators_column = ColumnVariant::ColumnDiscriminators::create();
    for (const auto discr : source_discriminators)
        discriminators_column->insertValue(discr);

    auto source = ColumnVariant::create(std::move(discriminators_column), createColumns2());
    auto shrunk = source->cloneResized(3);
    const auto * shrunk_variant = assert_cast<const ColumnVariant *>(shrunk.get());
    ASSERT_EQ(shrunk_variant->size(), 3);

    /// Only one UInt64 value is referenced by the first 3 rows.
    ASSERT_EQ(shrunk_variant->getVariantByLocalDiscriminator(0).size(), 1);
    ASSERT_EQ(shrunk_variant->getVariantByLocalDiscriminator(1).size(), 0);
    ASSERT_EQ(shrunk_variant->getVariantByLocalDiscriminator(2).size(), 0);

    const auto & row_discriminators = shrunk_variant->getLocalDiscriminators();
    for (size_t row = 0; row != 3; ++row)
        ASSERT_EQ(row_discriminators[row], source_discriminators[row]);

    const auto & row_offsets = shrunk_variant->getOffsets();
    ASSERT_EQ(row_offsets[0], 0);
    ASSERT_EQ((*shrunk_variant)[0].get<UInt64>(), 42);
}
|
||||
|
||||
TEST(ColumnVariant, CloneResizedGeneral3)
{
    /// Source rows (D is the local discriminator, c1..c3 are the variant columns):
    ///   D     c1  c2     c3
    ///   0     42  Hello
    ///   1     43  World
    ///   1     44
    ///   0
    ///   NULL
    ///   NULL
    ///   0
    const ColumnVariant::Discriminator source_rows[] = {
        0,
        1,
        1,
        0,
        ColumnVariant::NULL_DISCRIMINATOR,
        ColumnVariant::NULL_DISCRIMINATOR,
        0,
    };
    auto source_discriminators = ColumnVariant::ColumnDiscriminators::create();
    for (const auto row_discriminator : source_rows)
        source_discriminators->insertValue(row_discriminator);

    auto source = ColumnVariant::create(std::move(source_discriminators), createColumns2());

    /// Keep the first five rows: two values of variant 0, two of variant 1 and one NULL.
    auto shrunk = source->cloneResized(5);
    const auto * shrunk_variant = assert_cast<const ColumnVariant *>(shrunk.get());
    ASSERT_EQ(shrunk_variant->size(), 5);

    ASSERT_EQ(shrunk_variant->getVariantByLocalDiscriminator(0).size(), 2);
    ASSERT_EQ(shrunk_variant->getVariantByLocalDiscriminator(1).size(), 2);
    ASSERT_EQ(shrunk_variant->getVariantByLocalDiscriminator(2).size(), 0);

    const auto & local_discriminators = shrunk_variant->getLocalDiscriminators();
    ASSERT_EQ(local_discriminators[0], 0);
    ASSERT_EQ(local_discriminators[1], 1);
    ASSERT_EQ(local_discriminators[2], 1);
    ASSERT_EQ(local_discriminators[3], 0);

    /// Offsets index into the nested column selected by the row's discriminator.
    const auto & row_offsets = shrunk_variant->getOffsets();
    ASSERT_EQ(row_offsets[0], 0);
    ASSERT_EQ(row_offsets[1], 0);
    ASSERT_EQ(row_offsets[2], 1);
    ASSERT_EQ(row_offsets[3], 1);

    ASSERT_EQ((*shrunk_variant)[0].get<UInt64>(), 42);
    ASSERT_EQ((*shrunk_variant)[1].get<String>(), "Hello");
    ASSERT_EQ((*shrunk_variant)[2].get<String>(), "World");
    ASSERT_EQ((*shrunk_variant)[3].get<UInt64>(), 43);
}
|
||||
|
||||
/// Builds the discriminators column shared by the "general" tests:
/// 0, 1, NULL, 0, 1, NULL, 0 (seven rows).
MutableColumnPtr createDiscriminators2()
{
    const ColumnVariant::Discriminator rows[] = {
        0,
        1,
        ColumnVariant::NULL_DISCRIMINATOR,
        0,
        1,
        ColumnVariant::NULL_DISCRIMINATOR,
        0,
    };

    auto discriminators = ColumnVariant::ColumnDiscriminators::create();
    for (const auto row_discriminator : rows)
        discriminators->insertValue(row_discriminator);
    return discriminators;
}
|
||||
|
||||
std::vector<ColumnVariant::Discriminator> createLocalToGlobalOrder2()
|
||||
{
|
||||
std::vector<ColumnVariant::Discriminator> local_to_global_discriminators;
|
||||
local_to_global_discriminators.push_back(2);
|
||||
local_to_global_discriminators.push_back(0);
|
||||
local_to_global_discriminators.push_back(1);
|
||||
return local_to_global_discriminators;
|
||||
}
|
||||
|
||||
/// Creates a Variant column from createColumns1() / createDiscriminators1().
/// With `reorder` set, the nested columns and discriminators are first passed through
/// reorderColumns / reorderDiscriminators and the column is created with an explicit
/// local-to-global order from createLocalToGlobalOrder1().
ColumnVariant::MutablePtr createVariantColumn1(bool reorder)
{
    auto variant_columns = createColumns1();
    auto local_discriminators = createDiscriminators1();

    if (reorder)
    {
        auto order = createLocalToGlobalOrder1();
        reorderColumns(order, variant_columns);
        reorderDiscriminators(order, local_discriminators);
        return ColumnVariant::create(std::move(local_discriminators), std::move(variant_columns), order);
    }

    return ColumnVariant::create(std::move(local_discriminators), std::move(variant_columns));
}
|
||||
|
||||
/// Creates a Variant column from createColumns2() / createDiscriminators2().
/// With `reorder` set, the nested columns and discriminators are first passed through
/// reorderColumns / reorderDiscriminators and the column is created with an explicit
/// local-to-global order from createLocalToGlobalOrder2().
ColumnVariant::MutablePtr createVariantColumn2(bool reorder)
{
    auto variant_columns = createColumns2();
    auto local_discriminators = createDiscriminators2();

    if (reorder)
    {
        auto order = createLocalToGlobalOrder2();
        reorderColumns(order, variant_columns);
        reorderDiscriminators(order, local_discriminators);
        return ColumnVariant::create(std::move(local_discriminators), std::move(variant_columns), order);
    }

    return ColumnVariant::create(std::move(local_discriminators), std::move(variant_columns));
}
|
||||
|
||||
TEST(ColumnVariant, InsertFrom)
{
    /// Exercise both the default and the reordered variant layout.
    const bool order_modes[] = {false, true};
    for (const bool change_order : order_modes)
    {
        auto destination = createVariantColumn1(change_order);
        auto source = createVariantColumn2(change_order);

        /// Copy a single row (source row 3); the checks expect it at index 5 of the destination.
        destination->insertFrom(*source, 3);

        ASSERT_EQ(destination->globalDiscriminatorAt(5), 0);
        ASSERT_EQ((*destination)[5].get<UInt64>(), 43);
    }
}
|
||||
|
||||
TEST(ColumnVariant, InsertRangeFromOneColumnNoNulls)
{
    /// Exercise both the default and the reordered variant layout.
    const bool order_modes[] = {false, true};
    for (const bool change_order : order_modes)
    {
        auto destination = createVariantColumn2(change_order);
        auto source = createVariantWithOneFullColumNoNulls(5, change_order);

        /// Append two rows starting at source row 2; they are expected at indexes 7 and 8.
        destination->insertRangeFrom(*source, 2, 2);

        ASSERT_EQ(destination->globalDiscriminatorAt(7), 0);
        ASSERT_EQ(destination->globalDiscriminatorAt(8), 0);
        ASSERT_EQ((*destination)[7].get<UInt64>(), 2);
        ASSERT_EQ((*destination)[8].get<UInt64>(), 3);
    }
}
|
||||
|
||||
TEST(ColumnVariant, InsertRangeFromGeneral)
{
    /// Exercise both the default and the reordered variant layout.
    const bool order_modes[] = {false, true};
    for (const bool change_order : order_modes)
    {
        auto destination = createVariantColumn1(change_order);
        auto source = createVariantColumn2(change_order);

        /// Append source rows 1..4; they are expected at indexes 5..8 of the destination.
        destination->insertRangeFrom(*source, 1, 4);

        ASSERT_EQ(destination->globalDiscriminatorAt(5), 1);
        ASSERT_EQ(destination->globalDiscriminatorAt(6), ColumnVariant::NULL_DISCRIMINATOR);
        ASSERT_EQ(destination->globalDiscriminatorAt(7), 0);
        ASSERT_EQ(destination->globalDiscriminatorAt(8), 1);

        /// Row 6 is NULL, so only the non-NULL rows have their values checked.
        ASSERT_EQ((*destination)[5].get<String>(), "Hello");
        ASSERT_EQ((*destination)[7].get<UInt64>(), 43);
        ASSERT_EQ((*destination)[8].get<String>(), "World");
    }
}
|
||||
|
||||
TEST(ColumnVariant, InsertManyFrom)
{
    /// Exercise both the default and the reordered variant layout.
    const bool order_modes[] = {false, true};
    for (const bool change_order : order_modes)
    {
        auto destination = createVariantColumn1(change_order);
        auto source = createVariantColumn2(change_order);

        /// Insert source row 3 twice; both copies are expected at indexes 5 and 6.
        destination->insertManyFrom(*source, 3, 2);

        ASSERT_EQ(destination->globalDiscriminatorAt(5), 0);
        ASSERT_EQ(destination->globalDiscriminatorAt(6), 0);
        ASSERT_EQ((*destination)[5].get<UInt64>(), 43);
        ASSERT_EQ((*destination)[6].get<UInt64>(), 43);
    }
}
|
||||
|
||||
TEST(ColumnVariant, PopBackOneColumnNoNulls)
{
    /// Five rows, then drop the last three: two rows must remain.
    auto variant_column = createVariantWithOneFullColumNoNulls(5, false);
    variant_column->popBack(3);

    ASSERT_EQ(variant_column->size(), 2);
    ASSERT_EQ(variant_column->getVariantByLocalDiscriminator(0).size(), 2);

    ASSERT_EQ((*variant_column)[0].get<UInt64>(), 0);
    ASSERT_EQ((*variant_column)[1].get<UInt64>(), 1);
}
|
||||
|
||||
TEST(ColumnVariant, PopBackGeneral)
{
    /// Seven rows with discriminators 0, 1, NULL, 0, 1, NULL, 0; dropping four leaves 0, 1, NULL.
    auto variant_column = ColumnVariant::create(createDiscriminators2(), createColumns2());
    variant_column->popBack(4);

    ASSERT_EQ(variant_column->size(), 3);
    ASSERT_EQ(variant_column->getVariantByLocalDiscriminator(0).size(), 1);
    ASSERT_EQ(variant_column->getVariantByLocalDiscriminator(1).size(), 1);

    ASSERT_EQ((*variant_column)[0].get<UInt64>(), 42);
    ASSERT_EQ((*variant_column)[1].get<String>(), "Hello");
    ASSERT_TRUE((*variant_column)[2].isNull());
}
|
||||
|
||||
TEST(ColumnVariant, FilterOneColumnNoNulls)
{
    auto variant_column = createVariantWithOneFullColumNoNulls(3, false);

    /// Keep rows 0 and 2, drop row 1.
    IColumn::Filter row_mask;
    for (int keep : {1, 0, 1})
        row_mask.push_back(keep);

    auto kept_rows = variant_column->filter(row_mask, -1);

    ASSERT_EQ(kept_rows->size(), 2);
    ASSERT_EQ((*kept_rows)[0].get<UInt64>(), 0);
    ASSERT_EQ((*kept_rows)[1].get<UInt64>(), 2);
}
|
||||
|
||||
TEST(ColumnVariant, FilterGeneral)
{
    auto variant_column = ColumnVariant::create(createDiscriminators2(), createColumns2());

    /// Keep rows 1, 2 and 5 of the seven-row column (discriminators 1, NULL, NULL).
    IColumn::Filter row_mask;
    for (int keep : {0, 1, 1, 0, 0, 1, 0})
        row_mask.push_back(keep);

    auto kept_rows = variant_column->filter(row_mask, -1);

    ASSERT_EQ(kept_rows->size(), 3);
    ASSERT_EQ((*kept_rows)[0].get<String>(), "Hello");
    ASSERT_TRUE((*kept_rows)[1].isNull());
    ASSERT_TRUE((*kept_rows)[2].isNull());
}
|
||||
|
||||
TEST(ColumnVariant, PermuteAndIndexOneColumnNoNulls)
{
    auto variant_column = createVariantWithOneFullColumNoNulls(4, false);

    /// The same row order is applied via permute() and via index(); with limit 3
    /// only the first three positions (rows 1, 3, 2) are expected in the result.
    const int row_order[] = {1, 3, 2, 0};

    IColumn::Permutation permutation;
    for (const int row : row_order)
        permutation.push_back(row);

    auto permuted = variant_column->permute(permutation, 3);
    ASSERT_EQ(permuted->size(), 3);
    ASSERT_EQ((*permuted)[0].get<UInt64>(), 1);
    ASSERT_EQ((*permuted)[1].get<UInt64>(), 3);
    ASSERT_EQ((*permuted)[2].get<UInt64>(), 2);

    auto positions = ColumnUInt64::create();
    for (const int row : row_order)
        positions->getData().push_back(row);

    auto indexed = variant_column->index(*positions, 3);
    ASSERT_EQ(indexed->size(), 3);
    ASSERT_EQ((*indexed)[0].get<UInt64>(), 1);
    ASSERT_EQ((*indexed)[1].get<UInt64>(), 3);
    ASSERT_EQ((*indexed)[2].get<UInt64>(), 2);
}
|
||||
|
||||
TEST(ColumnVariant, PermuteGeneral)
{
    auto variant_column = ColumnVariant::create(createDiscriminators2(), createColumns2());

    /// Pick rows 3, 4, 1, 5 of the source, in that order.
    IColumn::Permutation permutation;
    for (int row : {3, 4, 1, 5})
        permutation.push_back(row);

    auto permuted = variant_column->permute(permutation, 4);

    ASSERT_EQ(permuted->size(), 4);
    ASSERT_EQ((*permuted)[0].get<UInt64>(), 43);
    ASSERT_EQ((*permuted)[1].get<String>(), "World");
    ASSERT_EQ((*permuted)[2].get<String>(), "Hello");
    ASSERT_TRUE((*permuted)[3].isNull());
}
|
||||
|
||||
TEST(ColumnVariant, ReplicateOneColumnNoNull)
{
    auto variant_column = createVariantWithOneFullColumNoNulls(3, false);

    /// Cumulative offsets 0, 3, 6: row 0 is dropped, rows 1 and 2 are repeated three times each.
    IColumn::Offsets replicate_offsets;
    for (int end_position : {0, 3, 6})
        replicate_offsets.push_back(end_position);

    auto replicated = variant_column->replicate(replicate_offsets);

    ASSERT_EQ(replicated->size(), 6);
    ASSERT_EQ((*replicated)[0].get<UInt64>(), 1);
    ASSERT_EQ((*replicated)[1].get<UInt64>(), 1);
    ASSERT_EQ((*replicated)[2].get<UInt64>(), 1);
    ASSERT_EQ((*replicated)[3].get<UInt64>(), 2);
    ASSERT_EQ((*replicated)[4].get<UInt64>(), 2);
    ASSERT_EQ((*replicated)[5].get<UInt64>(), 2);
}
|
||||
|
||||
TEST(ColumnVariant, ReplicateGeneral)
{
    auto variant_column = ColumnVariant::create(createDiscriminators1(), createColumns1());

    /// Cumulative offsets 1, 3, 5, 5, 7: the rows are repeated 1, 2, 2, 0 and 2 times.
    IColumn::Offsets replicate_offsets;
    for (int end_position : {1, 3, 5, 5, 7})
        replicate_offsets.push_back(end_position);

    auto replicated = variant_column->replicate(replicate_offsets);

    ASSERT_EQ(replicated->size(), 7);
    ASSERT_EQ((*replicated)[0].get<UInt64>(), 42);
    ASSERT_EQ((*replicated)[1].get<String>(), "Hello");
    ASSERT_EQ((*replicated)[2].get<String>(), "Hello");
    ASSERT_TRUE((*replicated)[3].isNull());
    ASSERT_TRUE((*replicated)[4].isNull());
    ASSERT_TRUE((*replicated)[5].isNull());
    ASSERT_TRUE((*replicated)[6].isNull());
}
|
||||
|
||||
TEST(ColumnVariant, ScatterOneColumnNoNulls)
{
    auto variant_column = createVariantWithOneFullColumNoNulls(5, false);

    /// selector[i] is the index of the output column that receives row i.
    IColumn::Selector selector;
    for (int target : {0, 1, 2, 0, 1})
        selector.push_back(target);

    auto parts = variant_column->scatter(3, selector);

    ASSERT_EQ(parts[0]->size(), 2);
    ASSERT_EQ((*parts[0])[0].get<UInt64>(), 0);
    ASSERT_EQ((*parts[0])[1].get<UInt64>(), 3);

    ASSERT_EQ(parts[1]->size(), 2);
    ASSERT_EQ((*parts[1])[0].get<UInt64>(), 1);
    ASSERT_EQ((*parts[1])[1].get<UInt64>(), 4);

    ASSERT_EQ(parts[2]->size(), 1);
    ASSERT_EQ((*parts[2])[0].get<UInt64>(), 2);
}
|
||||
|
||||
TEST(ColumnVariant, ScatterGeneral)
{
    auto variant_column = ColumnVariant::create(createDiscriminators2(), createColumns2());

    /// selector[i] is the index of the output column that receives row i.
    /// Both NULL rows (2 and 5) are routed to the last output column.
    IColumn::Selector selector;
    for (int target : {0, 0, 2, 0, 1, 2, 1})
        selector.push_back(target);

    auto parts = variant_column->scatter(3, selector);

    ASSERT_EQ(parts[0]->size(), 3);
    ASSERT_EQ((*parts[0])[0].get<UInt64>(), 42);
    ASSERT_EQ((*parts[0])[1].get<String>(), "Hello");
    ASSERT_EQ((*parts[0])[2].get<UInt64>(), 43);

    ASSERT_EQ(parts[1]->size(), 2);
    ASSERT_EQ((*parts[1])[0].get<String>(), "World");
    ASSERT_EQ((*parts[1])[1].get<UInt64>(), 44);

    ASSERT_EQ(parts[2]->size(), 2);
    ASSERT_TRUE((*parts[2])[0].isNull());
    ASSERT_TRUE((*parts[2])[1].isNull());
}
|
@ -230,7 +230,7 @@ class IColumn;
|
||||
\
|
||||
M(Bool, force_index_by_date, false, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \
|
||||
M(Bool, force_primary_key, false, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
|
||||
M(Bool, use_skip_indexes, true, "Use data skipping indexes during query execution.", 0) \
|
||||
M(Bool, use_skip_indexes, true, "Use data skipping indexes during query execution.", 0) \
|
||||
M(Bool, use_skip_indexes_if_final, false, "If query has FINAL, then skipping data based on indexes may produce incorrect result, hence disabled by default.", 0) \
|
||||
M(String, ignore_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be excluded during query execution.", 0) \
|
||||
\
|
||||
@ -828,6 +828,7 @@ class IColumn;
|
||||
M(Bool, function_json_value_return_type_allow_complex, false, "Allow function JSON_VALUE to return complex type, such as: struct, array, map.", 0) \
|
||||
M(Bool, use_with_fill_by_sorting_prefix, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently", 0) \
|
||||
M(Bool, optimize_uniq_to_count, true, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.", 0) \
|
||||
M(Bool, use_variant_as_common_type, false, "Use Variant as a result type for if/multiIf in case when there is no common type for arguments", 0) \
|
||||
\
|
||||
/** Experimental functions */ \
|
||||
M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \
|
||||
@ -835,6 +836,7 @@ class IColumn;
|
||||
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
|
||||
M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \
|
||||
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
|
||||
M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \
|
||||
M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
|
||||
M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \
|
||||
M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \
|
||||
|
@ -87,6 +87,8 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
|
||||
{"24.1", {{"print_pretty_type_names", false, true, "Better user experience."},
|
||||
{"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"},
|
||||
{"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"},
|
||||
{"allow_experimental_variant_type", false, false, "Add new experimental Variant type"},
|
||||
{"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"},
|
||||
{"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"},
|
||||
{"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"},
|
||||
{"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"},
|
||||
|
@ -49,6 +49,7 @@ enum class TypeIndex
|
||||
IPv4,
|
||||
IPv6,
|
||||
JSONPaths,
|
||||
Variant,
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -290,6 +290,7 @@ DataTypeFactory::DataTypeFactory()
|
||||
registerDataTypeDomainGeo(*this);
|
||||
registerDataTypeMap(*this);
|
||||
registerDataTypeObject(*this);
|
||||
registerDataTypeVariant(*this);
|
||||
}
|
||||
|
||||
DataTypeFactory & DataTypeFactory::instance()
|
||||
|
@ -100,5 +100,6 @@ void registerDataTypeDomainBool(DataTypeFactory & factory);
|
||||
void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory);
|
||||
void registerDataTypeDomainGeo(DataTypeFactory & factory);
|
||||
void registerDataTypeObject(DataTypeFactory & factory);
|
||||
void registerDataTypeVariant(DataTypeFactory & factory);
|
||||
|
||||
}
|
||||
|
@ -114,5 +114,33 @@ DataTypePtr makeNullableOrLowCardinalityNullable(const DataTypePtr & type)
|
||||
return std::make_shared<DataTypeNullable>(type);
|
||||
}
|
||||
|
||||
/// Returns a nullable counterpart of `type`:
///  - types that are already Nullable / LowCardinality(Nullable) are returned unchanged;
///  - LowCardinality(T) gets its dictionary type wrapped with makeNullable;
///  - every other type is delegated to makeNullableSafe.
DataTypePtr makeNullableOrLowCardinalityNullableSafe(const DataTypePtr & type)
{
    if (isNullableOrLowCardinalityNullable(type))
        return type;

    if (!type->lowCardinality())
        return makeNullableSafe(type);

    const auto & nested_dictionary_type = assert_cast<const DataTypeLowCardinality &>(*type).getDictionaryType();
    return std::make_shared<DataTypeLowCardinality>(makeNullable(nested_dictionary_type));
}
|
||||
|
||||
/// Strips nullability from `type`:
/// Nullable(T) -> T, LowCardinality(Nullable(T)) -> LowCardinality(T), anything else is returned as is.
DataTypePtr removeNullableOrLowCardinalityNullable(const DataTypePtr & type)
{
    if (type->isNullable())
    {
        const auto & nullable_type = static_cast<const DataTypeNullable &>(*type);
        return nullable_type.getNestedType();
    }

    if (!type->isLowCardinalityNullable())
        return type;

    /// Rebuild LowCardinality around the dictionary type with Nullable removed.
    const auto & low_cardinality_type = static_cast<const DataTypeLowCardinality &>(*type);
    auto plain_dictionary_type = removeNullable(low_cardinality_type.getDictionaryType());
    return std::make_shared<DataTypeLowCardinality>(plain_dictionary_type);
}
|
||||
|
||||
}
|
||||
|
@ -54,5 +54,8 @@ DataTypePtr makeNullable(const DataTypePtr & type);
|
||||
DataTypePtr makeNullableSafe(const DataTypePtr & type);
|
||||
DataTypePtr removeNullable(const DataTypePtr & type);
|
||||
DataTypePtr makeNullableOrLowCardinalityNullable(const DataTypePtr & type);
|
||||
DataTypePtr makeNullableOrLowCardinalityNullableSafe(const DataTypePtr & type);
|
||||
/// Nullable(T) -> T, LowCardinality(Nullable(T)) -> LowCardinality(T)
|
||||
DataTypePtr removeNullableOrLowCardinalityNullable(const DataTypePtr & type);
|
||||
|
||||
}
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <DataTypes/Serializations/SerializationTuple.h>
|
||||
#include <DataTypes/Serializations/SerializationNamed.h>
|
||||
#include <DataTypes/Serializations/SerializationInfoTuple.h>
|
||||
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTNameTypePair.h>
|
||||
@ -189,11 +190,15 @@ MutableColumnPtr DataTypeTuple::createColumn() const
|
||||
|
||||
MutableColumnPtr DataTypeTuple::createColumn(const ISerialization & serialization) const
|
||||
{
|
||||
/// If we read Tuple as Variant subcolumn, it may be wrapped to SerializationVariantElement.
|
||||
/// Here we don't need it, so we drop this wrapper.
|
||||
const auto * current_serialization = &serialization;
|
||||
while (const auto * serialization_variant_element = typeid_cast<const SerializationVariantElement *>(current_serialization))
|
||||
current_serialization = serialization_variant_element->getNested().get();
|
||||
|
||||
/// If we read subcolumn of nested Tuple, it may be wrapped to SerializationNamed
|
||||
/// several times to allow to reconstruct the substream path name.
|
||||
/// Here we don't need substream path name, so we drop first several wrapper serializations.
|
||||
|
||||
const auto * current_serialization = &serialization;
|
||||
while (const auto * serialization_named = typeid_cast<const SerializationNamed *>(current_serialization))
|
||||
current_serialization = serialization_named->getNested().get();
|
||||
|
||||
|
220
src/DataTypes/DataTypeVariant.cpp
Normal file
220
src/DataTypes/DataTypeVariant.cpp
Normal file
@ -0,0 +1,220 @@
|
||||
#include <Columns/ColumnVariant.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Core/Field.h>
|
||||
#include <DataTypes/DataTypeVariant.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/Serializations/SerializationVariant.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/FieldToDataType.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <Parsers/IAST.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int EMPTY_DATA_PASSED;
|
||||
}
|
||||
|
||||
|
||||
/// Builds the list of nested types: validates every requested type, skips Nothing,
/// removes duplicates and stores the result sorted by full type name.
/// Throws BAD_ARGUMENTS for Nullable / LowCardinality(Nullable) / nested Variant types and
/// for too many variants, EMPTY_DATA_PASSED when no variant is left.
DataTypeVariant::DataTypeVariant(const DataTypes & variants_)
{
    /// std::map keeps its keys ordered, so keying by the full type name both sorts the
    /// nested types and squashes identical ones (a later duplicate replaces an earlier one).
    std::map<String, DataTypePtr> sorted_unique_types;
    for (const auto & candidate : variants_)
    {
        /// Nullable(...), LowCardinality(Nullable(...)) and Variant(...) cannot be nested inside Variant.
        if (isNullableOrLowCardinalityNullable(candidate))
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Nullable/LowCardinality(Nullable) types are not allowed inside Variant type");

        if (candidate->getTypeId() == TypeIndex::Variant)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Nested Variant types are not allowed");

        /// Nothing is never stored as a variant.
        if (isNothing(candidate))
            continue;

        sorted_unique_types.insert_or_assign(candidate->getName(), candidate);
    }

    variants.reserve(sorted_unique_types.size());
    for (const auto & name_and_type : sorted_unique_types)
        variants.push_back(name_and_type.second);

    if (variants.empty())
        throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Variant cannot be empty");

    if (variants.size() > ColumnVariant::MAX_NESTED_COLUMNS)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Variant type with more than {} nested types is not allowed", ColumnVariant::MAX_NESTED_COLUMNS);
}
|
||||
|
||||
std::string DataTypeVariant::doGetName() const
|
||||
{
|
||||
size_t size = variants.size();
|
||||
WriteBufferFromOwnString s;
|
||||
|
||||
s << "Variant(";
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if (i != 0)
|
||||
s << ", ";
|
||||
|
||||
s << variants[i]->getName();
|
||||
}
|
||||
s << ")";
|
||||
|
||||
return s.str();
|
||||
}
|
||||
|
||||
std::string DataTypeVariant::doGetPrettyName(size_t indent) const
|
||||
{
|
||||
size_t size = variants.size();
|
||||
WriteBufferFromOwnString s;
|
||||
s << "Variant(";
|
||||
|
||||
for (size_t i = 0; i != size; ++i)
|
||||
{
|
||||
if (i != 0)
|
||||
s << ", ";
|
||||
|
||||
s << variants[i]->getPrettyName(indent);
|
||||
}
|
||||
|
||||
s << ')';
|
||||
return s.str();
|
||||
}
|
||||
|
||||
/// Creates a ColumnVariant from one freshly created nested column per variant,
/// in the order of `variants`.
MutableColumnPtr DataTypeVariant::createColumn() const
{
    MutableColumns variant_columns;
    variant_columns.reserve(variants.size());

    for (const auto & variant : variants)
        variant_columns.push_back(variant->createColumn());

    return ColumnVariant::create(std::move(variant_columns));
}
|
||||
|
||||
/// The default value of a Variant is NULL.
Field DataTypeVariant::getDefault() const
{
    return Null{};
}
|
||||
|
||||
bool DataTypeVariant::equals(const IDataType & rhs) const
|
||||
{
|
||||
if (typeid(rhs) != typeid(*this))
|
||||
return false;
|
||||
|
||||
const DataTypeVariant & rhs_variant = static_cast<const DataTypeVariant &>(rhs);
|
||||
|
||||
size_t size = variants.size();
|
||||
if (size != rhs_variant.variants.size())
|
||||
return false;
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
if (!variants[i]->equals(*rhs_variant.variants[i]))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DataTypeVariant::textCanContainOnlyValidUTF8() const
|
||||
{
|
||||
return std::all_of(variants.begin(), variants.end(), [](auto && elem) { return elem->textCanContainOnlyValidUTF8(); });
|
||||
}
|
||||
|
||||
bool DataTypeVariant::haveMaximumSizeOfValue() const
|
||||
{
|
||||
return std::all_of(variants.begin(), variants.end(), [](auto && elem) { return elem->haveMaximumSizeOfValue(); });
|
||||
}
|
||||
|
||||
bool DataTypeVariant::hasDynamicSubcolumns() const
|
||||
{
|
||||
return std::any_of(variants.begin(), variants.end(), [](auto && elem) { return elem->hasDynamicSubcolumns(); });
|
||||
}
|
||||
|
||||
std::optional<ColumnVariant::Discriminator> DataTypeVariant::tryGetVariantDiscriminator(const DataTypePtr & type) const
|
||||
{
|
||||
String type_name = type->getName();
|
||||
for (size_t i = 0; i != variants.size(); ++i)
|
||||
{
|
||||
/// We don't use equals here, because it doesn't respect custom type names.
|
||||
if (variants[i]->getName() == type_name)
|
||||
return i;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
size_t DataTypeVariant::getMaximumSizeOfValueInMemory() const
|
||||
{
|
||||
size_t max_size = 0;
|
||||
for (const auto & elem : variants)
|
||||
{
|
||||
size_t elem_max_size = elem->getMaximumSizeOfValueInMemory();
|
||||
if (elem_max_size > max_size)
|
||||
max_size = elem_max_size;
|
||||
}
|
||||
return max_size;
|
||||
}
|
||||
|
||||
/// Creates SerializationVariant from the default serializations and names of all nested types,
/// the text deserialization order computed by SerializationVariant and the name of this type.
SerializationPtr DataTypeVariant::doGetDefaultSerialization() const
{
    const size_t variants_count = variants.size();

    SerializationVariant::VariantSerializations nested_serializations;
    nested_serializations.reserve(variants_count);
    Names nested_names;
    nested_names.reserve(variants_count);

    for (size_t position = 0; position < variants_count; ++position)
    {
        nested_serializations.push_back(variants[position]->getDefaultSerialization());
        nested_names.push_back(variants[position]->getName());
    }

    return std::make_shared<SerializationVariant>(
        std::move(nested_serializations),
        std::move(nested_names),
        SerializationVariant::getVariantsDeserializeTextOrder(variants),
        getName());
}
|
||||
|
||||
/// Factory callback: builds DataTypeVariant from the AST arguments of `Variant(...)`.
/// Every child AST is resolved to a data type through DataTypeFactory.
/// Throws EMPTY_DATA_PASSED when there are no arguments.
static DataTypePtr create(const ASTPtr & arguments)
{
    const bool has_arguments = arguments && !arguments->children.empty();
    if (!has_arguments)
        throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Variant cannot be empty");

    const auto & type_asts = arguments->children;

    DataTypes nested_types;
    nested_types.reserve(type_asts.size());
    for (const auto & type_ast : type_asts)
        nested_types.push_back(DataTypeFactory::instance().get(type_ast));

    return std::make_shared<DataTypeVariant>(nested_types);
}
|
||||
|
||||
bool isVariantExtension(const DataTypePtr & from_type, const DataTypePtr & to_type)
|
||||
{
|
||||
const auto * from_variant = typeid_cast<const DataTypeVariant *>(from_type.get());
|
||||
const auto * to_variant = typeid_cast<const DataTypeVariant *>(to_type.get());
|
||||
if (!from_variant || !to_variant)
|
||||
return false;
|
||||
|
||||
const auto & to_variants = to_variant->getVariants();
|
||||
std::unordered_set<String> to_variant_types;
|
||||
to_variant_types.reserve(to_variants.size());
|
||||
for (const auto & variant : to_variants)
|
||||
to_variant_types.insert(variant->getName());
|
||||
|
||||
for (const auto & variant : from_variant->getVariants())
|
||||
{
|
||||
if (!to_variant_types.contains(variant->getName()))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/// Registers the "Variant" type family in the factory with `create` as its creator.
void registerDataTypeVariant(DataTypeFactory & factory)
{
    factory.registerDataType("Variant", &create);
}
|
||||
|
||||
}
|
68
src/DataTypes/DataTypeVariant.h
Normal file
68
src/DataTypes/DataTypeVariant.h
Normal file
@ -0,0 +1,68 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <Columns/ColumnVariant.h>
|
||||
#include <optional>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Variant data type.
|
||||
* This type represents a union of other data types.
|
||||
* For example, type Variant(T1, T2, ..., TN) means that each row of this type
|
||||
* has a value of either type T1 or T2 or ... or TN or none of them (NULL value).
|
||||
* Nullable(...), LowCardinality(Nullable(...)) and Variant(...) types are not allowed
|
||||
* inside Variant type.
|
||||
* The order of nested types doesn't matter: Variant(T1, T2) = Variant(T2, T1).
|
||||
* To have global order of nested types we sort variants by type names on Variant creation.
|
||||
* The index of a variant in a sorted list is called global variant discriminator.
|
||||
*/
|
||||
class DataTypeVariant final : public IDataType
|
||||
{
|
||||
private:
|
||||
/// Nested variant types. The class description states they are sorted by type name on creation;
/// the sorting itself is not visible in this header (presumably done by the constructor — TODO confirm).
DataTypes variants;
|
||||
|
||||
public:
|
||||
/// Compile-time flag; has the same value as the one returned by isParametric() below.
static constexpr bool is_parametric = true;
|
||||
|
||||
/// Builds the type from the list of nested variant types.
/// NOTE(review): the class description forbids Nullable / LowCardinality(Nullable) / Variant inside Variant;
/// whether this constructor enforces that is not visible here — confirm in the .cpp.
explicit DataTypeVariant(const DataTypes & variants_);
|
||||
|
||||
/// Type identification: always TypeIndex::Variant / family name "Variant".
TypeIndex getTypeId() const override { return TypeIndex::Variant; }
|
||||
const char * getFamilyName() const override { return "Variant"; }
|
||||
|
||||
/// Variant may not be wrapped in Nullable (presumably because a Variant row can already be NULL,
/// as the class description says), and it opts out of sparse serialization / sparse columns.
bool canBeInsideNullable() const override { return false; }
|
||||
bool supportsSparseSerialization() const override { return false; }
|
||||
bool canBeInsideSparseColumns() const override { return false; }
|
||||
|
||||
/// Creates an empty mutable column for this type (defined out of line).
MutableColumnPtr createColumn() const override;
|
||||
|
||||
/// Default value of the type (defined out of line).
Field getDefault() const override;
|
||||
|
||||
/// Type equality check against another IDataType (defined out of line).
bool equals(const IDataType & rhs) const override;
|
||||
|
||||
/// Type properties. The first two are fixed for every Variant;
/// the rest are defined out of line (presumably derived from the nested variants — TODO confirm).
bool isParametric() const override { return true; }
|
||||
bool haveSubtypes() const override { return true; }
|
||||
bool textCanContainOnlyValidUTF8() const override;
|
||||
bool haveMaximumSizeOfValue() const override;
|
||||
bool hasDynamicSubcolumns() const override;
|
||||
size_t getMaximumSizeOfValueInMemory() const override;
|
||||
|
||||
/// Accessors for the nested types. getVariant indexes `variants` directly with operator[];
/// there is no explicit range check here, so the caller is responsible for passing a valid index.
const DataTypePtr & getVariant(size_t i) const { return variants[i]; }
|
||||
const DataTypes & getVariants() const { return variants; }
|
||||
|
||||
/// Check if Variant has provided type in the list of variants and return its discriminator.
/// The result is optional; presumably empty when the type is not one of the variants — TODO confirm.
|
||||
std::optional<ColumnVariant::Discriminator> tryGetVariantDiscriminator(const DataTypePtr & type) const;
|
||||
|
||||
private:
|
||||
/// Overrides of the IDataType doGet* hooks (name, pretty name, default serialization); defined out of line.
std::string doGetName() const override;
|
||||
std::string doGetPrettyName(size_t indent) const override;
|
||||
SerializationPtr doGetDefaultSerialization() const override;
|
||||
};
|
||||
|
||||
/// Check if conversion from from_type to to_type is Variant extension
|
||||
/// (both types are Variants and to_type contains all variants from from_type).
|
||||
bool isVariantExtension(const DataTypePtr & from_type, const DataTypePtr & to_type);
|
||||
|
||||
}
|
||||
|
@ -74,6 +74,25 @@ T EnumValues<T>::getValue(StringRef field_name, bool try_treat_as_id) const
|
||||
return it->getMapped();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool EnumValues<T>::tryGetValue(T & x, StringRef field_name, bool try_treat_as_id) const
|
||||
{
|
||||
const auto it = name_to_value_map.find(field_name);
|
||||
if (!it)
|
||||
{
|
||||
/// It is used in CSV and TSV input formats. If we fail to find given string in
|
||||
/// enum names, we will try to treat it as enum id.
|
||||
if (try_treat_as_id)
|
||||
{
|
||||
ReadBufferFromMemory tmp_buf(field_name.data, field_name.size);
|
||||
return tryReadText(x, tmp_buf) && tmp_buf.eof() && value_to_name_map.contains(x);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
x = it->getMapped();
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Names EnumValues<T>::getAllRegisteredNames() const
|
||||
{
|
||||
|
@ -7,7 +7,7 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
namespace ErrorCodesEnumValues
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
@ -42,6 +42,11 @@ public:
|
||||
return it;
|
||||
}
|
||||
|
||||
/// Returns whether `value` is present in value_to_name_map.
/// Unlike getNameForValue below (documented as throwing on an invalid value), this only reports presence.
bool hasValue(const T & value) const
|
||||
{
|
||||
return value_to_name_map.contains(value);
|
||||
}
|
||||
|
||||
/// throws exception if value is not valid
|
||||
const StringRef & getNameForValue(const T & value) const
|
||||
{
|
||||
@ -60,6 +65,7 @@ public:
|
||||
}
|
||||
|
||||
T getValue(StringRef field_name, bool try_treat_as_id = false) const;
|
||||
bool tryGetValue(T & x, StringRef field_name, bool try_treat_as_id = false) const;
|
||||
|
||||
template <typename TValues>
|
||||
bool containsAll(const TValues & rhs_values) const
|
||||
|
@ -109,11 +109,26 @@ Ptr IDataType::getForSubcolumn(
|
||||
bool throw_if_null) const
|
||||
{
|
||||
Ptr res;
|
||||
forEachSubcolumn([&](const auto &, const auto & name, const auto & subdata)
|
||||
|
||||
ISerialization::StreamCallback callback_with_data = [&](const auto & subpath)
|
||||
{
|
||||
if (name == subcolumn_name)
|
||||
res = subdata.*member;
|
||||
}, data);
|
||||
for (size_t i = 0; i < subpath.size(); ++i)
|
||||
{
|
||||
size_t prefix_len = i + 1;
|
||||
if (!subpath[i].visited && ISerialization::hasSubcolumnForPath(subpath, prefix_len))
|
||||
{
|
||||
auto name = ISerialization::getSubcolumnNameForStream(subpath, prefix_len);
|
||||
/// Create data from path only if it's requested subcolumn.
|
||||
if (name == subcolumn_name)
|
||||
res = ISerialization::createFromPath(subpath, prefix_len).*member;
|
||||
}
|
||||
subpath[i].visited = true;
|
||||
}
|
||||
};
|
||||
|
||||
ISerialization::EnumerateStreamsSettings settings;
|
||||
settings.position_independent_encoding = false;
|
||||
data.serialization->enumerateStreams(settings, callback_with_data, data);
|
||||
|
||||
if (!res && throw_if_null)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
|
||||
|
@ -150,7 +150,7 @@ public:
|
||||
|
||||
/** Create ColumnConst for corresponding type, with specified size and value.
|
||||
*/
|
||||
ColumnPtr createColumnConst(size_t size, const Field & field) const;
|
||||
virtual ColumnPtr createColumnConst(size_t size, const Field & field) const;
|
||||
ColumnPtr createColumnConstWithDefaultValue(size_t size) const;
|
||||
|
||||
/** Get default value of data type.
|
||||
@ -412,6 +412,8 @@ struct WhichDataType
|
||||
constexpr bool isSimple() const { return isInt() || isUInt() || isFloat() || isString(); }
|
||||
|
||||
constexpr bool isLowCardinality() const { return idx == TypeIndex::LowCardinality; }
|
||||
|
||||
constexpr bool isVariant() const { return idx == TypeIndex::Variant; }
|
||||
};
|
||||
|
||||
/// IDataType helpers (alternative for IDataType virtual methods with single point of truth)
|
||||
@ -464,6 +466,7 @@ template <typename T> inline bool isTuple(const T & data_type) { return WhichDat
|
||||
template <typename T> inline bool isMap(const T & data_type) {return WhichDataType(data_type).isMap(); }
|
||||
template <typename T> inline bool isInterval(const T & data_type) {return WhichDataType(data_type).isInterval(); }
|
||||
template <typename T> inline bool isObject(const T & data_type) { return WhichDataType(data_type).isObject(); }
|
||||
template <typename T> inline bool isVariant(const T & data_type) { return WhichDataType(data_type).isVariant(); }
|
||||
|
||||
template <typename T> inline bool isNothing(const T & data_type) { return WhichDataType(data_type).isNothing(); }
|
||||
|
||||
|
@ -54,6 +54,7 @@ const std::set<SubstreamType> ISerialization::Substream::named_types
|
||||
TupleElement,
|
||||
NamedOffsets,
|
||||
NamedNullMap,
|
||||
NamedVariantDiscriminators,
|
||||
};
|
||||
|
||||
String ISerialization::Substream::toString() const
|
||||
@ -61,6 +62,9 @@ String ISerialization::Substream::toString() const
|
||||
if (named_types.contains(type))
|
||||
return fmt::format("{}({})", type, name_of_substream);
|
||||
|
||||
if (type == VariantElement)
|
||||
return fmt::format("VariantElement({})", variant_element_name);
|
||||
|
||||
return String(magic_enum::enum_name(type));
|
||||
}
|
||||
|
||||
@ -186,6 +190,12 @@ String getNameForSubstreamPath(
|
||||
else
|
||||
stream_name += substream_name;
|
||||
}
|
||||
else if (it->type == Substream::VariantDiscriminators)
|
||||
stream_name += ".variant_discr";
|
||||
else if (it->type == Substream::VariantOffsets)
|
||||
stream_name += ".variant_offsets";
|
||||
else if (it->type == Substream::VariantElement)
|
||||
stream_name += "." + it->variant_element_name;
|
||||
}
|
||||
|
||||
return stream_name;
|
||||
@ -274,6 +284,53 @@ bool ISerialization::isSpecialCompressionAllowed(const SubstreamPath & path)
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename F>
|
||||
bool tryDeserializeText(const F deserialize, DB::IColumn & column)
|
||||
{
|
||||
size_t prev_size = column.size();
|
||||
try
|
||||
{
|
||||
deserialize(column);
|
||||
return true;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (column.size() > prev_size)
|
||||
column.popBack(column.size() - prev_size);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Default non-throwing CSV deserialization: delegates to deserializeTextCSV through tryDeserializeText,
/// so any exception becomes a `false` result.
bool ISerialization::tryDeserializeTextCSV(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
{
    auto deserialize_csv = [&](DB::IColumn & target) { deserializeTextCSV(target, istr, settings); };
    return tryDeserializeText(deserialize_csv, column);
}
|
||||
|
||||
/// Default non-throwing escaped-text deserialization: delegates to deserializeTextEscaped through
/// tryDeserializeText, so any exception becomes a `false` result.
bool ISerialization::tryDeserializeTextEscaped(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
{
    auto deserialize_escaped = [&](DB::IColumn & target) { deserializeTextEscaped(target, istr, settings); };
    return tryDeserializeText(deserialize_escaped, column);
}
|
||||
|
||||
/// Default non-throwing JSON deserialization: delegates to deserializeTextJSON through tryDeserializeText,
/// so any exception becomes a `false` result.
bool ISerialization::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
{
    auto deserialize_json = [&](DB::IColumn & target) { deserializeTextJSON(target, istr, settings); };
    return tryDeserializeText(deserialize_json, column);
}
|
||||
|
||||
/// Default non-throwing quoted-text deserialization: delegates to deserializeTextQuoted through
/// tryDeserializeText, so any exception becomes a `false` result.
bool ISerialization::tryDeserializeTextQuoted(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
{
    auto deserialize_quoted = [&](DB::IColumn & target) { deserializeTextQuoted(target, istr, settings); };
    return tryDeserializeText(deserialize_quoted, column);
}
|
||||
|
||||
/// Default non-throwing whole-text deserialization: delegates to deserializeWholeText through
/// tryDeserializeText, so any exception becomes a `false` result.
bool ISerialization::tryDeserializeWholeText(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
{
    auto deserialize_whole = [&](DB::IColumn & target) { deserializeWholeText(target, istr, settings); };
    return tryDeserializeText(deserialize_whole, column);
}
|
||||
|
||||
void ISerialization::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
String field;
|
||||
@ -283,6 +340,15 @@ void ISerialization::deserializeTextRaw(IColumn & column, ReadBuffer & istr, con
|
||||
deserializeWholeText(column, buf, settings);
|
||||
}
|
||||
|
||||
/// Non-throwing counterpart of deserializeTextRaw: reads one raw field from `istr` into a temporary string
/// and tries to parse that string as a whole value.
bool ISerialization::tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    /// Read until \t or \n.
    String raw_field;
    readString(raw_field, istr);

    ReadBufferFromString field_buf(raw_field);
    const bool parsed = tryDeserializeWholeText(column, field_buf, settings);
    return parsed;
}
|
||||
|
||||
void ISerialization::serializeTextMarkdown(
|
||||
const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const
|
||||
{
|
||||
@ -310,7 +376,8 @@ bool ISerialization::hasSubcolumnForPath(const SubstreamPath & path, size_t pref
|
||||
size_t last_elem = prefix_len - 1;
|
||||
return path[last_elem].type == Substream::NullMap
|
||||
|| path[last_elem].type == Substream::TupleElement
|
||||
|| path[last_elem].type == Substream::ArraySizes;
|
||||
|| path[last_elem].type == Substream::ArraySizes
|
||||
|| path[last_elem].type == Substream::VariantElement;
|
||||
}
|
||||
|
||||
ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len)
|
||||
@ -339,6 +406,8 @@ void ISerialization::throwUnexpectedDataAfterParsedValue(IColumn & column, ReadB
|
||||
{
|
||||
WriteBufferFromOwnString ostr;
|
||||
serializeText(column, column.size() - 1, ostr, settings);
|
||||
/// Restore correct column size.
|
||||
column.popBack(1);
|
||||
throw Exception(
|
||||
ErrorCodes::UNEXPECTED_DATA_AFTER_PARSED_VALUE,
|
||||
"Unexpected data '{}' after parsed {} value '{}'",
|
||||
|
@ -154,6 +154,12 @@ public:
|
||||
ObjectStructure,
|
||||
ObjectData,
|
||||
|
||||
VariantDiscriminators,
|
||||
NamedVariantDiscriminators,
|
||||
VariantOffsets,
|
||||
VariantElements,
|
||||
VariantElement,
|
||||
|
||||
Regular,
|
||||
};
|
||||
|
||||
@ -162,6 +168,9 @@ public:
|
||||
|
||||
Type type;
|
||||
|
||||
/// The name of a variant element type.
|
||||
String variant_element_name;
|
||||
|
||||
/// Name of substream for type from 'named_types'.
|
||||
String name_of_substream;
|
||||
|
||||
@ -321,17 +330,20 @@ public:
|
||||
virtual void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
|
||||
|
||||
virtual void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
|
||||
virtual bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const;
|
||||
|
||||
/** Text serialization as a literal that may be inserted into a query.
|
||||
*/
|
||||
virtual void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
|
||||
|
||||
virtual void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
|
||||
virtual bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const;
|
||||
|
||||
/** Text serialization for the CSV format.
|
||||
*/
|
||||
virtual void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
|
||||
virtual void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
|
||||
virtual bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const;
|
||||
|
||||
/** Text serialization for displaying on a terminal or saving into a text file, and the like.
|
||||
* Without escaping or quoting.
|
||||
@ -341,11 +353,13 @@ public:
|
||||
/** Text deserialization in case when buffer contains only one value, without any escaping and delimiters.
|
||||
*/
|
||||
virtual void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
|
||||
virtual bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const;
|
||||
|
||||
/** Text serialization intended for using in JSON format.
|
||||
*/
|
||||
virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
|
||||
virtual void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
|
||||
virtual bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const;
|
||||
virtual void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t /*indent*/) const
|
||||
{
|
||||
serializeTextJSON(column, row_num, ostr, settings);
|
||||
@ -365,6 +379,7 @@ public:
|
||||
* additional code in data types serialization and ReadHelpers.
|
||||
*/
|
||||
virtual void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
|
||||
virtual bool tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
|
||||
virtual void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const;
|
||||
|
||||
virtual void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const;
|
||||
|
@ -419,9 +419,11 @@ static void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffe
|
||||
}
|
||||
|
||||
|
||||
template <typename Reader>
|
||||
static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && read_nested, bool allow_unenclosed)
|
||||
template <typename ReturnType = void, typename Reader>
|
||||
static ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && read_nested, bool allow_unenclosed)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
ColumnArray & column_array = assert_cast<ColumnArray &>(column);
|
||||
ColumnArray::Offsets & offsets = column_array.getOffsets();
|
||||
|
||||
@ -433,7 +435,18 @@ static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && r
|
||||
if (checkChar('[', istr))
|
||||
has_braces = true;
|
||||
else if (!allow_unenclosed)
|
||||
throw Exception(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT, "Array does not start with '[' character");
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT, "Array does not start with '[' character");
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
auto on_error_no_throw = [&]()
|
||||
{
|
||||
if (size)
|
||||
nested_column.popBack(size);
|
||||
return ReturnType(false);
|
||||
};
|
||||
|
||||
try
|
||||
{
|
||||
@ -443,11 +456,17 @@ static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && r
|
||||
if (!first)
|
||||
{
|
||||
if (*istr.position() == ',')
|
||||
{
|
||||
++istr.position();
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT,
|
||||
"Cannot read array from text, expected comma or end of array, found '{}'",
|
||||
*istr.position());
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT,
|
||||
"Cannot read array from text, expected comma or end of array, found '{}'",
|
||||
*istr.position());
|
||||
return on_error_no_throw();
|
||||
}
|
||||
}
|
||||
|
||||
first = false;
|
||||
@ -457,25 +476,42 @@ static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && r
|
||||
if (*istr.position() == ']')
|
||||
break;
|
||||
|
||||
read_nested(nested_column);
|
||||
if constexpr (throw_exception)
|
||||
read_nested(nested_column);
|
||||
else if (!read_nested(nested_column))
|
||||
return on_error_no_throw();
|
||||
|
||||
++size;
|
||||
|
||||
skipWhitespaceIfAny(istr);
|
||||
}
|
||||
|
||||
if (has_braces)
|
||||
assertChar(']', istr);
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
assertChar(']', istr);
|
||||
else if (!checkChar(']', istr))
|
||||
return on_error_no_throw();
|
||||
}
|
||||
else /// If array is not enclosed in braces, we read until EOF.
|
||||
assertEOF(istr);
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
assertEOF(istr);
|
||||
else if (!istr.eof())
|
||||
return on_error_no_throw();
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (size)
|
||||
nested_column.popBack(size);
|
||||
throw;
|
||||
if constexpr (throw_exception)
|
||||
throw;
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
offsets.push_back(offsets.back() + size);
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
|
||||
@ -494,8 +530,8 @@ void SerializationArray::deserializeText(IColumn & column, ReadBuffer & istr, co
|
||||
deserializeTextImpl(column, istr,
|
||||
[&](IColumn & nested_column)
|
||||
{
|
||||
if (settings.null_as_default)
|
||||
SerializationNullable::deserializeTextQuotedImpl(nested_column, istr, settings, nested);
|
||||
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
|
||||
SerializationNullable::deserializeNullAsDefaultOrNestedTextQuoted(nested_column, istr, settings, nested);
|
||||
else
|
||||
nested->deserializeTextQuoted(nested_column, istr, settings);
|
||||
}, false);
|
||||
@ -504,6 +540,29 @@ void SerializationArray::deserializeText(IColumn & column, ReadBuffer & istr, co
|
||||
throwUnexpectedDataAfterParsedValue(column, istr, settings, "Array");
|
||||
}
|
||||
|
||||
/// Non-throwing text deserialization of an array (the array must be enclosed: allow_unenclosed is false).
/// Elements are read in the quoted text form; with settings.null_as_default a NULL element of a
/// non-nullable nested column goes through the "null as default or nested" reader.
/// If `whole` is set and input remains after the array, the just-added row is removed and false is returned.
bool SerializationArray::tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
{
    auto parse_element = [&](IColumn & element_column) -> bool
    {
        const bool use_null_as_default = settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column);
        return use_null_as_default
            ? SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextQuoted(element_column, istr, settings, nested)
            : nested->tryDeserializeTextQuoted(element_column, istr, settings);
    };

    if (!deserializeTextImpl<bool>(column, istr, std::move(parse_element), false))
        return false;

    /// Nothing more to verify unless the caller requires the whole buffer to be consumed.
    if (!whole || istr.eof())
        return true;

    /// Unexpected trailing data: undo the row that was just appended.
    column.popBack(1);
    return false;
}
|
||||
|
||||
void SerializationArray::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
|
||||
@ -559,13 +618,25 @@ void SerializationArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr
|
||||
deserializeTextImpl(column, istr,
|
||||
[&](IColumn & nested_column)
|
||||
{
|
||||
if (settings.null_as_default)
|
||||
SerializationNullable::deserializeTextJSONImpl(nested_column, istr, settings, nested);
|
||||
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
|
||||
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested);
|
||||
else
|
||||
nested->deserializeTextJSON(nested_column, istr, settings);
|
||||
}, false);
|
||||
}
|
||||
|
||||
/// Non-throwing JSON deserialization of an array (the array must be enclosed: allow_unenclosed is false).
/// Elements are read with the nested JSON reader; with settings.null_as_default a NULL element of a
/// non-nullable nested column goes through the "null as default or nested" reader.
bool SerializationArray::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    auto parse_element = [&](IColumn & element_column) -> bool
    {
        const bool use_null_as_default = settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column);
        return use_null_as_default
            ? SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(element_column, istr, settings, nested)
            : nested->tryDeserializeTextJSON(element_column, istr, settings);
    };

    const bool parsed = deserializeTextImpl<bool>(column, istr, std::move(parse_element), false);
    return parsed;
}
|
||||
|
||||
|
||||
void SerializationArray::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
@ -608,8 +679,8 @@ void SerializationArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
|
||||
deserializeTextImpl(column, rb,
|
||||
[&](IColumn & nested_column)
|
||||
{
|
||||
if (settings.null_as_default)
|
||||
SerializationNullable::deserializeTextCSVImpl(nested_column, rb, settings, nested);
|
||||
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
|
||||
SerializationNullable::deserializeNullAsDefaultOrNestedTextCSV(nested_column, rb, settings, nested);
|
||||
else
|
||||
nested->deserializeTextCSV(nested_column, rb, settings);
|
||||
}, true);
|
||||
@ -619,12 +690,43 @@ void SerializationArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
|
||||
deserializeTextImpl(column, rb,
|
||||
[&](IColumn & nested_column)
|
||||
{
|
||||
if (settings.null_as_default)
|
||||
SerializationNullable::deserializeTextQuotedImpl(nested_column, rb, settings, nested);
|
||||
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
|
||||
SerializationNullable::deserializeNullAsDefaultOrNestedTextQuoted(nested_column, rb, settings, nested);
|
||||
else
|
||||
nested->deserializeTextQuoted(nested_column, rb, settings);
|
||||
}, true);
|
||||
}
|
||||
}
|
||||
|
||||
/// Non-throwing CSV deserialization of an array.
/// The CSV field is first read into a string; the array is then parsed from that string
/// (brackets are optional here: allow_unenclosed is true). Depending on settings.csv.arrays_as_nested_csv
/// elements are read either as nested CSV values or in the quoted text form. With settings.null_as_default
/// a NULL element of a non-nullable nested column goes through the "null as default or nested" reader.
bool SerializationArray::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String field;
    const bool field_read = tryReadCSV(field, istr, settings.csv);
    if (!field_read)
        return false;

    ReadBufferFromString field_buf(field);

    if (settings.csv.arrays_as_nested_csv)
    {
        auto parse_csv_element = [&](IColumn & element_column) -> bool
        {
            const bool use_null_as_default = settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column);
            return use_null_as_default
                ? SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextCSV(element_column, field_buf, settings, nested)
                : nested->tryDeserializeTextCSV(element_column, field_buf, settings);
        };

        return deserializeTextImpl<bool>(column, field_buf, parse_csv_element, true);
    }

    auto parse_quoted_element = [&](IColumn & element_column) -> bool
    {
        const bool use_null_as_default = settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column);
        return use_null_as_default
            ? SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextQuoted(element_column, field_buf, settings, nested)
            : nested->tryDeserializeTextQuoted(element_column, field_buf, settings);
    };

    return deserializeTextImpl<bool>(column, field_buf, parse_quoted_element, true);
}
|
||||
|
||||
}
|
||||
|
@ -20,15 +20,18 @@ public:
|
||||
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
|
||||
bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
|
||||
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override;
|
||||
|
||||
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
/** Streaming serialization of arrays is arranged in a special way:
|
||||
* - elements placed in a row are written/read without array sizes;
|
||||
|
@ -150,30 +150,42 @@ bool tryDeserializeAllVariants(ColumnUInt8 * column, ReadBuffer & istr)
|
||||
return true;
|
||||
}
|
||||
|
||||
void deserializeImpl(
|
||||
template <typename ReturnType = void>
|
||||
ReturnType deserializeImpl(
|
||||
IColumn & column, ReadBuffer & istr, const FormatSettings & settings, std::function<bool(ReadBuffer &)> check_end_of_value)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
ColumnUInt8 * col = checkAndGetDeserializeColumnType(column);
|
||||
auto restore_column_if_needed = [&, prev_size = col->size()]()
|
||||
{
|
||||
if (col->size() > prev_size)
|
||||
col->popBack(1);
|
||||
};
|
||||
|
||||
PeekableReadBuffer buf(istr);
|
||||
buf.setCheckpoint();
|
||||
if (checkString(settings.bool_true_representation, buf) && check_end_of_value(buf))
|
||||
{
|
||||
col->insert(true);
|
||||
return;
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
buf.rollbackToCheckpoint();
|
||||
if (checkString(settings.bool_false_representation, buf) && check_end_of_value(buf))
|
||||
{
|
||||
col->insert(false);
|
||||
buf.dropCheckpoint();
|
||||
if (buf.hasUnreadData())
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_PARSE_BOOL,
|
||||
"Cannot continue parsing after parsed bool value because it will result in the loss of some data. It may happen if "
|
||||
"bool_true_representation or bool_false_representation contains some delimiters of input format");
|
||||
return;
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_PARSE_BOOL,
|
||||
"Cannot continue parsing after parsed bool value because it will result in the loss of some data. It may happen if "
|
||||
"bool_true_representation or bool_false_representation contains some delimiters of input format");
|
||||
return ReturnType(false);
|
||||
}
|
||||
col->insert(false);
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
buf.rollbackToCheckpoint();
|
||||
@ -181,22 +193,31 @@ void deserializeImpl(
|
||||
{
|
||||
buf.dropCheckpoint();
|
||||
if (buf.hasUnreadData())
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_PARSE_BOOL,
|
||||
"Cannot continue parsing after parsed bool value because it will result in the loss of some data. It may happen if "
|
||||
"bool_true_representation or bool_false_representation contains some delimiters of input format");
|
||||
return;
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_PARSE_BOOL,
|
||||
"Cannot continue parsing after parsed bool value because it will result in the loss of some data. It may happen if "
|
||||
"bool_true_representation or bool_false_representation contains some delimiters of input format");
|
||||
restore_column_if_needed();
|
||||
return ReturnType(false);
|
||||
}
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
buf.makeContinuousMemoryFromCheckpointToPos();
|
||||
buf.rollbackToCheckpoint();
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_PARSE_BOOL,
|
||||
"Cannot parse boolean value here: '{}', should be '{}' or '{}' controlled by setting bool_true_representation and "
|
||||
"bool_false_representation or one of "
|
||||
"True/False/T/F/Y/N/Yes/No/On/Off/Enable/Disable/Enabled/Disabled/1/0",
|
||||
String(buf.position(), std::min(10lu, buf.available())),
|
||||
settings.bool_true_representation, settings.bool_false_representation);
|
||||
restore_column_if_needed();
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_PARSE_BOOL,
|
||||
"Cannot parse boolean value here: '{}', should be '{}' or '{}' controlled by setting bool_true_representation and "
|
||||
"bool_false_representation or one of "
|
||||
"True/False/T/F/Y/N/Yes/No/On/Off/Enable/Disable/Enabled/Disabled/1/0",
|
||||
String(buf.position(), std::min(10lu, buf.available())),
|
||||
settings.bool_true_representation, settings.bool_false_representation);
|
||||
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
}
|
||||
@ -225,6 +246,14 @@ void SerializationBool::deserializeTextEscaped(IColumn & column, ReadBuffer & is
|
||||
deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'; });
|
||||
}
|
||||
|
||||
/// Non-throwing escaped-text deserialization of a bool.
/// Fails immediately on empty input; otherwise a value is considered finished at EOF, '\t' or '\n'.
bool SerializationBool::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    auto at_end_of_value = [](ReadBuffer & buf)
    {
        if (buf.eof())
            return true;
        const auto next = *buf.position();
        return next == '\t' || next == '\n';
    };

    if (istr.eof())
        return false;

    return deserializeImpl<bool>(column, istr, settings, at_end_of_value);
}
|
||||
|
||||
void SerializationBool::serializeTextJSON(const IColumn &column, size_t row_num, WriteBuffer &ostr, const FormatSettings &settings) const
|
||||
{
|
||||
serializeSimple(column, row_num, ostr, settings);
|
||||
@ -250,6 +279,33 @@ void SerializationBool::deserializeTextJSON(IColumn &column, ReadBuffer &istr, c
|
||||
col->insert(value);
|
||||
}
|
||||
|
||||
/// Non-throwing JSON deserialization of a bool.
/// Accepts a word starting with 't' or 'f' (parsed by readBoolTextWord) or a single '1' / '0' digit;
/// any other first character, or empty input, yields false. On success the value is appended to the column.
bool SerializationBool::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const
{
    if (istr.eof())
        return false;

    ColumnUInt8 * bool_column = checkAndGetDeserializeColumnType(column);
    bool parsed_value = false;

    switch (*istr.position())
    {
        case 't':
        case 'f':
            if (!readBoolTextWord<bool>(parsed_value, istr))
                return false;
            break;
        case '1':
        case '0':
            /// Doesn't throw.
            readBoolText(parsed_value, istr);
            break;
        default:
            return false;
    }

    bool_column->insert(parsed_value);
    return true;
}
|
||||
|
||||
void SerializationBool::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
serializeCustom(column, row_num, ostr, settings);
|
||||
@ -263,6 +319,14 @@ void SerializationBool::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
|
||||
deserializeImpl(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\n' || *buf.position() == '\r'; });
|
||||
}
|
||||
|
||||
/// Non-throwing CSV deserialization of a bool.
/// Fails immediately on empty input; otherwise a value is considered finished at EOF,
/// the configured CSV delimiter, '\n' or '\r'.
bool SerializationBool::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    auto at_end_of_value = [&](ReadBuffer & buf)
    {
        if (buf.eof())
            return true;
        const auto next = *buf.position();
        return next == settings.csv.delimiter || next == '\n' || next == '\r';
    };

    if (istr.eof())
        return false;

    return deserializeImpl<bool>(column, istr, settings, at_end_of_value);
}
|
||||
|
||||
void SerializationBool::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
serializeCustom(column, row_num, ostr, settings);
|
||||
@ -276,15 +340,30 @@ void SerializationBool::deserializeTextRaw(IColumn & column, ReadBuffer & istr,
|
||||
deserializeImpl(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'; });
|
||||
}
|
||||
|
||||
/// Non-throwing raw-text deserialization of a Bool value.
/// The value ends at EOF, at a tab or at a newline.
/// Returns false immediately when the buffer is empty.
bool SerializationBool::tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    auto at_value_end = [&](ReadBuffer & buf)
    {
        if (buf.eof())
            return true;
        const char next = *buf.position();
        return next == '\t' || next == '\n';
    };

    return !istr.eof() && deserializeImpl<bool>(column, istr, settings, at_value_end);
}
|
||||
|
||||
void SerializationBool::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
serializeSimple(column, row_num, ostr, settings);
|
||||
}
|
||||
|
||||
void SerializationBool::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
template <typename ReturnType>
|
||||
ReturnType deserializeTextQuotedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
if (istr.eof())
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Expected boolean value but get EOF.");
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Expected boolean value but get EOF.");
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
auto * col = checkAndGetDeserializeColumnType(column);
|
||||
|
||||
@ -292,11 +371,17 @@ void SerializationBool::deserializeTextQuoted(IColumn & column, ReadBuffer & ist
|
||||
switch (symbol)
|
||||
{
|
||||
case 't':
|
||||
assertStringCaseInsensitive("true", istr);
|
||||
if constexpr (throw_exception)
|
||||
assertStringCaseInsensitive("true", istr);
|
||||
else if (!checkStringCaseInsensitive("true", istr))
|
||||
return ReturnType(false);
|
||||
col->insert(true);
|
||||
break;
|
||||
case 'f':
|
||||
assertStringCaseInsensitive("false", istr);
|
||||
if constexpr (throw_exception)
|
||||
assertStringCaseInsensitive("false", istr);
|
||||
else if (!checkStringCaseInsensitive("false", istr))
|
||||
return ReturnType(false);
|
||||
col->insert(false);
|
||||
break;
|
||||
case '1':
|
||||
@ -307,16 +392,40 @@ void SerializationBool::deserializeTextQuoted(IColumn & column, ReadBuffer & ist
|
||||
break;
|
||||
case '\'':
|
||||
++istr.position();
|
||||
deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return !buf.eof() && *buf.position() == '\''; });
|
||||
assertChar('\'', istr);
|
||||
if constexpr (throw_exception)
|
||||
{
|
||||
deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return !buf.eof() && *buf.position() == '\''; });
|
||||
assertChar('\'', istr);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!deserializeImpl<bool>(column, istr, settings, [](ReadBuffer & buf) { return !buf.eof() && *buf.position() == '\''; }) || !checkChar('\'', istr))
|
||||
return ReturnType(false);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_PARSE_BOOL,
|
||||
"Cannot parse boolean value here: '{}', should be true/false, 1/0 or on of "
|
||||
"True/False/T/F/Y/N/Yes/No/On/Off/Enable/Disable/Enabled/Disabled/1/0 in quotes",
|
||||
String(istr.position(), std::min(10ul, istr.available())));
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_PARSE_BOOL,
|
||||
"Cannot parse boolean value here: '{}', should be true/false, 1/0 or on of "
|
||||
"True/False/T/F/Y/N/Yes/No/On/Off/Enable/Disable/Enabled/Disabled/1/0 in quotes",
|
||||
String(istr.position(), std::min(10ul, istr.available())));
|
||||
return ReturnType(false);
|
||||
}
|
||||
}
|
||||
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
/// Throwing variant of quoted Bool deserialization: instantiates the shared implementation with
/// ReturnType = void, which makes it raise exceptions instead of returning a status.
void SerializationBool::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    return deserializeTextQuotedImpl<void>(column, istr, settings);
}
|
||||
|
||||
/// Non-throwing variant of quoted Bool deserialization: instantiates the shared implementation with
/// ReturnType = bool and returns its status.
bool SerializationBool::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    const bool parsed = deserializeTextQuotedImpl<bool>(column, istr, settings);
    return parsed;
}
|
||||
|
||||
void SerializationBool::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
@ -327,6 +436,14 @@ void SerializationBool::deserializeWholeText(IColumn & column, ReadBuffer & istr
|
||||
deserializeImpl(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof(); });
|
||||
}
|
||||
|
||||
/// Non-throwing deserialization of a Bool value that occupies the whole buffer:
/// the value ends only at EOF. Returns false immediately when the buffer is empty.
bool SerializationBool::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    auto at_value_end = [&](ReadBuffer & buf) { return buf.eof(); };

    return !istr.eof() && deserializeImpl<bool>(column, istr, settings, at_value_end);
}
|
||||
|
||||
void SerializationBool::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
serializeSimple(column, row_num, ostr, settings);
|
||||
|
@ -15,21 +15,27 @@ public:
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
};
|
||||
|
@ -24,6 +24,12 @@ void deserializeFromString(const SerializationCustomSimpleText & domain, IColumn
|
||||
domain.deserializeText(column, istr, settings, true);
|
||||
}
|
||||
|
||||
bool tryDeserializeFromString(const SerializationCustomSimpleText & domain, IColumn & column, const String & s, const FormatSettings & settings)
|
||||
{
|
||||
ReadBufferFromString istr(s);
|
||||
return domain.tryDeserializeText(column, istr, settings, true);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace DB
|
||||
@ -34,6 +40,19 @@ SerializationCustomSimpleText::SerializationCustomSimpleText(const Serialization
|
||||
{
|
||||
}
|
||||
|
||||
/// Default non-throwing deserialization: runs the throwing deserializeText and converts
/// any exception into a `false` result.
bool SerializationCustomSimpleText::tryDeserializeText(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, bool whole) const
{
    bool succeeded = true;
    try
    {
        deserializeText(column, istr, settings, whole);
    }
    catch (...)
    {
        /// Every failure is reported as "could not parse".
        succeeded = false;
    }
    return succeeded;
}
|
||||
|
||||
void SerializationCustomSimpleText::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
String str;
|
||||
@ -41,6 +60,13 @@ void SerializationCustomSimpleText::deserializeWholeText(IColumn & column, ReadB
|
||||
deserializeFromString(*this, column, str, settings);
|
||||
}
|
||||
|
||||
/// Reads everything up to EOF and tries to parse that text as a single value.
bool SerializationCustomSimpleText::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String whole_text;
    readStringUntilEOF(whole_text, istr);

    const bool parsed = tryDeserializeFromString(*this, column, whole_text, settings);
    return parsed;
}
|
||||
|
||||
void SerializationCustomSimpleText::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeEscapedString(serializeToString(*this, column, row_num, settings), ostr);
|
||||
@ -53,6 +79,13 @@ void SerializationCustomSimpleText::deserializeTextEscaped(IColumn & column, Rea
|
||||
deserializeFromString(*this, column, str, settings);
|
||||
}
|
||||
|
||||
/// Reads one escaped string from the buffer and tries to parse it as a single value.
bool SerializationCustomSimpleText::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String unescaped;
    readEscapedString(unescaped, istr);

    const bool parsed = tryDeserializeFromString(*this, column, unescaped, settings);
    return parsed;
}
|
||||
|
||||
void SerializationCustomSimpleText::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeQuotedString(serializeToString(*this, column, row_num, settings), ostr);
|
||||
@ -65,6 +98,14 @@ void SerializationCustomSimpleText::deserializeTextQuoted(IColumn & column, Read
|
||||
deserializeFromString(*this, column, str, settings);
|
||||
}
|
||||
|
||||
/// Tries to read one quoted string and then to parse its contents as a single value.
/// Fails if either step fails; the second step is not attempted when the first one fails.
bool SerializationCustomSimpleText::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String unquoted;
    return tryReadQuotedString(unquoted, istr)
        && tryDeserializeFromString(*this, column, unquoted, settings);
}
|
||||
|
||||
void SerializationCustomSimpleText::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeCSVString(serializeToString(*this, column, row_num, settings), ostr);
|
||||
@ -77,6 +118,13 @@ void SerializationCustomSimpleText::deserializeTextCSV(IColumn & column, ReadBuf
|
||||
deserializeFromString(*this, column, str, settings);
|
||||
}
|
||||
|
||||
/// Reads one CSV field (using settings.csv) and tries to parse it as a single value.
bool SerializationCustomSimpleText::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String field;
    readCSVStringInto<String, false, false>(field, istr, settings.csv);

    const bool parsed = tryDeserializeFromString(*this, column, field, settings);
    return parsed;
}
|
||||
|
||||
void SerializationCustomSimpleText::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeJSONString(serializeToString(*this, column, row_num, settings), ostr, settings);
|
||||
@ -89,6 +137,14 @@ void SerializationCustomSimpleText::deserializeTextJSON(IColumn & column, ReadBu
|
||||
deserializeFromString(*this, column, str, settings);
|
||||
}
|
||||
|
||||
/// Tries to read one JSON string and then to parse its contents as a single value.
/// Fails if either step fails; the second step is not attempted when the first one fails.
bool SerializationCustomSimpleText::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String json_string;
    return tryReadJSONStringInto(json_string, istr)
        && tryDeserializeFromString(*this, column, json_string, settings);
}
|
||||
|
||||
void SerializationCustomSimpleText::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeXMLStringForTextElement(serializeToString(*this, column, row_num, settings), ostr);
|
||||
|
@ -22,20 +22,24 @@ public:
|
||||
/// whole = true means that buffer contains only one value, so we should read until EOF.
|
||||
/// It's needed to check if there is garbage after parsed field.
|
||||
virtual void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const = 0;
|
||||
virtual bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const;
|
||||
|
||||
/** Text deserialization in case when buffer contains only one value, without any escaping and delimiters.
|
||||
*/
|
||||
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
/** Text serialization with escaping but without quoting.
|
||||
*/
|
||||
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
/** Text serialization as a literal that may be inserted into a query.
|
||||
*/
|
||||
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
/** Text serialization for the CSV format.
|
||||
*/
|
||||
@ -44,12 +48,14 @@ public:
|
||||
* (the delimiter is not consumed).
|
||||
*/
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
/** Text serialization intended for using in JSON format.
|
||||
* force_quoting_64bit_integers parameter forces UInt64 and Int64 values to be enclosed in quotes.
|
||||
*/
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
/** Text serialization for putting into the XML format.
|
||||
*/
|
||||
|
@ -22,6 +22,15 @@ void SerializationDate::deserializeWholeText(IColumn & column, ReadBuffer & istr
|
||||
throwUnexpectedDataAfterParsedValue(column, istr, settings, "Date");
|
||||
}
|
||||
|
||||
/// Non-throwing parse of a Date that must occupy the whole buffer.
/// The value is appended to the column only if the date parses and nothing is left in the buffer.
bool SerializationDate::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    DayNum day;
    const bool parsed_whole_buffer = tryReadDateText(day, istr, time_zone) && istr.eof();

    if (parsed_whole_buffer)
        assert_cast<ColumnUInt16 &>(column).getData().push_back(day);

    return parsed_whole_buffer;
}
|
||||
|
||||
void SerializationDate::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
|
||||
{
|
||||
DayNum x;
|
||||
@ -29,6 +38,15 @@ void SerializationDate::deserializeTextEscaped(IColumn & column, ReadBuffer & is
|
||||
assert_cast<ColumnUInt16 &>(column).getData().push_back(x);
|
||||
}
|
||||
|
||||
/// Non-throwing parse of a Date from escaped text; appends to the column only on success.
bool SerializationDate::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    DayNum day;
    const bool parsed = tryReadDateText(day, istr, time_zone);

    if (parsed)
        assert_cast<ColumnUInt16 &>(column).getData().push_back(day);

    return parsed;
}
|
||||
|
||||
void SerializationDate::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
serializeText(column, row_num, ostr, settings);
|
||||
@ -50,6 +68,16 @@ void SerializationDate::deserializeTextQuoted(IColumn & column, ReadBuffer & ist
|
||||
assert_cast<ColumnUInt16 &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
|
||||
}
|
||||
|
||||
/// Non-throwing parse of a single-quoted Date ('...').
/// Requires the opening quote, a valid date and the closing quote, checked in that order;
/// appends to the column only when all three succeed.
bool SerializationDate::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    DayNum day;
    const bool parsed = checkChar('\'', istr)
        && tryReadDateText(day, istr, time_zone)
        && checkChar('\'', istr);

    if (parsed)
        assert_cast<ColumnUInt16 &>(column).getData().push_back(day);

    return parsed;
}
|
||||
|
||||
void SerializationDate::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeChar('"', ostr);
|
||||
@ -66,6 +94,15 @@ void SerializationDate::deserializeTextJSON(IColumn & column, ReadBuffer & istr,
|
||||
assert_cast<ColumnUInt16 &>(column).getData().push_back(x);
|
||||
}
|
||||
|
||||
/// Non-throwing parse of a Date written as a JSON string ("...").
/// Requires the opening quote, a valid date and the closing quote, checked in that order;
/// appends to the column only when all three succeed.
bool SerializationDate::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    DayNum day;
    const bool parsed = checkChar('"', istr)
        && tryReadDateText(day, istr, time_zone)
        && checkChar('"', istr);

    if (parsed)
        assert_cast<ColumnUInt16 &>(column).getData().push_back(day);

    return parsed;
}
|
||||
|
||||
void SerializationDate::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeChar('"', ostr);
|
||||
@ -80,6 +117,15 @@ void SerializationDate::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
|
||||
assert_cast<ColumnUInt16 &>(column).getData().push_back(value);
|
||||
}
|
||||
|
||||
/// Non-throwing parse of a Date from a CSV field; appends to the column only on success.
bool SerializationDate::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    DayNum day;
    const bool parsed = tryReadCSV(day, istr, time_zone);

    if (parsed)
        assert_cast<ColumnUInt16 &>(column).getData().push_back(day);

    return parsed;
}
|
||||
|
||||
/// Binds the serialization to the given time zone, which the parsing methods pass to the date readers.
SerializationDate::SerializationDate(const DateLUTImpl & time_zone_)
    : time_zone(time_zone_)
{
}
|
||||
|
@ -13,14 +13,19 @@ public:
|
||||
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
protected:
|
||||
const DateLUTImpl & time_zone;
|
||||
|
@ -21,6 +21,15 @@ void SerializationDate32::deserializeWholeText(IColumn & column, ReadBuffer & is
|
||||
throwUnexpectedDataAfterParsedValue(column, istr, settings, "Date32");
|
||||
}
|
||||
|
||||
/// Non-throwing parse of a Date32 that must occupy the whole buffer.
/// The value is appended to the column only if the date parses and nothing is left in the buffer.
bool SerializationDate32::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    ExtendedDayNum day;
    const bool parsed_whole_buffer = tryReadDateText(day, istr, time_zone) && istr.eof();

    if (parsed_whole_buffer)
        assert_cast<ColumnInt32 &>(column).getData().push_back(day);

    return parsed_whole_buffer;
}
|
||||
|
||||
void SerializationDate32::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
|
||||
{
|
||||
ExtendedDayNum x;
|
||||
@ -28,6 +37,15 @@ void SerializationDate32::deserializeTextEscaped(IColumn & column, ReadBuffer &
|
||||
assert_cast<ColumnInt32 &>(column).getData().push_back(x);
|
||||
}
|
||||
|
||||
/// Non-throwing parse of a Date32 from escaped text; appends to the column only on success.
bool SerializationDate32::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    ExtendedDayNum day;
    const bool parsed = tryReadDateText(day, istr, time_zone);

    if (parsed)
        assert_cast<ColumnInt32 &>(column).getData().push_back(day);

    return parsed;
}
|
||||
|
||||
void SerializationDate32::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
serializeText(column, row_num, ostr, settings);
|
||||
@ -49,6 +67,15 @@ void SerializationDate32::deserializeTextQuoted(IColumn & column, ReadBuffer & i
|
||||
assert_cast<ColumnInt32 &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
|
||||
}
|
||||
|
||||
/// Non-throwing parse of a single-quoted Date32 ('...').
/// Requires the opening quote, a valid date and the closing quote, checked in that order.
bool SerializationDate32::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    ExtendedDayNum day;
    const bool parsed = checkChar('\'', istr)
        && tryReadDateText(day, istr, time_zone)
        && checkChar('\'', istr);

    /// The column is touched last, only once the whole value is known to be valid.
    if (parsed)
        assert_cast<ColumnInt32 &>(column).getData().push_back(day);

    return parsed;
}
|
||||
|
||||
void SerializationDate32::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeChar('"', ostr);
|
||||
@ -65,6 +92,15 @@ void SerializationDate32::deserializeTextJSON(IColumn & column, ReadBuffer & ist
|
||||
assert_cast<ColumnInt32 &>(column).getData().push_back(x);
|
||||
}
|
||||
|
||||
/// Non-throwing parse of a Date32 written as a JSON string ("...").
/// Requires the opening quote, a valid date and the closing quote, checked in that order;
/// appends to the column only when all three succeed.
bool SerializationDate32::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    ExtendedDayNum day;
    const bool parsed = checkChar('"', istr)
        && tryReadDateText(day, istr, time_zone)
        && checkChar('"', istr);

    if (parsed)
        assert_cast<ColumnInt32 &>(column).getData().push_back(day);

    return parsed;
}
|
||||
|
||||
void SerializationDate32::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeChar('"', ostr);
|
||||
@ -79,6 +115,15 @@ void SerializationDate32::deserializeTextCSV(IColumn & column, ReadBuffer & istr
|
||||
assert_cast<ColumnInt32 &>(column).getData().push_back(value.getExtenedDayNum());
|
||||
}
|
||||
|
||||
/// Non-throwing parse of a Date32 from a CSV field.
/// The field is read as a LocalDate and stored as its extended day number; appends only on success.
bool SerializationDate32::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    LocalDate date;
    const bool parsed = tryReadCSV(date, istr);

    if (parsed)
        assert_cast<ColumnInt32 &>(column).getData().push_back(date.getExtenedDayNum());

    return parsed;
}
|
||||
|
||||
/// Binds the serialization to the given time zone, which the parsing methods pass to the date readers.
SerializationDate32::SerializationDate32(const DateLUTImpl & time_zone_)
    : time_zone(time_zone_)
{
}
|
||||
|
@ -12,14 +12,19 @@ public:
|
||||
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
protected:
|
||||
const DateLUTImpl & time_zone;
|
||||
|
@ -21,15 +21,56 @@ inline void readText(time_t & x, ReadBuffer & istr, const FormatSettings & setti
|
||||
switch (settings.date_time_input_format)
|
||||
{
|
||||
case FormatSettings::DateTimeInputFormat::Basic:
|
||||
readDateTimeText(x, istr, time_zone);
|
||||
return;
|
||||
readDateTimeTextImpl<>(x, istr, time_zone);
|
||||
break;
|
||||
case FormatSettings::DateTimeInputFormat::BestEffort:
|
||||
parseDateTimeBestEffort(x, istr, time_zone, utc_time_zone);
|
||||
return;
|
||||
break;
|
||||
case FormatSettings::DateTimeInputFormat::BestEffortUS:
|
||||
parseDateTimeBestEffortUS(x, istr, time_zone, utc_time_zone);
|
||||
return;
|
||||
break;
|
||||
}
|
||||
|
||||
if (x < 0)
|
||||
x = 0;
|
||||
}
|
||||
|
||||
inline void readAsIntText(time_t & x, ReadBuffer & istr)
|
||||
{
|
||||
readIntText(x, istr);
|
||||
if (x < 0)
|
||||
x = 0;
|
||||
}
|
||||
|
||||
inline bool tryReadText(time_t & x, ReadBuffer & istr, const FormatSettings & settings, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone)
|
||||
{
|
||||
bool res;
|
||||
switch (settings.date_time_input_format)
|
||||
{
|
||||
case FormatSettings::DateTimeInputFormat::Basic:
|
||||
res = tryReadDateTimeText(x, istr, time_zone);
|
||||
break;
|
||||
case FormatSettings::DateTimeInputFormat::BestEffort:
|
||||
res = tryParseDateTimeBestEffort(x, istr, time_zone, utc_time_zone);
|
||||
break;
|
||||
case FormatSettings::DateTimeInputFormat::BestEffortUS:
|
||||
res = tryParseDateTimeBestEffortUS(x, istr, time_zone, utc_time_zone);
|
||||
break;
|
||||
}
|
||||
|
||||
if (x < 0)
|
||||
x = 0;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
inline bool tryReadAsIntText(time_t & x, ReadBuffer & istr)
|
||||
{
|
||||
if (!tryReadIntText(x, istr))
|
||||
return false;
|
||||
if (x < 0)
|
||||
x = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
@ -68,15 +109,32 @@ void SerializationDateTime::deserializeWholeText(IColumn & column, ReadBuffer &
|
||||
throwUnexpectedDataAfterParsedValue(column, istr, settings, "DateTime");
|
||||
}
|
||||
|
||||
/// Non-throwing parse of a DateTime that must occupy the whole buffer.
/// The value is appended to the column only if it parses and nothing is left in the buffer.
bool SerializationDateTime::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    time_t timestamp = 0;
    const bool parsed_whole_buffer = tryReadText(timestamp, istr, settings, time_zone, utc_time_zone) && istr.eof();

    if (parsed_whole_buffer)
        assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(timestamp));

    return parsed_whole_buffer;
}
|
||||
|
||||
void SerializationDateTime::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
time_t x = 0;
|
||||
readText(x, istr, settings, time_zone, utc_time_zone);
|
||||
if (x < 0)
|
||||
x = 0;
|
||||
assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(x));
|
||||
}
|
||||
|
||||
/// Non-throwing parse of a DateTime from escaped text; appends to the column only on success.
bool SerializationDateTime::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    time_t timestamp = 0;
    const bool parsed = tryReadText(timestamp, istr, settings, time_zone, utc_time_zone);

    if (parsed)
        assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(timestamp));

    return parsed;
}
|
||||
|
||||
void SerializationDateTime::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeChar('\'', ostr);
|
||||
@ -94,15 +152,32 @@ void SerializationDateTime::deserializeTextQuoted(IColumn & column, ReadBuffer &
|
||||
}
|
||||
else /// Just 1504193808 or 01504193808
|
||||
{
|
||||
readIntText(x, istr);
|
||||
readAsIntText(x, istr);
|
||||
}
|
||||
if (x < 0)
|
||||
x = 0;
|
||||
|
||||
/// It's important to do this at the end - for exception safety.
|
||||
assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(x));
|
||||
}
|
||||
|
||||
/// Non-throwing parse of a DateTime in quoted-text form.
/// Two spellings are accepted:
///  - a single-quoted value such as '2017-08-31 18:36:48' or '1504193808', which needs a closing quote;
///  - a bare integer such as 1504193808 or 01504193808.
bool SerializationDateTime::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    time_t timestamp = 0;

    const bool has_opening_quote = checkChar('\'', istr);
    const bool parsed = has_opening_quote
        ? (tryReadText(timestamp, istr, settings, time_zone, utc_time_zone) && checkChar('\'', istr))
        : tryReadAsIntText(timestamp, istr);

    if (!parsed)
        return false;

    /// The column is modified last, only after the whole value has been validated.
    assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(timestamp));
    return true;
}
|
||||
|
||||
void SerializationDateTime::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeChar('"', ostr);
|
||||
@ -120,13 +195,30 @@ void SerializationDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & i
|
||||
}
|
||||
else
|
||||
{
|
||||
readIntText(x, istr);
|
||||
readAsIntText(x, istr);
|
||||
}
|
||||
if (x < 0)
|
||||
x = 0;
|
||||
|
||||
assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(x));
|
||||
}
|
||||
|
||||
/// Non-throwing variant of deserializeTextJSON.
/// Accepts either a double-quoted textual value or a bare integer.
/// Returns false without appending anything to the column when the value
/// cannot be parsed or the closing quote is missing.
bool SerializationDateTime::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    time_t x = 0;
    if (checkChar('"', istr))
    {
        if (!tryReadText(x, istr, settings, time_zone, utc_time_zone) || !checkChar('"', istr))
            return false;
    }
    else
    {
        if (!tryReadIntText(x, istr))
            return false;
    }

    /// Clamp negative timestamps to zero, the same way deserializeTextJSON does;
    /// otherwise the cast to UInt32 below would wrap around to a large value.
    if (x < 0)
        x = 0;

    /// Append only after all parsing has succeeded.
    assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(x));
    return true;
}
|
||||
|
||||
void SerializationDateTime::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeChar('"', ostr);
|
||||
@ -165,13 +257,48 @@ void SerializationDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & is
|
||||
readCSVString(datetime_str, istr, settings.csv);
|
||||
ReadBufferFromString buf(datetime_str);
|
||||
readText(x, buf, settings, time_zone, utc_time_zone);
|
||||
if (!buf.eof())
|
||||
throwUnexpectedDataAfterParsedValue(column, istr, settings, "DateTime");
|
||||
}
|
||||
}
|
||||
|
||||
if (x < 0)
|
||||
x = 0;
|
||||
|
||||
assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(x));
|
||||
}
|
||||
|
||||
/// Non-throwing variant of deserializeTextCSV.
/// Handles three input shapes:
///  - a value wrapped in single or double quotes (the closing quote must match the opening one);
///  - an unquoted value read directly from the stream, when the delimiter is not ','
///    or the Basic date-time input format is selected;
///  - otherwise an unquoted CSV field that is first extracted as a string and then
///    must be consumed entirely by the date-time parser.
/// Returns false without appending anything to the column on any failure, including empty input.
bool SerializationDateTime::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    time_t x = 0;

    if (istr.eof())
        return false;

    char maybe_quote = *istr.position();

    if (maybe_quote == '\'' || maybe_quote == '\"')
    {
        ++istr.position();
        if (!tryReadText(x, istr, settings, time_zone, utc_time_zone) || !checkChar(maybe_quote, istr))
            return false;
    }
    else
    {
        if (settings.csv.delimiter != ',' || settings.date_time_input_format == FormatSettings::DateTimeInputFormat::Basic)
        {
            if (!tryReadText(x, istr, settings, time_zone, utc_time_zone))
                return false;
        }
        else
        {
            String datetime_str;
            readCSVString(datetime_str, istr, settings.csv);
            ReadBufferFromString buf(datetime_str);
            /// The whole extracted field must be consumed by the parser.
            if (!tryReadText(x, buf, settings, time_zone, utc_time_zone) || !buf.eof())
                return false;
        }
    }

    /// Clamp negative timestamps to zero, the same way deserializeTextCSV does;
    /// otherwise the cast to UInt32 below would wrap around to a large value.
    if (x < 0)
        x = 0;

    /// Append only after all parsing has succeeded.
    assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(x));
    return true;
}
|
||||
|
||||
}
|
||||
|
@ -15,14 +15,19 @@ public:
|
||||
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -47,6 +47,16 @@ void SerializationDateTime64::deserializeText(IColumn & column, ReadBuffer & ist
|
||||
throwUnexpectedDataAfterParsedValue(column, istr, settings, "DateTime64");
|
||||
}
|
||||
|
||||
/// Tries to parse a DateTime64 with tryReadDateTime64Text and append it to the column.
/// When `whole` is set, the buffer must be fully consumed after the value.
/// Returns false without touching the column on failure.
bool SerializationDateTime64::tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const
{
    DateTime64 parsed = 0;

    if (!tryReadDateTime64Text(parsed, scale, istr, time_zone))
        return false;

    /// Trailing data is an error only in "whole" mode.
    if (whole && !istr.eof())
        return false;

    assert_cast<ColumnType &>(column).getData().push_back(parsed);
    return true;
}
|
||||
|
||||
void SerializationDateTime64::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
deserializeTextEscaped(column, istr, settings);
|
||||
@ -75,6 +85,29 @@ static inline void readText(DateTime64 & x, UInt32 scale, ReadBuffer & istr, con
|
||||
}
|
||||
}
|
||||
|
||||
/// Dispatches to the non-throwing DateTime64 parser that corresponds to
/// settings.date_time_input_format and returns its result.
static inline bool tryReadText(DateTime64 & x, UInt32 scale, ReadBuffer & istr, const FormatSettings & settings, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone)
{
    using InputFormat = FormatSettings::DateTimeInputFormat;

    switch (settings.date_time_input_format)
    {
        case InputFormat::Basic:
        {
            return tryReadDateTime64Text(x, scale, istr, time_zone);
        }
        case InputFormat::BestEffort:
        {
            return tryParseDateTime64BestEffort(x, scale, istr, time_zone, utc_time_zone);
        }
        case InputFormat::BestEffortUS:
        {
            return tryParseDateTime64BestEffortUS(x, scale, istr, time_zone, utc_time_zone);
        }
    }
}
|
||||
|
||||
|
||||
/// Tries to parse the entire buffer as a single DateTime64 using the configured input format.
/// Returns false without touching the column if parsing fails or unread data remains.
bool SerializationDateTime64::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    DateTime64 value = 0;

    if (!tryReadText(value, scale, istr, settings, time_zone, utc_time_zone))
        return false;

    /// Anything left in the buffer means the text was not a lone DateTime64.
    if (!istr.eof())
        return false;

    assert_cast<ColumnType &>(column).getData().push_back(value);
    return true;
}
|
||||
|
||||
void SerializationDateTime64::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
DateTime64 x = 0;
|
||||
@ -82,6 +115,15 @@ void SerializationDateTime64::deserializeTextEscaped(IColumn & column, ReadBuffe
|
||||
assert_cast<ColumnType &>(column).getData().push_back(x);
|
||||
}
|
||||
|
||||
/// Tries to parse a DateTime64 using the configured input format and append it to the column.
/// Returns false without touching the column if parsing fails.
bool SerializationDateTime64::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    DateTime64 value = 0;
    const bool parsed = tryReadText(value, scale, istr, settings, time_zone, utc_time_zone);

    if (parsed)
        assert_cast<ColumnType &>(column).getData().push_back(value);

    return parsed;
}
|
||||
|
||||
void SerializationDateTime64::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeChar('\'', ostr);
|
||||
@ -104,6 +146,23 @@ void SerializationDateTime64::deserializeTextQuoted(IColumn & column, ReadBuffer
|
||||
assert_cast<ColumnType &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
|
||||
}
|
||||
|
||||
/// Tries to parse a DateTime64 that is either single-quoted or given as a bare integer.
/// Returns false without touching the column on failure.
bool SerializationDateTime64::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    DateTime64 value = 0;
    const bool opened_with_quote = checkChar('\'', istr);

    if (opened_with_quote)
    {
        /// Cases: '2017-08-31 18:36:48' or '1504193808'
        if (!tryReadText(value, scale, istr, settings, time_zone, utc_time_zone))
            return false;
        if (!checkChar('\'', istr))
            return false;
    }
    else if (!tryReadIntText(value, istr))
    {
        /// Expected just 1504193808 or 01504193808
        return false;
    }

    /// It's important to do this at the end - for exception safety.
    assert_cast<ColumnType &>(column).getData().push_back(value);
    return true;
}
|
||||
|
||||
void SerializationDateTime64::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeChar('"', ostr);
|
||||
@ -126,6 +185,23 @@ void SerializationDateTime64::deserializeTextJSON(IColumn & column, ReadBuffer &
|
||||
assert_cast<ColumnType &>(column).getData().push_back(x);
|
||||
}
|
||||
|
||||
/// Tries to parse a DateTime64 that is either double-quoted text or a bare integer.
/// Returns false without touching the column on failure.
bool SerializationDateTime64::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    DateTime64 value = 0;
    const bool opened_with_quote = checkChar('"', istr);

    if (opened_with_quote)
    {
        if (!tryReadText(value, scale, istr, settings, time_zone, utc_time_zone))
            return false;
        /// The closing quote is mandatory once an opening one was seen.
        if (!checkChar('"', istr))
            return false;
    }
    else if (!tryReadIntText(value, istr))
    {
        return false;
    }

    assert_cast<ColumnType &>(column).getData().push_back(value);
    return true;
}
|
||||
|
||||
void SerializationDateTime64::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeChar('"', ostr);
|
||||
@ -170,4 +246,40 @@ void SerializationDateTime64::deserializeTextCSV(IColumn & column, ReadBuffer &
|
||||
assert_cast<ColumnType &>(column).getData().push_back(x);
|
||||
}
|
||||
|
||||
/// Tries to parse a DateTime64 from a CSV field.
/// A value may be wrapped in single or double quotes (the closing quote must match).
/// An unquoted value is read directly from the stream when the delimiter is not ','
/// or the Basic input format is selected; otherwise the CSV field is extracted as a
/// string first and must be consumed entirely by the parser.
/// Returns false without touching the column on failure, including empty input.
bool SerializationDateTime64::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    if (istr.eof())
        return false;

    DateTime64 value = 0;
    const char first_char = *istr.position();
    const bool is_quoted = first_char == '\'' || first_char == '\"';

    if (is_quoted)
    {
        ++istr.position();
        if (!tryReadText(value, scale, istr, settings, time_zone, utc_time_zone))
            return false;
        if (!checkChar(first_char, istr))
            return false;
    }
    else if (settings.csv.delimiter != ',' || settings.date_time_input_format == FormatSettings::DateTimeInputFormat::Basic)
    {
        if (!tryReadText(value, scale, istr, settings, time_zone, utc_time_zone))
            return false;
    }
    else
    {
        String field;
        readCSVString(field, istr, settings.csv);
        ReadBufferFromString field_buf(field);
        /// The whole extracted field must be consumed by the parser.
        if (!tryReadText(value, scale, field_buf, settings, time_zone, utc_time_zone) || !field_buf.eof())
            return false;
    }

    assert_cast<ColumnType &>(column).getData().push_back(value);
    return true;
}
|
||||
|
||||
}
|
||||
|
@ -15,15 +15,21 @@ public:
|
||||
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
|
||||
bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
|
||||
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -16,11 +16,19 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool SerializationDecimal<T>::tryReadText(T & x, ReadBuffer & istr, UInt32 precision, UInt32 scale)
|
||||
bool SerializationDecimal<T>::tryReadText(T & x, ReadBuffer & istr, UInt32 precision, UInt32 scale, bool csv)
|
||||
{
|
||||
UInt32 unread_scale = scale;
|
||||
if (!tryReadDecimalText(istr, x, precision, unread_scale))
|
||||
return false;
|
||||
if (csv)
|
||||
{
|
||||
if (!tryReadCSVDecimalText(istr, x, precision, unread_scale))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!tryReadDecimalText(istr, x, precision, unread_scale))
|
||||
return false;
|
||||
}
|
||||
|
||||
if (common::mulOverflow(x.value, DecimalUtils::scaleMultiplier<T>(unread_scale), x.value))
|
||||
return false;
|
||||
@ -59,6 +67,16 @@ void SerializationDecimal<T>::deserializeText(IColumn & column, ReadBuffer & ist
|
||||
ISerialization::throwUnexpectedDataAfterParsedValue(column, istr, settings, "Decimal");
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool SerializationDecimal<T>::tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const
|
||||
{
|
||||
T x;
|
||||
if (!tryReadText(x, istr) || (whole && !istr.eof()))
|
||||
return false;
|
||||
assert_cast<ColumnType &>(column).getData().push_back(x);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void SerializationDecimal<T>::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
|
||||
{
|
||||
@ -67,6 +85,16 @@ void SerializationDecimal<T>::deserializeTextCSV(IColumn & column, ReadBuffer &
|
||||
assert_cast<ColumnType &>(column).getData().push_back(x);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool SerializationDecimal<T>::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
|
||||
{
|
||||
T x;
|
||||
if (!tryReadText(x, istr, true))
|
||||
return false;
|
||||
assert_cast<ColumnType &>(column).getData().push_back(x);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void SerializationDecimal<T>::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
@ -88,6 +116,18 @@ void SerializationDecimal<T>::deserializeTextJSON(IColumn & column, ReadBuffer &
|
||||
assertChar('"', istr);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool SerializationDecimal<T>::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
|
||||
{
|
||||
bool have_quotes = checkChar('"', istr);
|
||||
T x;
|
||||
if (!tryReadText(x, istr) || (have_quotes && !checkChar('"', istr)))
|
||||
return false;
|
||||
|
||||
assert_cast<ColumnType &>(column).getData().push_back(x);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template class SerializationDecimal<Decimal32>;
|
||||
template class SerializationDecimal<Decimal64>;
|
||||
|
@ -16,15 +16,19 @@ public:
|
||||
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
|
||||
bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
void readText(T & x, ReadBuffer & istr, bool csv = false) const { readText(x, istr, this->precision, this->scale, csv); }
|
||||
bool tryReadText(T & x, ReadBuffer & istr, bool csv = false) const { return tryReadText(x, istr, this->precision, this->scale, csv); }
|
||||
|
||||
static void readText(T & x, ReadBuffer & istr, UInt32 precision_, UInt32 scale_, bool csv = false);
|
||||
static bool tryReadText(T & x, ReadBuffer & istr, UInt32 precision_, UInt32 scale_);
|
||||
static bool tryReadText(T & x, ReadBuffer & istr, UInt32 precision_, UInt32 scale_, bool csv = false);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -34,6 +34,27 @@ void SerializationEnum<Type>::deserializeTextEscaped(IColumn & column, ReadBuffe
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
bool SerializationEnum<Type>::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
FieldType x;
|
||||
if (settings.tsv.enum_as_number)
|
||||
{
|
||||
if (!tryReadValue(istr, x))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string field_name;
|
||||
readEscapedString(field_name, istr);
|
||||
if (!ref_enum_values.tryGetValue(x, StringRef(field_name), true))
|
||||
return false;
|
||||
}
|
||||
|
||||
assert_cast<ColumnType &>(column).getData().push_back(x);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
void SerializationEnum<Type>::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
|
||||
{
|
||||
@ -48,6 +69,20 @@ void SerializationEnum<Type>::deserializeTextQuoted(IColumn & column, ReadBuffer
|
||||
assert_cast<ColumnType &>(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name)));
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
bool SerializationEnum<Type>::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
|
||||
{
|
||||
std::string field_name;
|
||||
if (!tryReadQuotedStringWithSQLStyle(field_name, istr))
|
||||
return false;
|
||||
|
||||
FieldType x;
|
||||
if (!ref_enum_values.tryGetValue(x, StringRef(field_name)))
|
||||
return false;
|
||||
assert_cast<ColumnType &>(column).getData().push_back(x);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
void SerializationEnum<Type>::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
@ -65,6 +100,27 @@ void SerializationEnum<Type>::deserializeWholeText(IColumn & column, ReadBuffer
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
bool SerializationEnum<Type>::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
FieldType x;
|
||||
if (settings.tsv.enum_as_number)
|
||||
{
|
||||
if (!tryReadValue(istr, x) || !istr.eof())
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string field_name;
|
||||
readStringUntilEOF(field_name, istr);
|
||||
if (!ref_enum_values.tryGetValue(x, StringRef(field_name), true))
|
||||
return false;
|
||||
}
|
||||
|
||||
assert_cast<ColumnType &>(column).getData().push_back(x);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
void SerializationEnum<Type>::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
@ -90,6 +146,27 @@ void SerializationEnum<Type>::deserializeTextJSON(IColumn & column, ReadBuffer &
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
bool SerializationEnum<Type>::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
|
||||
{
|
||||
FieldType x;
|
||||
if (!istr.eof() && *istr.position() != '"')
|
||||
{
|
||||
if (!tryReadValue(istr, x))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string field_name;
|
||||
readJSONString(field_name, istr);
|
||||
if (!ref_enum_values.tryGetValue(x, StringRef(field_name)))
|
||||
return false;
|
||||
}
|
||||
|
||||
assert_cast<ColumnType &>(column).getData().push_back(x);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
void SerializationEnum<Type>::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
|
||||
{
|
||||
@ -109,6 +186,28 @@ void SerializationEnum<Type>::deserializeTextCSV(IColumn & column, ReadBuffer &
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
bool SerializationEnum<Type>::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
FieldType x;
|
||||
|
||||
if (settings.csv.enum_as_number)
|
||||
{
|
||||
if (!tryReadValue(istr, x))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string field_name;
|
||||
readCSVString(field_name, istr, settings.csv);
|
||||
if (!ref_enum_values.tryGetValue(x, StringRef(field_name), true))
|
||||
return false;
|
||||
}
|
||||
|
||||
assert_cast<ColumnType &>(column).getData().push_back(x);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
void SerializationEnum<Type>::serializeTextMarkdown(
|
||||
const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
|
@ -34,15 +34,20 @@ public:
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
|
||||
@ -53,6 +58,11 @@ public:
|
||||
return ref_enum_values.findByValue(x)->first;
|
||||
}
|
||||
|
||||
/// Reads a numeric value into `x` and reports success only if the read succeeded
/// and ref_enum_values.hasValue accepts the result.
bool tryReadValue(ReadBuffer & istr, FieldType & x) const
{
    if (!tryReadText(x, istr))
        return false;
    return ref_enum_values.hasValue(x);
}
|
||||
|
||||
std::optional<EnumValues<Type>> own_enum_values;
|
||||
std::shared_ptr<const DataTypeEnum<Type>> own_enum_type;
|
||||
const EnumValues<Type> & ref_enum_values;
|
||||
|
@ -150,12 +150,49 @@ static inline void read(const SerializationFixedString & self, IColumn & column,
|
||||
}
|
||||
}
|
||||
|
||||
/// Non-throwing length alignment for the string that starts at `string_start` in `data`.
/// If the string is shorter than `n`, the buffer is extended with resize_fill up to `n` bytes.
/// If it is longer than `n`, the string is removed from `data` and false is returned.
bool SerializationFixedString::tryAlignStringLength(size_t n, PaddedPODArray<UInt8> & data, size_t string_start)
{
    const size_t current_length = data.size() - string_start;

    if (current_length > n)
    {
        /// Too long: drop the just-read bytes and report failure.
        data.resize_assume_reserved(string_start);
        return false;
    }

    if (current_length < n)
        data.resize_fill(string_start + n);

    return true;
}
|
||||
|
||||
template <typename Reader>
|
||||
static inline bool tryRead(const SerializationFixedString & self, IColumn & column, Reader && reader)
|
||||
{
|
||||
ColumnFixedString::Chars & data = typeid_cast<ColumnFixedString &>(column).getChars();
|
||||
size_t prev_size = data.size();
|
||||
try
|
||||
{
|
||||
return reader(data) && SerializationFixedString::tryAlignStringLength(self.getN(), data, prev_size);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
data.resize_assume_reserved(prev_size);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Reads an escaped string into the column through the shared `read` helper.
void SerializationFixedString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    auto read_escaped = [&istr](ColumnFixedString::Chars & chars) { readEscapedStringInto(chars, istr); };
    read(*this, column, read_escaped);
}
|
||||
|
||||
/// Non-throwing variant: reads an escaped string through the shared `tryRead` helper.
/// The reader itself always reports success; failure comes from tryRead.
bool SerializationFixedString::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    auto read_escaped = [&istr](ColumnFixedString::Chars & chars)
    {
        readEscapedStringInto(chars, istr);
        return true;
    };
    return tryRead(*this, column, read_escaped);
}
|
||||
|
||||
|
||||
void SerializationFixedString::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
|
||||
{
|
||||
@ -169,12 +206,22 @@ void SerializationFixedString::deserializeTextQuoted(IColumn & column, ReadBuffe
|
||||
read(*this, column, [&istr](ColumnFixedString::Chars & data) { readQuotedStringInto<true>(data, istr); });
|
||||
}
|
||||
|
||||
/// Non-throwing variant: reads a quoted string with tryReadQuotedStringInto<true>
/// through the shared `tryRead` helper.
bool SerializationFixedString::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    auto read_quoted = [&istr](ColumnFixedString::Chars & chars)
    {
        return tryReadQuotedStringInto<true>(chars, istr);
    };
    return tryRead(*this, column, read_quoted);
}
|
||||
|
||||
|
||||
/// Reads everything up to EOF into the column through the shared `read` helper.
void SerializationFixedString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    auto read_until_eof = [&istr](ColumnFixedString::Chars & chars) { readStringUntilEOFInto(chars, istr); };
    read(*this, column, read_until_eof);
}
|
||||
|
||||
/// Non-throwing variant: reads everything up to EOF through the shared `tryRead` helper.
/// The reader itself always reports success; failure comes from tryRead.
bool SerializationFixedString::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    auto read_until_eof = [&istr](ColumnFixedString::Chars & chars)
    {
        readStringUntilEOFInto(chars, istr);
        return true;
    };
    return tryRead(*this, column, read_until_eof);
}
|
||||
|
||||
|
||||
void SerializationFixedString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
@ -188,6 +235,10 @@ void SerializationFixedString::deserializeTextJSON(IColumn & column, ReadBuffer
|
||||
read(*this, column, [&istr](ColumnFixedString::Chars & data) { readJSONStringInto(data, istr); });
|
||||
}
|
||||
|
||||
/// Non-throwing variant: reads a JSON string with tryReadJSONStringInto
/// through the shared `tryRead` helper.
bool SerializationFixedString::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    auto read_json = [&istr](ColumnFixedString::Chars & chars)
    {
        return tryReadJSONStringInto(chars, istr);
    };
    return tryRead(*this, column, read_json);
}
|
||||
|
||||
void SerializationFixedString::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
|
||||
{
|
||||
@ -208,6 +259,11 @@ void SerializationFixedString::deserializeTextCSV(IColumn & column, ReadBuffer &
|
||||
read(*this, column, [&istr, &csv = settings.csv](ColumnFixedString::Chars & data) { readCSVStringInto(data, istr, csv); });
|
||||
}
|
||||
|
||||
/// Non-throwing variant: reads a CSV field with
/// readCSVStringInto<ColumnFixedString::Chars, false, false> through the shared `tryRead` helper.
/// The reader itself always reports success; failure comes from tryRead.
bool SerializationFixedString::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    auto read_csv = [&istr, &csv = settings.csv](ColumnFixedString::Chars & chars)
    {
        readCSVStringInto<ColumnFixedString::Chars, false, false>(chars, istr, csv);
        return true;
    };
    return tryRead(*this, column, read_csv);
}
|
||||
|
||||
void SerializationFixedString::serializeTextMarkdown(
|
||||
const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const
|
||||
{
|
||||
|
@ -26,20 +26,25 @@ public:
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
|
||||
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
|
||||
@ -47,6 +52,7 @@ public:
|
||||
/// If the length is less than getN() the function will add zero characters up to getN().
|
||||
/// If the length is greater than getN() the function will throw an exception.
|
||||
static void alignStringLength(size_t n, PaddedPODArray<UInt8> & data, size_t string_start);
|
||||
static bool tryAlignStringLength(size_t n, PaddedPODArray<UInt8> & data, size_t string_start);
|
||||
};
|
||||
|
||||
}
|
||||
|
187
src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp
Normal file
187
src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp
Normal file
@ -0,0 +1,187 @@
|
||||
#include <DataTypes/Serializations/SerializationIPv4andIPv6.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
template <typename IPv>
|
||||
void SerializationIP<IPv>::serializeText(const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings &) const
|
||||
{
|
||||
writeText(assert_cast<const ColumnVector<IPv> &>(column).getData()[row_num], ostr);
|
||||
}
|
||||
|
||||
template <typename IPv>
|
||||
void SerializationIP<IPv>::deserializeText(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, bool whole) const
|
||||
{
|
||||
IPv x;
|
||||
readText(x, istr);
|
||||
|
||||
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x);
|
||||
|
||||
if (whole && !istr.eof())
|
||||
throwUnexpectedDataAfterParsedValue(column, istr, settings, TypeName<IPv>.data());
|
||||
}
|
||||
|
||||
template <typename IPv>
|
||||
bool SerializationIP<IPv>::tryDeserializeText(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &, bool whole) const
|
||||
{
|
||||
IPv x;
|
||||
if (!tryReadText(x, istr) || (whole && !istr.eof()))
|
||||
return false;
|
||||
|
||||
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename IPv>
|
||||
void SerializationIP<IPv>::serializeTextQuoted(const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const
|
||||
{
|
||||
writeChar('\'', ostr);
|
||||
serializeText(column, row_num, ostr, settings);
|
||||
writeChar('\'', ostr);
|
||||
}
|
||||
|
||||
template <typename IPv>
|
||||
void SerializationIP<IPv>::deserializeTextQuoted(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const
|
||||
{
|
||||
IPv x;
|
||||
assertChar('\'', istr);
|
||||
readText(x, istr);
|
||||
assertChar('\'', istr);
|
||||
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
|
||||
}
|
||||
|
||||
template <typename IPv>
|
||||
bool SerializationIP<IPv>::tryDeserializeTextQuoted(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const
|
||||
{
|
||||
IPv x;
|
||||
if (!checkChar('\'', istr) || !tryReadText(x, istr) || !checkChar('\'', istr))
|
||||
return false;
|
||||
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename IPv>
|
||||
void SerializationIP<IPv>::serializeTextJSON(const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const
|
||||
{
|
||||
writeChar('"', ostr);
|
||||
serializeText(column, row_num, ostr, settings);
|
||||
writeChar('"', ostr);
|
||||
}
|
||||
|
||||
template <typename IPv>
|
||||
void SerializationIP<IPv>::deserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
|
||||
{
|
||||
IPv x;
|
||||
assertChar('"', istr);
|
||||
readText(x, istr);
|
||||
/// this code looks weird, but we want to throw specific exception to match original behavior...
|
||||
if (istr.eof())
|
||||
assertChar('"', istr);
|
||||
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x);
|
||||
if (*istr.position() != '"')
|
||||
throwUnexpectedDataAfterParsedValue(column, istr, settings, TypeName<IPv>.data());
|
||||
istr.ignore();
|
||||
}
|
||||
|
||||
template <typename IPv>
|
||||
bool SerializationIP<IPv>::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const
|
||||
{
|
||||
IPv x;
|
||||
if (!checkChar('"', istr) || !tryReadText(x, istr) || !checkChar('"', istr))
|
||||
return false;
|
||||
|
||||
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename IPv>
|
||||
void SerializationIP<IPv>::serializeTextCSV(const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const
|
||||
{
|
||||
writeChar('"', ostr);
|
||||
serializeText(column, row_num, ostr, settings);
|
||||
writeChar('"', ostr);
|
||||
}
|
||||
|
||||
template <typename IPv>
|
||||
void SerializationIP<IPv>::deserializeTextCSV(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const
|
||||
{
|
||||
IPv value;
|
||||
readCSV(value, istr);
|
||||
|
||||
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(value);
|
||||
}
|
||||
|
||||
template <typename IPv>
|
||||
bool SerializationIP<IPv>::tryDeserializeTextCSV(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const
|
||||
{
|
||||
IPv value;
|
||||
if (!tryReadCSV(value, istr))
|
||||
return false;
|
||||
|
||||
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(value);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename IPv>
|
||||
void SerializationIP<IPv>::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const
|
||||
{
|
||||
IPv x = field.get<IPv>();
|
||||
if constexpr (std::is_same_v<IPv, IPv6>)
|
||||
writeBinary(x, ostr);
|
||||
else
|
||||
writeBinaryLittleEndian(x, ostr);
|
||||
}
|
||||
|
||||
template <typename IPv>
|
||||
void SerializationIP<IPv>::deserializeBinary(DB::Field & field, DB::ReadBuffer & istr, const DB::FormatSettings &) const
|
||||
{
|
||||
IPv x;
|
||||
if constexpr (std::is_same_v<IPv, IPv6>)
|
||||
readBinary(x, istr);
|
||||
else
|
||||
readBinaryLittleEndian(x, istr);
|
||||
field = NearestFieldType<IPv>(x);
|
||||
}
|
||||
|
||||
template <typename IPv>
|
||||
void SerializationIP<IPv>::serializeBinary(const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings &) const
|
||||
{
|
||||
writeBinary(assert_cast<const ColumnVector<IPv> &>(column).getData()[row_num], ostr);
|
||||
}
|
||||
|
||||
template <typename IPv>
|
||||
void SerializationIP<IPv>::deserializeBinary(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const
|
||||
{
|
||||
IPv x;
|
||||
readBinary(x.toUnderType(), istr);
|
||||
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x);
|
||||
}
|
||||
|
||||
template <typename IPv>
|
||||
void SerializationIP<IPv>::serializeBinaryBulk(const DB::IColumn & column, DB::WriteBuffer & ostr, size_t offset, size_t limit) const
|
||||
{
|
||||
const typename ColumnVector<IPv>::Container & x = typeid_cast<const ColumnVector<IPv> &>(column).getData();
|
||||
|
||||
size_t size = x.size();
|
||||
|
||||
if (limit == 0 || offset + limit > size)
|
||||
limit = size - offset;
|
||||
|
||||
if (limit)
|
||||
ostr.write(reinterpret_cast<const char *>(&x[offset]), sizeof(IPv) * limit);
|
||||
}
|
||||
|
||||
template <typename IPv>
|
||||
void SerializationIP<IPv>::deserializeBinaryBulk(DB::IColumn & column, DB::ReadBuffer & istr, size_t limit, double) const
|
||||
{
|
||||
typename ColumnVector<IPv>::Container & x = typeid_cast<ColumnVector<IPv> &>(column).getData();
|
||||
size_t initial_size = x.size();
|
||||
x.resize(initial_size + limit);
|
||||
size_t size = istr.readBig(reinterpret_cast<char*>(&x[initial_size]), sizeof(IPv) * limit);
|
||||
x.resize(initial_size + size / sizeof(IPv));
|
||||
}
|
||||
|
||||
/// Explicit instantiations of SerializationIP for the two IP types (IPv4 and IPv6).
template class SerializationIP<IPv4>;
|
||||
template class SerializationIP<IPv6>;
|
||||
|
||||
}
|
@ -13,123 +13,30 @@ template <typename IPv>
|
||||
class SerializationIP : public SimpleTextSerialization
|
||||
{
|
||||
public:
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override
|
||||
{
|
||||
writeText(assert_cast<const ColumnVector<IPv> &>(column).getData()[row_num], ostr);
|
||||
}
|
||||
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override
|
||||
{
|
||||
IPv x;
|
||||
readText(x, istr);
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
|
||||
bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
|
||||
|
||||
if (whole && !istr.eof())
|
||||
throwUnexpectedDataAfterParsedValue(column, istr, settings, TypeName<IPv>.data());
|
||||
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x);
|
||||
}
|
||||
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override
|
||||
{
|
||||
serializeText(column, row_num, ostr, settings);
|
||||
}
|
||||
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
|
||||
{
|
||||
deserializeText(column, istr, settings, false);
|
||||
}
|
||||
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override
|
||||
{
|
||||
writeChar('\'', ostr);
|
||||
serializeText(column, row_num, ostr, settings);
|
||||
writeChar('\'', ostr);
|
||||
}
|
||||
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override
|
||||
{
|
||||
IPv x;
|
||||
assertChar('\'', istr);
|
||||
readText(x, istr);
|
||||
assertChar('\'', istr);
|
||||
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
|
||||
}
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override
|
||||
{
|
||||
writeChar('"', ostr);
|
||||
serializeText(column, row_num, ostr, settings);
|
||||
writeChar('"', ostr);
|
||||
}
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
|
||||
{
|
||||
IPv x;
|
||||
assertChar('"', istr);
|
||||
readText(x, istr);
|
||||
/// this code looks weird, but we want to throw specific exception to match original behavior...
|
||||
if (istr.eof())
|
||||
assertChar('"', istr);
|
||||
if (*istr.position() != '"')
|
||||
throwUnexpectedDataAfterParsedValue(column, istr, settings, TypeName<IPv>.data());
|
||||
istr.ignore();
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x);
|
||||
}
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override
|
||||
{
|
||||
writeChar('"', ostr);
|
||||
serializeText(column, row_num, ostr, settings);
|
||||
writeChar('"', ostr);
|
||||
}
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &/* settings*/) const override
|
||||
{
|
||||
IPv value;
|
||||
readCSV(value, istr);
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &/* settings*/) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &/* settings*/) const override;
|
||||
|
||||
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(value);
|
||||
}
|
||||
void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override
|
||||
{
|
||||
IPv x = field.get<IPv>();
|
||||
if constexpr (std::is_same_v<IPv, IPv6>)
|
||||
writeBinary(x, ostr);
|
||||
else
|
||||
writeBinaryLittleEndian(x, ostr);
|
||||
}
|
||||
void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override
|
||||
{
|
||||
IPv x;
|
||||
if constexpr (std::is_same_v<IPv, IPv6>)
|
||||
readBinary(x, istr);
|
||||
else
|
||||
readBinaryLittleEndian(x, istr);
|
||||
field = NearestFieldType<IPv>(x);
|
||||
}
|
||||
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override
|
||||
{
|
||||
writeBinary(assert_cast<const ColumnVector<IPv> &>(column).getData()[row_num], ostr);
|
||||
}
|
||||
void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override
|
||||
{
|
||||
IPv x;
|
||||
readBinary(x.toUnderType(), istr);
|
||||
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x);
|
||||
}
|
||||
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override
|
||||
{
|
||||
const typename ColumnVector<IPv>::Container & x = typeid_cast<const ColumnVector<IPv> &>(column).getData();
|
||||
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
size_t size = x.size();
|
||||
|
||||
if (limit == 0 || offset + limit > size)
|
||||
limit = size - offset;
|
||||
|
||||
if (limit)
|
||||
ostr.write(reinterpret_cast<const char *>(&x[offset]), sizeof(IPv) * limit);
|
||||
}
|
||||
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const override
|
||||
{
|
||||
typename ColumnVector<IPv>::Container & x = typeid_cast<ColumnVector<IPv> &>(column).getData();
|
||||
size_t initial_size = x.size();
|
||||
x.resize(initial_size + limit);
|
||||
size_t size = istr.readBig(reinterpret_cast<char*>(&x[initial_size]), sizeof(IPv) * limit);
|
||||
x.resize(initial_size + size / sizeof(IPv));
|
||||
}
|
||||
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
|
||||
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const override;
|
||||
};
|
||||
|
||||
using SerializationIPv4 = SerializationIP<IPv4>;
|
||||
|
@ -700,6 +700,11 @@ void SerializationLowCardinality::deserializeTextEscaped(IColumn & column, ReadB
|
||||
deserializeImpl(column, &ISerialization::deserializeTextEscaped, istr, settings);
|
||||
}
|
||||
|
||||
/// Bool-returning escaped-text deserialization: forwards to tryDeserializeImpl with
/// ISerialization::tryDeserializeTextEscaped and returns its result.
bool SerializationLowCardinality::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    const bool parsed = tryDeserializeImpl(column, &ISerialization::tryDeserializeTextEscaped, istr, settings);
    return parsed;
}
|
||||
|
||||
void SerializationLowCardinality::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
serializeImpl(column, row_num, &ISerialization::serializeTextQuoted, ostr, settings);
|
||||
@ -710,11 +715,21 @@ void SerializationLowCardinality::deserializeTextQuoted(IColumn & column, ReadBu
|
||||
deserializeImpl(column, &ISerialization::deserializeTextQuoted, istr, settings);
|
||||
}
|
||||
|
||||
/// Bool-returning quoted-text deserialization: forwards to tryDeserializeImpl with
/// ISerialization::tryDeserializeTextQuoted and returns its result.
bool SerializationLowCardinality::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    const bool parsed = tryDeserializeImpl(column, &ISerialization::tryDeserializeTextQuoted, istr, settings);
    return parsed;
}
|
||||
|
||||
/// Whole-text deserialization: forwards to deserializeImpl with
/// ISerialization::deserializeWholeText.
void SerializationLowCardinality::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    deserializeImpl(
        column,
        &ISerialization::deserializeWholeText,
        istr,
        settings);
}
|
||||
|
||||
/// Bool-returning whole-text deserialization: forwards to tryDeserializeImpl with
/// ISerialization::tryDeserializeWholeText and returns its result.
bool SerializationLowCardinality::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    const bool parsed = tryDeserializeImpl(column, &ISerialization::tryDeserializeWholeText, istr, settings);
    return parsed;
}
|
||||
|
||||
void SerializationLowCardinality::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
serializeImpl(column, row_num, &ISerialization::serializeTextCSV, ostr, settings);
|
||||
@ -725,6 +740,11 @@ void SerializationLowCardinality::deserializeTextCSV(IColumn & column, ReadBuffe
|
||||
deserializeImpl(column, &ISerialization::deserializeTextCSV, istr, settings);
|
||||
}
|
||||
|
||||
/// Bool-returning CSV deserialization: forwards to tryDeserializeImpl with
/// ISerialization::tryDeserializeTextCSV and returns its result.
bool SerializationLowCardinality::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    const bool parsed = tryDeserializeImpl(column, &ISerialization::tryDeserializeTextCSV, istr, settings);
    return parsed;
}
|
||||
|
||||
void SerializationLowCardinality::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
serializeImpl(column, row_num, &ISerialization::serializeText, ostr, settings);
|
||||
@ -740,6 +760,11 @@ void SerializationLowCardinality::deserializeTextJSON(IColumn & column, ReadBuff
|
||||
deserializeImpl(column, &ISerialization::deserializeTextJSON, istr, settings);
|
||||
}
|
||||
|
||||
/// Bool-returning JSON deserialization: forwards to tryDeserializeImpl with
/// ISerialization::tryDeserializeTextJSON and returns its result.
bool SerializationLowCardinality::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    const bool parsed = tryDeserializeImpl(column, &ISerialization::tryDeserializeTextJSON, istr, settings);
    return parsed;
}
|
||||
|
||||
void SerializationLowCardinality::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
serializeImpl(column, row_num, &ISerialization::serializeTextXML, ostr, settings);
|
||||
@ -750,6 +775,11 @@ void SerializationLowCardinality::deserializeTextRaw(IColumn & column, ReadBuffe
|
||||
deserializeImpl(column, &ISerialization::deserializeTextRaw, istr, settings);
|
||||
}
|
||||
|
||||
/// Bool-returning raw-text deserialization: forwards to tryDeserializeImpl with
/// ISerialization::tryDeserializeTextRaw and returns its result.
bool SerializationLowCardinality::tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    const bool parsed = tryDeserializeImpl(column, &ISerialization::tryDeserializeTextRaw, istr, settings);
    return parsed;
}
|
||||
|
||||
void SerializationLowCardinality::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
serializeImpl(column, row_num, &ISerialization::serializeTextRaw, ostr, settings);
|
||||
@ -769,7 +799,7 @@ template <typename... Params, typename... Args>
|
||||
void SerializationLowCardinality::deserializeImpl(
|
||||
IColumn & column, SerializationLowCardinality::DeserializeFunctionPtr<Params...> func, Args &&... args) const
|
||||
{
|
||||
auto & low_cardinality_column= getColumnLowCardinality(column);
|
||||
auto & low_cardinality_column = getColumnLowCardinality(column);
|
||||
auto temp_column = low_cardinality_column.getDictionary().getNestedColumn()->cloneEmpty();
|
||||
|
||||
auto serialization = dictionary_type->getDefaultSerialization();
|
||||
@ -778,4 +808,19 @@ void SerializationLowCardinality::deserializeImpl(
|
||||
low_cardinality_column.insertFromFullColumn(*temp_column, 0);
|
||||
}
|
||||
|
||||
template <typename... Params, typename... Args>
|
||||
bool SerializationLowCardinality::tryDeserializeImpl(
|
||||
IColumn & column, SerializationLowCardinality::TryDeserializeFunctionPtr<Params...> func, Args &&... args) const
|
||||
{
|
||||
auto & low_cardinality_column = getColumnLowCardinality(column);
|
||||
auto temp_column = low_cardinality_column.getDictionary().getNestedColumn()->cloneEmpty();
|
||||
|
||||
auto serialization = dictionary_type->getDefaultSerialization();
|
||||
if (!(serialization.get()->*func)(*temp_column, std::forward<Args>(args)...))
|
||||
return false;
|
||||
|
||||
low_cardinality_column.insertFromFullColumn(*temp_column, 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -55,16 +55,22 @@ public:
|
||||
void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
|
||||
private:
|
||||
@ -79,6 +85,12 @@ private:
|
||||
|
||||
template <typename ... Params, typename... Args>
|
||||
void deserializeImpl(IColumn & column, DeserializeFunctionPtr<Params...> func, Args &&... args) const;
|
||||
|
||||
template <typename ... Params>
|
||||
using TryDeserializeFunctionPtr = bool (ISerialization::*)(IColumn &, Params ...) const;
|
||||
|
||||
template <typename ... Params, typename... Args>
|
||||
bool tryDeserializeImpl(IColumn & column, TryDeserializeFunctionPtr<Params...> func, Args &&... args) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -115,9 +115,11 @@ void SerializationMap::serializeTextImpl(
|
||||
writeChar('}', ostr);
|
||||
}
|
||||
|
||||
template <typename Reader>
|
||||
void SerializationMap::deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const
|
||||
template <typename ReturnType, typename Reader>
|
||||
ReturnType SerializationMap::deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
auto & column_map = assert_cast<ColumnMap &>(column);
|
||||
|
||||
auto & nested_array = column_map.getNestedColumn();
|
||||
@ -128,7 +130,21 @@ void SerializationMap::deserializeTextImpl(IColumn & column, ReadBuffer & istr,
|
||||
auto & value_column = nested_tuple.getColumn(1);
|
||||
|
||||
size_t size = 0;
|
||||
assertChar('{', istr);
|
||||
if constexpr (throw_exception)
|
||||
assertChar('{', istr);
|
||||
else if (!checkChar('{', istr))
|
||||
return ReturnType(false);
|
||||
|
||||
auto on_error_no_throw = [&]()
|
||||
{
|
||||
if (size)
|
||||
{
|
||||
nested_tuple.getColumnPtr(0) = key_column.cut(0, offsets.back());
|
||||
nested_tuple.getColumnPtr(1) = value_column.cut(0, offsets.back());
|
||||
}
|
||||
|
||||
return ReturnType(false);
|
||||
};
|
||||
|
||||
try
|
||||
{
|
||||
@ -138,9 +154,15 @@ void SerializationMap::deserializeTextImpl(IColumn & column, ReadBuffer & istr,
|
||||
if (!first)
|
||||
{
|
||||
if (*istr.position() == ',')
|
||||
{
|
||||
++istr.position();
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::CANNOT_READ_MAP_FROM_TEXT, "Cannot read Map from text");
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(ErrorCodes::CANNOT_READ_MAP_FROM_TEXT, "Cannot read Map from text");
|
||||
return on_error_no_throw();
|
||||
}
|
||||
}
|
||||
|
||||
first = false;
|
||||
@ -150,19 +172,32 @@ void SerializationMap::deserializeTextImpl(IColumn & column, ReadBuffer & istr,
|
||||
if (*istr.position() == '}')
|
||||
break;
|
||||
|
||||
reader(istr, key, key_column);
|
||||
if constexpr (throw_exception)
|
||||
reader(istr, key, key_column);
|
||||
else if (!reader(istr, key, key_column))
|
||||
return on_error_no_throw();
|
||||
|
||||
++size;
|
||||
|
||||
skipWhitespaceIfAny(istr);
|
||||
assertChar(':', istr);
|
||||
if constexpr (throw_exception)
|
||||
assertChar(':', istr);
|
||||
else if (!checkChar(':', istr))
|
||||
return on_error_no_throw();
|
||||
skipWhitespaceIfAny(istr);
|
||||
|
||||
reader(istr, value, value_column);
|
||||
if constexpr (throw_exception)
|
||||
reader(istr, value, value_column);
|
||||
else if (!reader(istr, value, value_column))
|
||||
return on_error_no_throw();
|
||||
|
||||
skipWhitespaceIfAny(istr);
|
||||
}
|
||||
|
||||
assertChar('}', istr);
|
||||
if constexpr (throw_exception)
|
||||
assertChar('}', istr);
|
||||
else if (!checkChar('}', istr))
|
||||
return on_error_no_throw();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -171,10 +206,14 @@ void SerializationMap::deserializeTextImpl(IColumn & column, ReadBuffer & istr,
|
||||
nested_tuple.getColumnPtr(0) = key_column.cut(0, offsets.back());
|
||||
nested_tuple.getColumnPtr(1) = value_column.cut(0, offsets.back());
|
||||
}
|
||||
throw;
|
||||
|
||||
if constexpr (throw_exception)
|
||||
throw;
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
offsets.push_back(offsets.back() + size);
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
void SerializationMap::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
@ -192,8 +231,8 @@ void SerializationMap::deserializeText(IColumn & column, ReadBuffer & istr, cons
|
||||
deserializeTextImpl(column, istr,
|
||||
[&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn)
|
||||
{
|
||||
if (settings.null_as_default)
|
||||
SerializationNullable::deserializeTextQuotedImpl(subcolumn, buf, settings, subcolumn_serialization);
|
||||
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn))
|
||||
SerializationNullable::deserializeNullAsDefaultOrNestedTextQuoted(subcolumn, buf, settings, subcolumn_serialization);
|
||||
else
|
||||
subcolumn_serialization->deserializeTextQuoted(subcolumn, buf, settings);
|
||||
});
|
||||
@ -202,6 +241,28 @@ void SerializationMap::deserializeText(IColumn & column, ReadBuffer & istr, cons
|
||||
throwUnexpectedDataAfterParsedValue(column, istr, settings, "Map");
|
||||
}
|
||||
|
||||
/// Bool-returning text deserialization of a Map value.
/// Keys and values are read in quoted-text form through deserializeTextImpl<bool>.
/// Returns false if that fails, or if `whole` is set and the buffer is not at EOF afterwards;
/// in the latter case the last row of the column is removed with popBack(1).
bool SerializationMap::tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
{
    auto read_element = [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> bool
    {
        /// With null_as_default, subcolumns that are not Nullable / LowCardinality(Nullable)
        /// go through the null-as-default reader.
        const bool use_null_as_default = settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn);
        if (use_null_as_default)
            return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextQuoted(subcolumn, buf, settings, subcolumn_serialization);

        return subcolumn_serialization->tryDeserializeTextQuoted(subcolumn, buf, settings);
    };

    if (!deserializeTextImpl<bool>(column, istr, read_element))
        return false;

    const bool has_trailing_data = whole && !istr.eof();
    if (!has_trailing_data)
        return true;

    /// The map was parsed but data follows it: drop the last row of the column.
    column.popBack(1);
    return false;
}
|
||||
|
||||
void SerializationMap::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
serializeTextImpl(column, row_num, ostr,
|
||||
@ -260,13 +321,25 @@ void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr,
|
||||
deserializeTextImpl(column, istr,
|
||||
[&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn)
|
||||
{
|
||||
if (settings.null_as_default)
|
||||
SerializationNullable::deserializeTextJSONImpl(subcolumn, buf, settings, subcolumn_serialization);
|
||||
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn))
|
||||
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization);
|
||||
else
|
||||
subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings);
|
||||
});
|
||||
}
|
||||
|
||||
/// Bool-returning JSON deserialization of a Map value.
/// Keys and values are read in JSON form through deserializeTextImpl<bool>,
/// whose result is returned.
bool SerializationMap::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    auto read_element = [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> bool
    {
        /// With null_as_default, subcolumns that are not Nullable / LowCardinality(Nullable)
        /// go through the null-as-default reader.
        const bool use_null_as_default = settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn);
        if (use_null_as_default)
            return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization);

        return subcolumn_serialization->tryDeserializeTextJSON(subcolumn, buf, settings);
    };

    const bool parsed = deserializeTextImpl<bool>(column, istr, read_element);
    return parsed;
}
|
||||
|
||||
void SerializationMap::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
const auto & column_map = assert_cast<const ColumnMap &>(column);
|
||||
@ -308,6 +381,15 @@ void SerializationMap::deserializeTextCSV(IColumn & column, ReadBuffer & istr, c
|
||||
deserializeText(column, rb, settings, true);
|
||||
}
|
||||
|
||||
/// Bool-returning CSV deserialization of a Map value.
/// Reads one CSV field into a string with tryReadCSV, then parses that string
/// with tryDeserializeText in `whole` mode.
bool SerializationMap::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String field_text;
    if (!tryReadCSV(field_text, istr, settings.csv))
        return false;

    ReadBufferFromString field_buffer(field_text);
    constexpr bool whole = true;
    return tryDeserializeText(column, field_buffer, settings, whole);
}
|
||||
|
||||
void SerializationMap::enumerateStreams(
|
||||
EnumerateStreamsSettings & settings,
|
||||
const StreamCallback & callback,
|
||||
|
@ -24,13 +24,16 @@ public:
|
||||
void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
|
||||
bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override;
|
||||
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void enumerateStreams(
|
||||
EnumerateStreamsSettings & settings,
|
||||
@ -68,8 +71,8 @@ private:
|
||||
template <typename KeyWriter, typename ValueWriter>
|
||||
void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, KeyWriter && key_writer, ValueWriter && value_writer) const;
|
||||
|
||||
template <typename Reader>
|
||||
void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const;
|
||||
template <typename ReturnType = void, typename Reader>
|
||||
ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -25,6 +25,7 @@ public:
|
||||
/// Not supported by this serialization: every call is forwarded to throwNoSerialization().
void deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const override
{
    throwNoSerialization();
}
|
||||
/// Not supported by this serialization: every call is forwarded to throwNoSerialization().
void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override
{
    throwNoSerialization();
}
|
||||
/// Not supported by this serialization: every call is forwarded to throwNoSerialization().
void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override
{
    throwNoSerialization();
}
|
||||
/// Not supported by this serialization: every call is forwarded to throwNoSerialization().
/// NOTE(review): no value is returned here, so throwNoSerialization() presumably never returns — confirm.
bool tryDeserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override
{
    throwNoSerialization();
}
|
||||
|
||||
/// These methods read and write zero bytes just to make it possible to figure out the size of the column.
|
||||
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
|
||||
|
@ -189,55 +189,59 @@ void SerializationNullable::serializeBinary(const IColumn & column, size_t row_n
|
||||
nested->serializeBinary(col.getNestedColumn(), row_num, ostr, settings);
|
||||
}
|
||||
|
||||
/// Deserialize value into ColumnNullable.
|
||||
/// We need to insert both to nested column and to null byte map, or, in case of exception, to not insert at all.
|
||||
template <typename ReturnType = void, typename CheckForNull, typename DeserializeNested>
|
||||
requires std::same_as<ReturnType, void>
|
||||
static ReturnType
|
||||
safeDeserialize(IColumn & column, const ISerialization &, CheckForNull && check_for_null, DeserializeNested && deserialize_nested)
|
||||
template <typename ReturnType>
|
||||
ReturnType safeAppendToNullMap(ColumnNullable & column, bool is_null)
|
||||
{
|
||||
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
|
||||
|
||||
if (check_for_null())
|
||||
try
|
||||
{
|
||||
col.insertDefault();
|
||||
column.getNullMapData().push_back(is_null);
|
||||
}
|
||||
else
|
||||
catch (...)
|
||||
{
|
||||
deserialize_nested(col.getNestedColumn());
|
||||
|
||||
try
|
||||
{
|
||||
col.getNullMapData().push_back(0);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
col.getNestedColumn().popBack(1);
|
||||
column.getNestedColumn().popBack(1);
|
||||
if constexpr (std::is_same_v<ReturnType, void>)
|
||||
throw;
|
||||
}
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
/// Deserialize value into non-nullable column. In case of NULL, insert default value and return false.
|
||||
/// Deserialize value into non-nullable column. In case of NULL, insert default and set is_null to true.
|
||||
/// If ReturnType is bool, return true if parsing was successful and false in case of any error.
|
||||
template <typename ReturnType = void, typename CheckForNull, typename DeserializeNested>
|
||||
requires std::same_as<ReturnType, bool>
|
||||
static ReturnType
|
||||
safeDeserialize(IColumn & column, const ISerialization &, CheckForNull && check_for_null, DeserializeNested && deserialize_nested)
|
||||
static ReturnType deserializeImpl(IColumn & column, ReadBuffer & buf, CheckForNull && check_for_null, DeserializeNested && deserialize_nested, bool & is_null)
|
||||
{
|
||||
bool insert_default = check_for_null();
|
||||
if (insert_default)
|
||||
is_null = check_for_null(buf);
|
||||
if (is_null)
|
||||
{
|
||||
column.insertDefault();
|
||||
}
|
||||
else
|
||||
deserialize_nested(column);
|
||||
return !insert_default;
|
||||
{
|
||||
if constexpr (std::is_same_v<ReturnType, void>)
|
||||
deserialize_nested(column, buf);
|
||||
else if (!deserialize_nested(column, buf))
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
|
||||
void SerializationNullable::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
safeDeserialize(column, *nested,
|
||||
[&istr] { bool is_null = false; readBinary(is_null, istr); return is_null; },
|
||||
[this, &istr, settings] (IColumn & nested_column) { nested->deserializeBinary(nested_column, istr, settings); });
|
||||
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
|
||||
bool is_null;
|
||||
auto check_for_null = [](ReadBuffer & buf)
|
||||
{
|
||||
bool is_null_ = false;
|
||||
readBinary(is_null_, buf);
|
||||
return is_null_;
|
||||
};
|
||||
auto deserialize_nested = [this, &settings] (IColumn & nested_column, ReadBuffer & buf) { nested->deserializeBinary(nested_column, buf, settings); };
|
||||
deserializeImpl(col.getNestedColumn(), istr, check_for_null, deserialize_nested, is_null);
|
||||
safeAppendToNullMap<void>(col, is_null);
|
||||
}
|
||||
|
||||
|
||||
@ -246,20 +250,19 @@ void SerializationNullable::serializeTextEscaped(const IColumn & column, size_t
|
||||
const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
|
||||
|
||||
if (col.isNullAt(row_num))
|
||||
writeString(settings.tsv.null_representation, ostr);
|
||||
serializeNullEscaped(ostr, settings);
|
||||
else
|
||||
nested->serializeTextEscaped(col.getNestedColumn(), row_num, ostr, settings);
|
||||
}
|
||||
|
||||
|
||||
void SerializationNullable::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
void SerializationNullable::serializeNullEscaped(DB::WriteBuffer & ostr, const DB::FormatSettings & settings)
|
||||
{
|
||||
deserializeTextEscapedImpl<void>(column, istr, settings, nested);
|
||||
writeString(settings.tsv.null_representation, ostr);
|
||||
}
|
||||
|
||||
void SerializationNullable::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
bool SerializationNullable::tryDeserializeNullEscaped(DB::ReadBuffer & istr, const DB::FormatSettings & settings)
|
||||
{
|
||||
deserializeTextRawImpl<void>(column, istr, settings, nested);
|
||||
return checkString(settings.tsv.null_representation, istr);
|
||||
}
|
||||
|
||||
void SerializationNullable::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
@ -267,72 +270,73 @@ void SerializationNullable::serializeTextRaw(const IColumn & column, size_t row_
|
||||
const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
|
||||
|
||||
if (col.isNullAt(row_num))
|
||||
writeString(settings.tsv.null_representation, ostr);
|
||||
serializeNullRaw(ostr, settings);
|
||||
else
|
||||
nested->serializeTextRaw(col.getNestedColumn(), row_num, ostr, settings);
|
||||
}
|
||||
|
||||
template<typename ReturnType>
|
||||
ReturnType SerializationNullable::deserializeTextRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested)
|
||||
void SerializationNullable::serializeNullRaw(DB::WriteBuffer & ostr, const DB::FormatSettings & settings)
|
||||
{
|
||||
return deserializeTextEscapedAndRawImpl<ReturnType, false>(column, istr, settings, nested);
|
||||
writeString(settings.tsv.null_representation, ostr);
|
||||
}
|
||||
|
||||
template<typename ReturnType>
|
||||
ReturnType SerializationNullable::deserializeTextEscapedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
|
||||
const SerializationPtr & nested)
|
||||
bool SerializationNullable::tryDeserializeNullRaw(DB::ReadBuffer & istr, const DB::FormatSettings & settings)
|
||||
{
|
||||
return deserializeTextEscapedAndRawImpl<ReturnType, true>(column, istr, settings, nested);
|
||||
return checkString(settings.tsv.null_representation, istr);
|
||||
}
|
||||
|
||||
template<typename ReturnType, bool escaped>
|
||||
ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
|
||||
const SerializationPtr & nested_serialization)
|
||||
ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization, bool & is_null)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
const String & null_representation = settings.tsv.null_representation;
|
||||
auto deserialize_nested = [&nested_serialization, &settings] (IColumn & nested_column, ReadBuffer & buf_)
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
{
|
||||
if constexpr (escaped)
|
||||
nested_serialization->deserializeTextEscaped(nested_column, buf_, settings);
|
||||
else
|
||||
nested_serialization->deserializeTextRaw(nested_column, buf_, settings);
|
||||
}
|
||||
else
|
||||
{
|
||||
if constexpr (escaped)
|
||||
return nested_serialization->tryDeserializeTextEscaped(nested_column, buf_, settings);
|
||||
else
|
||||
return nested_serialization->tryDeserializeTextRaw(nested_column, buf_, settings);
|
||||
}
|
||||
};
|
||||
|
||||
/// Some data types can deserialize absence of data (e.g. empty string), so eof is ok.
|
||||
if (istr.eof() || (!null_representation.empty() && *istr.position() != null_representation[0]))
|
||||
{
|
||||
/// This is not null, surely.
|
||||
return safeDeserialize<ReturnType>(column, *nested_serialization,
|
||||
[] { return false; },
|
||||
[&nested_serialization, &istr, &settings] (IColumn & nested_column)
|
||||
{
|
||||
if constexpr (escaped)
|
||||
nested_serialization->deserializeTextEscaped(nested_column, istr, settings);
|
||||
else
|
||||
nested_serialization->deserializeTextRaw(nested_column, istr, settings);
|
||||
});
|
||||
return deserializeImpl<ReturnType>(column, istr, [](ReadBuffer &){ return false; }, deserialize_nested, is_null);
|
||||
}
|
||||
|
||||
/// Check if we have enough data in buffer to check if it's a null.
|
||||
if (istr.available() > null_representation.size())
|
||||
{
|
||||
auto check_for_null = [&istr, &null_representation]()
|
||||
auto check_for_null = [&null_representation](ReadBuffer & buf)
|
||||
{
|
||||
auto * pos = istr.position();
|
||||
if (checkString(null_representation, istr) && (*istr.position() == '\t' || *istr.position() == '\n'))
|
||||
auto * pos = buf.position();
|
||||
if (checkString(null_representation, buf) && (*buf.position() == '\t' || *buf.position() == '\n'))
|
||||
return true;
|
||||
istr.position() = pos;
|
||||
buf.position() = pos;
|
||||
return false;
|
||||
};
|
||||
auto deserialize_nested = [&nested_serialization, &settings, &istr] (IColumn & nested_column)
|
||||
{
|
||||
if constexpr (escaped)
|
||||
nested_serialization->deserializeTextEscaped(nested_column, istr, settings);
|
||||
else
|
||||
nested_serialization->deserializeTextRaw(nested_column, istr, settings);
|
||||
};
|
||||
return safeDeserialize<ReturnType>(column, *nested_serialization, check_for_null, deserialize_nested);
|
||||
return deserializeImpl<ReturnType>(column, istr, check_for_null, deserialize_nested, is_null);
|
||||
}
|
||||
|
||||
/// We don't have enough data in buffer to check if it's a null.
|
||||
/// Use PeekableReadBuffer to make a checkpoint before checking null
|
||||
/// representation and rollback if check was failed.
|
||||
PeekableReadBuffer buf(istr, true);
|
||||
auto check_for_null = [&buf, &null_representation]()
|
||||
PeekableReadBuffer peekable_buf(istr, true);
|
||||
auto check_for_null = [&null_representation](ReadBuffer & buf_)
|
||||
{
|
||||
auto & buf = assert_cast<PeekableReadBuffer &>(buf_);
|
||||
buf.setCheckpoint();
|
||||
SCOPE_EXIT(buf.dropCheckpoint());
|
||||
if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'))
|
||||
@ -342,16 +346,18 @@ ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & col
|
||||
return false;
|
||||
};
|
||||
|
||||
auto deserialize_nested = [&nested_serialization, &settings, &buf, &null_representation, &istr] (IColumn & nested_column)
|
||||
auto deserialize_nested_with_check = [&deserialize_nested, &nested_serialization, &settings, &null_representation, &istr] (IColumn & nested_column, ReadBuffer & buf_)
|
||||
{
|
||||
auto & buf = assert_cast<PeekableReadBuffer &>(buf_);
|
||||
auto * pos = buf.position();
|
||||
if constexpr (escaped)
|
||||
nested_serialization->deserializeTextEscaped(nested_column, buf, settings);
|
||||
else
|
||||
nested_serialization->deserializeTextRaw(nested_column, buf, settings);
|
||||
if constexpr (throw_exception)
|
||||
deserialize_nested(nested_column, buf);
|
||||
else if (!deserialize_nested(nested_column, buf))
|
||||
return ReturnType(false);
|
||||
|
||||
/// Check that we don't have any unread data in PeekableReadBuffer own memory.
|
||||
if (likely(!buf.hasUnreadData()))
|
||||
return;
|
||||
return ReturnType(true);
|
||||
|
||||
/// We have some unread data in PeekableReadBuffer own memory.
|
||||
/// It can happen only if there is a string instead of a number
|
||||
@ -360,6 +366,9 @@ ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & col
|
||||
/// We also should delete incorrectly deserialized value from nested column.
|
||||
nested_column.popBack(1);
|
||||
|
||||
if constexpr (!throw_exception)
|
||||
return ReturnType(false);
|
||||
|
||||
if (null_representation.find('\t') != std::string::npos || null_representation.find('\n') != std::string::npos)
|
||||
throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation "
|
||||
"containing '\\t' or '\\n' may not work correctly for large input.");
|
||||
@ -377,7 +386,63 @@ ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & col
|
||||
istr.count(), std::string(pos, buf.position() - pos), parsed_value.str());
|
||||
};
|
||||
|
||||
return safeDeserialize<ReturnType>(column, *nested_serialization, check_for_null, deserialize_nested);
|
||||
return deserializeImpl<ReturnType>(column, peekable_buf, check_for_null, deserialize_nested_with_check, is_null);
|
||||
}
|
||||
|
||||
/// Throwing (ReturnType = void) deserialization of an escaped text value into a Nullable column.
/// The nested column is filled first, then the matching flag is appended to the null map.
void SerializationNullable::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    ColumnNullable & nullable_column = assert_cast<ColumnNullable &>(column);

    bool value_is_null = false;
    deserializeTextEscapedAndRawImpl<void, /*escaped=*/ true>(nullable_column.getNestedColumn(), istr, settings, nested, value_is_null);

    safeAppendToNullMap<void>(nullable_column, value_is_null);
}
|
||||
|
||||
/// Non-throwing counterpart of deserializeTextEscaped.
/// Returns false if the nested value cannot be parsed or the null map cannot be extended.
bool SerializationNullable::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    ColumnNullable & nullable_column = assert_cast<ColumnNullable &>(column);

    bool value_is_null = false;
    if (!deserializeTextEscapedAndRawImpl<bool, /*escaped=*/ true>(nullable_column.getNestedColumn(), istr, settings, nested, value_is_null))
        return false;

    /// The null map is only touched after the nested column was filled successfully.
    return safeAppendToNullMap<bool>(nullable_column, value_is_null);
}
|
||||
|
||||
/// Reads an escaped text value directly into a non-nullable nested column (throwing variant).
/// Returns false when the input was recognised as NULL, true otherwise.
bool SerializationNullable::deserializeNullAsDefaultOrNestedTextEscaped(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization)
{
    bool got_null = false;
    deserializeTextEscapedAndRawImpl<void, /*escaped=*/ true>(nested_column, istr, settings, nested_serialization, got_null);

    const bool nested_value_was_read = !got_null;
    return nested_value_was_read;
}
|
||||
|
||||
/// Non-throwing read of an escaped text value directly into a non-nullable nested column.
/// The result only reports whether parsing succeeded; whether the input was NULL is not exposed.
bool SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextEscaped(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization)
{
    /// Required by the helper's signature; intentionally not inspected afterwards.
    bool got_null = false;

    const bool parsed_ok = deserializeTextEscapedAndRawImpl<bool, /*escaped=*/ true>(nested_column, istr, settings, nested_serialization, got_null);
    return parsed_ok;
}
|
||||
|
||||
/// Throwing (ReturnType = void) deserialization of a raw (unescaped) text value into a Nullable column.
/// The nested column is filled first, then the matching flag is appended to the null map.
void SerializationNullable::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    ColumnNullable & nullable_column = assert_cast<ColumnNullable &>(column);

    bool value_is_null = false;
    deserializeTextEscapedAndRawImpl<void, /*escaped=*/ false>(nullable_column.getNestedColumn(), istr, settings, nested, value_is_null);

    safeAppendToNullMap<void>(nullable_column, value_is_null);
}
|
||||
|
||||
/// Non-throwing counterpart of deserializeTextRaw.
/// Returns false if the nested value cannot be parsed or the null map cannot be extended.
bool SerializationNullable::tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    ColumnNullable & nullable_column = assert_cast<ColumnNullable &>(column);

    bool value_is_null = false;
    if (!deserializeTextEscapedAndRawImpl<bool, /*escaped=*/ false>(nullable_column.getNestedColumn(), istr, settings, nested, value_is_null))
        return false;

    /// The null map is only touched after the nested column was filled successfully.
    return safeAppendToNullMap<bool>(nullable_column, value_is_null);
}
|
||||
|
||||
/// Reads a raw (unescaped) text value directly into a non-nullable nested column (throwing variant).
/// Returns false when the input was recognised as NULL, true otherwise.
bool SerializationNullable::deserializeNullAsDefaultOrNestedTextRaw(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization)
{
    bool got_null = false;
    deserializeTextEscapedAndRawImpl<void, /*escaped=*/ false>(nested_column, istr, settings, nested_serialization, got_null);

    const bool nested_value_was_read = !got_null;
    return nested_value_was_read;
}
|
||||
|
||||
/// Non-throwing read of a raw (unescaped) text value directly into a non-nullable nested column.
/// The result only reports whether parsing succeeded; whether the input was NULL is not exposed.
bool SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextRaw(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization)
{
    /// Required by the helper's signature; intentionally not inspected afterwards.
    bool got_null = false;

    const bool parsed_ok = deserializeTextEscapedAndRawImpl<bool, /*escaped=*/ false>(nested_column, istr, settings, nested_serialization, got_null);
    return parsed_ok;
}
|
||||
|
||||
void SerializationNullable::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
@ -385,45 +450,51 @@ void SerializationNullable::serializeTextQuoted(const IColumn & column, size_t r
|
||||
const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
|
||||
|
||||
if (col.isNullAt(row_num))
|
||||
writeCString("NULL", ostr);
|
||||
serializeNullQuoted(ostr);
|
||||
else
|
||||
nested->serializeTextQuoted(col.getNestedColumn(), row_num, ostr, settings);
|
||||
}
|
||||
|
||||
|
||||
void SerializationNullable::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
void SerializationNullable::serializeNullQuoted(DB::WriteBuffer & ostr)
|
||||
{
|
||||
deserializeTextQuotedImpl<void>(column, istr, settings, nested);
|
||||
writeCString("NULL", ostr);
|
||||
}
|
||||
|
||||
/// Checks whether the buffer continues with the literal NULL, ignoring letter case.
/// The outcome of checkStringCaseInsensitive is returned unchanged.
bool SerializationNullable::tryDeserializeNullQuoted(DB::ReadBuffer & istr)
{
    const bool matched_null_literal = checkStringCaseInsensitive("NULL", istr);
    return matched_null_literal;
}
|
||||
|
||||
template<typename ReturnType>
|
||||
ReturnType SerializationNullable::deserializeTextQuotedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
|
||||
const SerializationPtr & nested)
|
||||
ReturnType deserializeTextQuotedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested, bool & is_null)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
auto deserialize_nested = [&nested, &settings] (IColumn & nested_column, ReadBuffer & buf)
|
||||
{
|
||||
if constexpr (!throw_exception)
|
||||
return nested->tryDeserializeTextQuoted(nested_column, buf, settings);
|
||||
nested->deserializeTextQuoted(nested_column, buf, settings);
|
||||
};
|
||||
|
||||
if (istr.eof() || (*istr.position() != 'N' && *istr.position() != 'n'))
|
||||
{
|
||||
/// This is not null, surely.
|
||||
return safeDeserialize<ReturnType>(column, *nested,
|
||||
[] { return false; },
|
||||
[&nested, &istr, &settings] (IColumn & nested_column) { nested->deserializeTextQuoted(nested_column, istr, settings); });
|
||||
return deserializeImpl<ReturnType>(column, istr, [](ReadBuffer &){ return false; }, deserialize_nested, is_null);
|
||||
}
|
||||
|
||||
/// Check if we have enough data in buffer to check if it's a null.
|
||||
if (istr.available() >= 4)
|
||||
{
|
||||
auto check_for_null = [&istr]()
|
||||
auto check_for_null = [](ReadBuffer & buf)
|
||||
{
|
||||
auto * pos = istr.position();
|
||||
if (checkStringCaseInsensitive("NULL", istr))
|
||||
auto * pos = buf.position();
|
||||
if (checkStringCaseInsensitive("NULL", buf))
|
||||
return true;
|
||||
istr.position() = pos;
|
||||
buf.position() = pos;
|
||||
return false;
|
||||
};
|
||||
auto deserialize_nested = [&nested, &settings, &istr] (IColumn & nested_column)
|
||||
{
|
||||
nested->deserializeTextQuoted(nested_column, istr, settings);
|
||||
};
|
||||
return safeDeserialize<ReturnType>(column, *nested, check_for_null, deserialize_nested);
|
||||
return deserializeImpl<ReturnType>(column, istr, check_for_null, deserialize_nested, is_null);
|
||||
}
|
||||
|
||||
/// We don't have enough data in buffer to check if it's a NULL
|
||||
@ -431,9 +502,10 @@ ReturnType SerializationNullable::deserializeTextQuotedImpl(IColumn & column, Re
|
||||
/// to differentiate for example NULL and NaN for float)
|
||||
/// Use PeekableReadBuffer to make a checkpoint before checking
|
||||
/// null and rollback if the check was failed.
|
||||
PeekableReadBuffer buf(istr, true);
|
||||
auto check_for_null = [&buf]()
|
||||
PeekableReadBuffer peekable_buf(istr, true);
|
||||
auto check_for_null = [](ReadBuffer & buf_)
|
||||
{
|
||||
auto & buf = assert_cast<PeekableReadBuffer &>(buf_);
|
||||
buf.setCheckpoint();
|
||||
SCOPE_EXIT(buf.dropCheckpoint());
|
||||
if (checkStringCaseInsensitive("NULL", buf))
|
||||
@ -443,39 +515,74 @@ ReturnType SerializationNullable::deserializeTextQuotedImpl(IColumn & column, Re
|
||||
return false;
|
||||
};
|
||||
|
||||
auto deserialize_nested = [&nested, &settings, &buf] (IColumn & nested_column)
|
||||
auto deserialize_nested_with_check = [&deserialize_nested] (IColumn & nested_column, ReadBuffer & buf_)
|
||||
{
|
||||
nested->deserializeTextQuoted(nested_column, buf, settings);
|
||||
auto & buf = assert_cast<PeekableReadBuffer &>(buf_);
|
||||
|
||||
if constexpr (throw_exception)
|
||||
deserialize_nested(nested_column, buf);
|
||||
else if (!deserialize_nested(nested_column, buf))
|
||||
return false;
|
||||
|
||||
/// Check that we don't have any unread data in PeekableReadBuffer own memory.
|
||||
if (likely(!buf.hasUnreadData()))
|
||||
return;
|
||||
return ReturnType(true);
|
||||
|
||||
/// We have some unread data in PeekableReadBuffer own memory.
|
||||
/// It can happen only if there is an unquoted string instead of a number.
|
||||
/// We also should delete incorrectly deserialized value from nested column.
|
||||
nested_column.popBack(1);
|
||||
|
||||
if constexpr (!throw_exception)
|
||||
return ReturnType(false);
|
||||
|
||||
throw DB::Exception(
|
||||
ErrorCodes::CANNOT_READ_ALL_DATA,
|
||||
"Error while parsing Nullable: got an unquoted string {} instead of a number",
|
||||
String(buf.position(), std::min(10ul, buf.available())));
|
||||
};
|
||||
|
||||
return safeDeserialize<ReturnType>(column, *nested, check_for_null, deserialize_nested);
|
||||
return deserializeImpl<ReturnType>(column, peekable_buf, check_for_null, deserialize_nested_with_check, is_null);
|
||||
}
|
||||
|
||||
|
||||
void SerializationNullable::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
void SerializationNullable::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
deserializeWholeTextImpl<void>(column, istr, settings, nested);
|
||||
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
|
||||
bool is_null;
|
||||
deserializeTextQuotedImpl<void>(col.getNestedColumn(), istr, settings, nested, is_null);
|
||||
safeAppendToNullMap<void>(col, is_null);
|
||||
}
|
||||
|
||||
/// Non-throwing deserialization of a quoted text value into a Nullable column.
/// Returns false if the nested value cannot be parsed or the null map cannot be extended.
bool SerializationNullable::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    ColumnNullable & nullable_column = assert_cast<ColumnNullable &>(column);

    bool value_is_null = false;
    if (!deserializeTextQuotedImpl<bool>(nullable_column.getNestedColumn(), istr, settings, nested, value_is_null))
        return false;

    /// The null map is only touched after the nested column was filled successfully.
    return safeAppendToNullMap<bool>(nullable_column, value_is_null);
}
|
||||
|
||||
/// Reads a quoted text value directly into a non-nullable nested column (throwing variant).
/// Returns false when the input was recognised as NULL, true otherwise.
bool SerializationNullable::deserializeNullAsDefaultOrNestedTextQuoted(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
{
    bool got_null = false;
    deserializeTextQuotedImpl<void>(nested_column, istr, settings, nested_serialization, got_null);

    const bool nested_value_was_read = !got_null;
    return nested_value_was_read;
}
|
||||
|
||||
/// Non-throwing read of a quoted text value directly into a non-nullable nested column.
/// The result only reports whether parsing succeeded; whether the input was NULL is not exposed.
bool SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextQuoted(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
{
    /// Required by the helper's signature; intentionally not inspected afterwards.
    bool got_null = false;

    const bool parsed_ok = deserializeTextQuotedImpl<bool>(nested_column, istr, settings, nested_serialization, got_null);
    return parsed_ok;
}
|
||||
|
||||
template <typename ReturnType>
|
||||
ReturnType SerializationNullable::deserializeWholeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
|
||||
const SerializationPtr & nested)
|
||||
ReturnType deserializeWholeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested, bool & is_null)
|
||||
{
|
||||
PeekableReadBuffer buf(istr, true);
|
||||
auto check_for_null = [&buf]()
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
PeekableReadBuffer peekable_buf(istr, true);
|
||||
auto check_for_null = [](ReadBuffer & buf_)
|
||||
{
|
||||
auto & buf = assert_cast<PeekableReadBuffer &>(buf_);
|
||||
buf.setCheckpoint();
|
||||
SCOPE_EXIT(buf.dropCheckpoint());
|
||||
|
||||
@ -490,15 +597,46 @@ ReturnType SerializationNullable::deserializeWholeTextImpl(IColumn & column, Rea
|
||||
return false;
|
||||
};
|
||||
|
||||
auto deserialize_nested = [&nested, &settings, &buf] (IColumn & nested_column)
|
||||
auto deserialize_nested = [&nested, &settings] (IColumn & nested_column, ReadBuffer & buf_)
|
||||
{
|
||||
auto & buf = assert_cast<PeekableReadBuffer &>(buf_);
|
||||
if constexpr (!throw_exception)
|
||||
return nested->tryDeserializeWholeText(nested_column, buf, settings);
|
||||
|
||||
nested->deserializeWholeText(nested_column, buf, settings);
|
||||
assert(!buf.hasUnreadData());
|
||||
};
|
||||
|
||||
return safeDeserialize<ReturnType>(column, *nested, check_for_null, deserialize_nested);
|
||||
return deserializeImpl<ReturnType>(column, peekable_buf, check_for_null, deserialize_nested, is_null);
|
||||
}
|
||||
|
||||
/// Throwing (ReturnType = void) deserialization of a whole-text value into a Nullable column.
/// The nested column is filled first, then the matching flag is appended to the null map.
void SerializationNullable::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    ColumnNullable & nullable_column = assert_cast<ColumnNullable &>(column);

    bool value_is_null = false;
    deserializeWholeTextImpl<void>(nullable_column.getNestedColumn(), istr, settings, nested, value_is_null);

    safeAppendToNullMap<void>(nullable_column, value_is_null);
}
|
||||
|
||||
/// Non-throwing counterpart of deserializeWholeText.
/// Returns false if the nested value cannot be parsed or the null map cannot be extended.
bool SerializationNullable::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    ColumnNullable & nullable_column = assert_cast<ColumnNullable &>(column);

    bool value_is_null = false;
    if (!deserializeWholeTextImpl<bool>(nullable_column.getNestedColumn(), istr, settings, nested, value_is_null))
        return false;

    /// The null map is only touched after the nested column was filled successfully.
    return safeAppendToNullMap<bool>(nullable_column, value_is_null);
}
|
||||
|
||||
/// Reads a whole-text value directly into a non-nullable nested column (throwing variant).
/// Returns false when the input was recognised as NULL, true otherwise.
bool SerializationNullable::deserializeNullAsDefaultOrNestedWholeText(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
{
    bool got_null = false;
    deserializeWholeTextImpl<void>(nested_column, istr, settings, nested_serialization, got_null);

    const bool nested_value_was_read = !got_null;
    return nested_value_was_read;
}
|
||||
|
||||
/// Non-throwing read of a whole-text value directly into a non-nullable nested column.
/// The result only reports whether parsing succeeded; whether the input was NULL is not exposed.
bool SerializationNullable::tryDeserializeNullAsDefaultOrNestedWholeText(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
{
    /// Required by the helper's signature; intentionally not inspected afterwards.
    bool got_null = false;

    const bool parsed_ok = deserializeWholeTextImpl<bool>(nested_column, istr, settings, nested_serialization, got_null);
    return parsed_ok;
}
|
||||
|
||||
void SerializationNullable::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
@ -510,48 +648,56 @@ void SerializationNullable::serializeTextCSV(const IColumn & column, size_t row_
|
||||
nested->serializeTextCSV(col.getNestedColumn(), row_num, ostr, settings);
|
||||
}
|
||||
|
||||
void SerializationNullable::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
void SerializationNullable::serializeNullCSV(DB::WriteBuffer & ostr, const DB::FormatSettings & settings)
|
||||
{
|
||||
deserializeTextCSVImpl<void>(column, istr, settings, nested);
|
||||
writeString(settings.csv.null_representation, ostr);
|
||||
}
|
||||
|
||||
/// Checks whether the buffer continues with the configured CSV NULL representation.
/// The outcome of checkString is returned unchanged.
bool SerializationNullable::tryDeserializeNullCSV(DB::ReadBuffer & istr, const DB::FormatSettings & settings)
{
    const auto & csv_null_text = settings.csv.null_representation;
    return checkString(csv_null_text, istr);
}
|
||||
|
||||
template<typename ReturnType>
|
||||
ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
|
||||
const SerializationPtr & nested_serialization)
|
||||
ReturnType deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization, bool & is_null)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
auto deserialize_nested = [&nested_serialization, &settings] (IColumn & nested_column, ReadBuffer & buf)
|
||||
{
|
||||
if constexpr (!throw_exception)
|
||||
return nested_serialization->tryDeserializeTextCSV(nested_column, buf, settings);
|
||||
nested_serialization->deserializeTextCSV(nested_column, buf, settings);
|
||||
};
|
||||
|
||||
const String & null_representation = settings.csv.null_representation;
|
||||
if (istr.eof() || (!null_representation.empty() && *istr.position() != null_representation[0]))
|
||||
{
|
||||
/// This is not null, surely.
|
||||
return safeDeserialize<ReturnType>(column, *nested_serialization,
|
||||
[] { return false; },
|
||||
[&nested_serialization, &istr, &settings] (IColumn & nested_column) { nested_serialization->deserializeTextCSV(nested_column, istr, settings); });
|
||||
return deserializeImpl<ReturnType>(column, istr, [](ReadBuffer &){ return false; }, deserialize_nested, is_null);
|
||||
}
|
||||
|
||||
/// Check if we have enough data in buffer to check if it's a null.
|
||||
if (settings.csv.custom_delimiter.empty() && istr.available() > null_representation.size())
|
||||
{
|
||||
auto check_for_null = [&istr, &null_representation, &settings]()
|
||||
auto check_for_null = [&null_representation, &settings](ReadBuffer & buf)
|
||||
{
|
||||
auto * pos = istr.position();
|
||||
if (checkString(null_representation, istr) && (*istr.position() == settings.csv.delimiter || *istr.position() == '\r' || *istr.position() == '\n'))
|
||||
auto * pos = buf.position();
|
||||
if (checkString(null_representation, buf) && (*buf.position() == settings.csv.delimiter || *buf.position() == '\r' || *buf.position() == '\n'))
|
||||
return true;
|
||||
istr.position() = pos;
|
||||
buf.position() = pos;
|
||||
return false;
|
||||
};
|
||||
auto deserialize_nested = [&nested_serialization, &settings, &istr] (IColumn & nested_column)
|
||||
{
|
||||
nested_serialization->deserializeTextCSV(nested_column, istr, settings);
|
||||
};
|
||||
return safeDeserialize<ReturnType>(column, *nested_serialization, check_for_null, deserialize_nested);
|
||||
return deserializeImpl<ReturnType>(column, istr, check_for_null, deserialize_nested, is_null);
|
||||
}
|
||||
|
||||
/// We don't have enough data in buffer to check if it's a null.
|
||||
/// Use PeekableReadBuffer to make a checkpoint before checking null
|
||||
/// representation and rollback if the check was failed.
|
||||
PeekableReadBuffer buf(istr, true);
|
||||
auto check_for_null = [&buf, &null_representation, &settings]()
|
||||
PeekableReadBuffer peekable_buf(istr, true);
|
||||
auto check_for_null = [&null_representation, &settings](ReadBuffer & buf_)
|
||||
{
|
||||
auto & buf = assert_cast<PeekableReadBuffer &>(buf_);
|
||||
buf.setCheckpoint();
|
||||
SCOPE_EXIT(buf.dropCheckpoint());
|
||||
if (checkString(null_representation, buf))
|
||||
@ -574,13 +720,18 @@ ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadB
|
||||
return false;
|
||||
};
|
||||
|
||||
auto deserialize_nested = [&nested_serialization, &settings, &buf, &null_representation, &istr] (IColumn & nested_column)
|
||||
auto deserialize_nested_with_check = [&deserialize_nested, &nested_serialization, &settings, &null_representation, &istr] (IColumn & nested_column, ReadBuffer & buf_)
|
||||
{
|
||||
auto & buf = assert_cast<PeekableReadBuffer &>(buf_);
|
||||
auto * pos = buf.position();
|
||||
nested_serialization->deserializeTextCSV(nested_column, buf, settings);
|
||||
if constexpr (throw_exception)
|
||||
deserialize_nested(nested_column, buf);
|
||||
else if (!deserialize_nested(nested_column, buf))
|
||||
return ReturnType(false);
|
||||
|
||||
/// Check that we don't have any unread data in PeekableReadBuffer own memory.
|
||||
if (likely(!buf.hasUnreadData()))
|
||||
return;
|
||||
return ReturnType(true);
|
||||
|
||||
/// We have some unread data in PeekableReadBuffer own memory.
|
||||
/// It can happen only if there is an unquoted string instead of a number
|
||||
@ -589,6 +740,9 @@ ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadB
|
||||
/// We also should delete incorrectly deserialized value from nested column.
|
||||
nested_column.popBack(1);
|
||||
|
||||
if constexpr (!throw_exception)
|
||||
return ReturnType(false);
|
||||
|
||||
if (null_representation.find(settings.csv.delimiter) != std::string::npos || null_representation.find('\r') != std::string::npos
|
||||
|| null_representation.find('\n') != std::string::npos)
|
||||
throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "CSV custom null representation containing "
|
||||
@ -604,7 +758,35 @@ ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadB
|
||||
istr.count(), std::string(pos, buf.position() - pos), parsed_value.str());
|
||||
};
|
||||
|
||||
return safeDeserialize<ReturnType>(column, *nested_serialization, check_for_null, deserialize_nested);
|
||||
return deserializeImpl<ReturnType>(column, peekable_buf, check_for_null, deserialize_nested_with_check, is_null);
|
||||
}
|
||||
|
||||
void SerializationNullable::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
|
||||
bool is_null;
|
||||
deserializeTextCSVImpl<void>(col.getNestedColumn(), istr, settings, nested, is_null);
|
||||
safeAppendToNullMap<void>(col, is_null);
|
||||
}
|
||||
|
||||
bool SerializationNullable::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
|
||||
bool is_null;
|
||||
return deserializeTextCSVImpl<bool>(col.getNestedColumn(), istr, settings, nested, is_null) && safeAppendToNullMap<bool>(col, is_null);
|
||||
}
|
||||
|
||||
bool SerializationNullable::deserializeNullAsDefaultOrNestedTextCSV(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
|
||||
{
|
||||
bool is_null;
|
||||
deserializeTextCSVImpl<void>(nested_column, istr, settings, nested_serialization, is_null);
|
||||
return !is_null;
|
||||
}
|
||||
|
||||
bool SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextCSV(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
|
||||
{
|
||||
bool is_null;
|
||||
return deserializeTextCSVImpl<bool>(nested_column, istr, settings, nested_serialization, is_null);
|
||||
}
|
||||
|
||||
void SerializationNullable::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
@ -618,38 +800,86 @@ void SerializationNullable::serializeText(const IColumn & column, size_t row_num
|
||||
/// This assumes UTF-8 and proper font support. This is Ok, because Pretty formats are "presentational", not for data exchange.
|
||||
|
||||
if (col.isNullAt(row_num))
|
||||
{
|
||||
if (settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8)
|
||||
writeCString("ᴺᵁᴸᴸ", ostr);
|
||||
else
|
||||
writeCString("NULL", ostr);
|
||||
}
|
||||
serializeNullText(ostr, settings);
|
||||
else
|
||||
nested->serializeText(col.getNestedColumn(), row_num, ostr, settings);
|
||||
}
|
||||
|
||||
void SerializationNullable::serializeNullText(DB::WriteBuffer & ostr, const DB::FormatSettings & settings)
|
||||
{
|
||||
if (settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8)
|
||||
writeCString("ᴺᵁᴸᴸ", ostr);
|
||||
else
|
||||
writeCString("NULL", ostr);
|
||||
}
|
||||
|
||||
bool SerializationNullable::tryDeserializeNullText(DB::ReadBuffer & istr)
|
||||
{
|
||||
if (checkCharCaseInsensitive('N', istr))
|
||||
return checkStringCaseInsensitive("ULL", istr);
|
||||
return checkStringCaseInsensitive("ᴺᵁᴸᴸ", istr);
|
||||
}
|
||||
|
||||
void SerializationNullable::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
|
||||
|
||||
if (col.isNullAt(row_num))
|
||||
writeCString("null", ostr);
|
||||
serializeNullJSON(ostr);
|
||||
else
|
||||
nested->serializeTextJSON(col.getNestedColumn(), row_num, ostr, settings);
|
||||
}
|
||||
|
||||
void SerializationNullable::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
void SerializationNullable::serializeNullJSON(DB::WriteBuffer & ostr)
|
||||
{
|
||||
deserializeTextJSONImpl<void>(column, istr, settings, nested);
|
||||
writeCString("null", ostr);
|
||||
}
|
||||
|
||||
bool SerializationNullable::tryDeserializeNullJSON(DB::ReadBuffer & istr)
|
||||
{
|
||||
return checkString("null", istr);
|
||||
}
|
||||
|
||||
template<typename ReturnType>
|
||||
ReturnType SerializationNullable::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
|
||||
const SerializationPtr & nested)
|
||||
ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested, bool & is_null)
|
||||
{
|
||||
return safeDeserialize<ReturnType>(column, *nested,
|
||||
[&istr] { return checkStringByFirstCharacterAndAssertTheRest("null", istr); },
|
||||
[&nested, &istr, &settings] (IColumn & nested_column) { nested->deserializeTextJSON(nested_column, istr, settings); });
|
||||
auto check_for_null = [](ReadBuffer & buf){ return checkStringByFirstCharacterAndAssertTheRest("null", buf); };
|
||||
auto deserialize_nested = [&nested, &settings](IColumn & nested_column, ReadBuffer & buf)
|
||||
{
|
||||
if constexpr (std::is_same_v<ReturnType, bool>)
|
||||
return nested->tryDeserializeTextJSON(nested_column, buf, settings);
|
||||
nested->deserializeTextJSON(nested_column, buf, settings);
|
||||
};
|
||||
|
||||
return deserializeImpl<ReturnType>(column, istr, check_for_null, deserialize_nested, is_null);
|
||||
}
|
||||
|
||||
void SerializationNullable::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
|
||||
bool is_null;
|
||||
deserializeTextJSONImpl<void>(col.getNestedColumn(), istr, settings, nested, is_null);
|
||||
safeAppendToNullMap<void>(col, is_null);
|
||||
}
|
||||
|
||||
bool SerializationNullable::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
|
||||
bool is_null;
|
||||
return deserializeTextJSONImpl<bool>(col.getNestedColumn(), istr, settings, nested, is_null) && safeAppendToNullMap<bool>(col, is_null);
|
||||
}
|
||||
|
||||
bool SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
|
||||
{
|
||||
bool is_null;
|
||||
deserializeTextJSONImpl<void>(nested_column, istr, settings, nested_serialization, is_null);
|
||||
return !is_null;
|
||||
}
|
||||
|
||||
bool SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
|
||||
{
|
||||
bool is_null;
|
||||
return deserializeTextJSONImpl<bool>(nested_column, istr, settings, nested_serialization, is_null);
|
||||
}
|
||||
|
||||
void SerializationNullable::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
@ -662,11 +892,9 @@ void SerializationNullable::serializeTextXML(const IColumn & column, size_t row_
|
||||
nested->serializeTextXML(col.getNestedColumn(), row_num, ostr, settings);
|
||||
}
|
||||
|
||||
template bool SerializationNullable::deserializeWholeTextImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
|
||||
template bool SerializationNullable::deserializeTextEscapedImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
|
||||
template bool SerializationNullable::deserializeTextQuotedImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested);
|
||||
template bool SerializationNullable::deserializeTextCSVImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
|
||||
template bool SerializationNullable::deserializeTextJSONImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested);
|
||||
template bool SerializationNullable::deserializeTextRawImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested);
|
||||
void SerializationNullable::serializeNullXML(DB::WriteBuffer & ostr)
|
||||
{
|
||||
writeCString("\\N", ostr);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -51,9 +51,12 @@ public:
|
||||
void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
|
||||
@ -66,31 +69,49 @@ public:
|
||||
* In CSV, non-NULL string value, starting with \N characters, must be placed in quotes, to avoid ambiguity.
|
||||
*/
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
|
||||
void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
|
||||
/// If ReturnType is bool, check for NULL and deserialize value into non-nullable column (and return true) or insert default value of nested type (and return false)
|
||||
/// If ReturnType is void, deserialize Nullable(T)
|
||||
template <typename ReturnType = bool>
|
||||
static ReturnType deserializeWholeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
|
||||
template <typename ReturnType = bool>
|
||||
static ReturnType deserializeTextEscapedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
|
||||
template <typename ReturnType = bool>
|
||||
static ReturnType deserializeTextQuotedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested);
|
||||
template <typename ReturnType = bool>
|
||||
static ReturnType deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
|
||||
template <typename ReturnType = bool>
|
||||
static ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested);
|
||||
template <typename ReturnType = bool>
|
||||
static ReturnType deserializeTextRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
|
||||
template <typename ReturnType = bool, bool escaped>
|
||||
static ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
|
||||
/// If Check for NULL and deserialize value into non-nullable column (and return true) or insert default value of nested type (and return false)
|
||||
static bool deserializeNullAsDefaultOrNestedWholeText(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
|
||||
static bool deserializeNullAsDefaultOrNestedTextEscaped(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
|
||||
static bool deserializeNullAsDefaultOrNestedTextQuoted(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization);
|
||||
static bool deserializeNullAsDefaultOrNestedTextCSV(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
|
||||
static bool deserializeNullAsDefaultOrNestedTextJSON(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization);
|
||||
static bool deserializeNullAsDefaultOrNestedTextRaw(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
|
||||
|
||||
/// If Check for NULL and deserialize value into non-nullable column or insert default value of nested type.
|
||||
/// Return true if parsing was successful and false in case of any error.
|
||||
static bool tryDeserializeNullAsDefaultOrNestedWholeText(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
|
||||
static bool tryDeserializeNullAsDefaultOrNestedTextEscaped(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
|
||||
static bool tryDeserializeNullAsDefaultOrNestedTextQuoted(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization);
|
||||
static bool tryDeserializeNullAsDefaultOrNestedTextCSV(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
|
||||
static bool tryDeserializeNullAsDefaultOrNestedTextJSON(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization);
|
||||
static bool tryDeserializeNullAsDefaultOrNestedTextRaw(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
|
||||
|
||||
|
||||
static void serializeNullEscaped(WriteBuffer & ostr, const FormatSettings & settings);
|
||||
static bool tryDeserializeNullEscaped(ReadBuffer & istr, const FormatSettings & settings);
|
||||
static void serializeNullQuoted(WriteBuffer & ostr);
|
||||
static bool tryDeserializeNullQuoted(ReadBuffer & istr);
|
||||
static void serializeNullCSV(WriteBuffer & ostr, const FormatSettings & settings);
|
||||
static bool tryDeserializeNullCSV(ReadBuffer & istr, const FormatSettings & settings);
|
||||
static void serializeNullJSON(WriteBuffer & ostr);
|
||||
static bool tryDeserializeNullJSON(ReadBuffer & istr);
|
||||
static void serializeNullRaw(WriteBuffer & ostr, const FormatSettings & settings);
|
||||
static bool tryDeserializeNullRaw(ReadBuffer & istr, const FormatSettings & settings);
|
||||
static void serializeNullText(WriteBuffer & ostr, const FormatSettings & settings);
|
||||
static bool tryDeserializeNullText(ReadBuffer & istr);
|
||||
static void serializeNullXML(WriteBuffer & ostr);
|
||||
|
||||
private:
|
||||
struct SubcolumnCreator : public ISubcolumnCreator
|
||||
|
@ -37,6 +37,18 @@ void SerializationNumber<T>::deserializeText(IColumn & column, ReadBuffer & istr
|
||||
throwUnexpectedDataAfterParsedValue(column, istr, settings, "Number");
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool SerializationNumber<T>::tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const
|
||||
{
|
||||
T x;
|
||||
|
||||
if (!tryReadText(x, istr) || (whole && !istr.eof()))
|
||||
return false;
|
||||
|
||||
assert_cast<ColumnVector<T> &>(column).getData().push_back(x);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void SerializationNumber<T>::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
@ -44,9 +56,10 @@ void SerializationNumber<T>::serializeTextJSON(const IColumn & column, size_t ro
|
||||
writeJSONNumber(x, ostr, settings);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void SerializationNumber<T>::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
template <typename T, typename ReturnType>
|
||||
ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
bool has_quote = false;
|
||||
if (!istr.eof() && *istr.position() == '"') /// We understand the number both in quotes and without.
|
||||
{
|
||||
@ -54,13 +67,16 @@ void SerializationNumber<T>::deserializeTextJSON(IColumn & column, ReadBuffer &
|
||||
++istr.position();
|
||||
}
|
||||
|
||||
FieldType x;
|
||||
T x;
|
||||
|
||||
/// null
|
||||
if (!has_quote && !istr.eof() && *istr.position() == 'n')
|
||||
{
|
||||
++istr.position();
|
||||
assertString("ull", istr);
|
||||
if constexpr (throw_exception)
|
||||
assertString("ull", istr);
|
||||
else if (!checkString("ull", istr))
|
||||
return ReturnType(false);
|
||||
|
||||
x = NaNOrZero<T>();
|
||||
}
|
||||
@ -73,26 +89,62 @@ void SerializationNumber<T>::deserializeTextJSON(IColumn & column, ReadBuffer &
|
||||
{
|
||||
// extra conditions to parse true/false strings into 1/0
|
||||
if (istr.eof())
|
||||
throwReadAfterEOF();
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throwReadAfterEOF();
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
if (*istr.position() == 't' || *istr.position() == 'f')
|
||||
{
|
||||
bool tmp = false;
|
||||
readBoolTextWord(tmp, istr);
|
||||
if constexpr (throw_exception)
|
||||
readBoolTextWord(tmp, istr);
|
||||
else if (!readBoolTextWord<bool>(tmp, istr))
|
||||
return ReturnType(false);
|
||||
|
||||
x = tmp;
|
||||
}
|
||||
else
|
||||
readText(x, istr);
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
readText(x, istr);
|
||||
else if (!tryReadText(x, istr))
|
||||
return ReturnType(false);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
readText(x, istr);
|
||||
if constexpr (throw_exception)
|
||||
readText(x, istr);
|
||||
else if (!tryReadText(x, istr))
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
if (has_quote)
|
||||
assertChar('"', istr);
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
assertChar('"', istr);
|
||||
else if (!checkChar('"', istr))
|
||||
return ReturnType(false);
|
||||
}
|
||||
}
|
||||
|
||||
assert_cast<ColumnVector<T> &>(column).getData().push_back(x);
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void SerializationNumber<T>::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
deserializeTextJSONImpl<T, void>(column, istr, settings);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool SerializationNumber<T>::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
return deserializeTextJSONImpl<T, bool>(column, istr, settings);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -103,6 +155,16 @@ void SerializationNumber<T>::deserializeTextCSV(IColumn & column, ReadBuffer & i
|
||||
assert_cast<ColumnVector<T> &>(column).getData().push_back(x);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool SerializationNumber<T>::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & /*settings*/) const
|
||||
{
|
||||
FieldType x;
|
||||
if (!tryReadCSV(x, istr))
|
||||
return false;
|
||||
assert_cast<ColumnVector<T> &>(column).getData().push_back(x);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void SerializationNumber<T>::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const
|
||||
{
|
||||
|
@ -20,9 +20,12 @@ public:
|
||||
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
|
||||
bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
/** Format is platform-dependent. */
|
||||
void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
|
@ -272,40 +272,67 @@ void SerializationString::serializeTextEscaped(const IColumn & column, size_t ro
|
||||
}
|
||||
|
||||
|
||||
template <typename Reader>
|
||||
static inline void read(IColumn & column, Reader && reader)
|
||||
template <typename ReturnType, typename Reader>
|
||||
static inline ReturnType read(IColumn & column, Reader && reader)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
ColumnString & column_string = assert_cast<ColumnString &>(column);
|
||||
ColumnString::Chars & data = column_string.getChars();
|
||||
ColumnString::Offsets & offsets = column_string.getOffsets();
|
||||
size_t old_chars_size = data.size();
|
||||
size_t old_offsets_size = offsets.size();
|
||||
try
|
||||
{
|
||||
reader(data);
|
||||
data.push_back(0);
|
||||
offsets.push_back(data.size());
|
||||
}
|
||||
catch (...)
|
||||
auto restore_column = [&]()
|
||||
{
|
||||
offsets.resize_assume_reserved(old_offsets_size);
|
||||
data.resize_assume_reserved(old_chars_size);
|
||||
throw;
|
||||
};
|
||||
|
||||
try
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
{
|
||||
reader(data);
|
||||
}
|
||||
else if (!reader(data))
|
||||
{
|
||||
restore_column();
|
||||
return false;
|
||||
}
|
||||
|
||||
data.push_back(0);
|
||||
offsets.push_back(data.size());
|
||||
return ReturnType(true);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
restore_column();
|
||||
if constexpr (throw_exception)
|
||||
throw;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void SerializationString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
|
||||
{
|
||||
read(column, [&](ColumnString::Chars & data) { readStringUntilEOFInto(data, istr); });
|
||||
read<void>(column, [&](ColumnString::Chars & data) { readStringUntilEOFInto(data, istr); });
|
||||
}
|
||||
|
||||
bool SerializationString::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
|
||||
{
|
||||
return read<bool>(column, [&](ColumnString::Chars & data) { readStringUntilEOFInto(data, istr); return true; });
|
||||
}
|
||||
|
||||
void SerializationString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
|
||||
{
|
||||
read(column, [&](ColumnString::Chars & data) { readEscapedStringInto(data, istr); });
|
||||
read<void>(column, [&](ColumnString::Chars & data) { readEscapedStringInto(data, istr); });
|
||||
}
|
||||
|
||||
bool SerializationString::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
|
||||
{
|
||||
return read<bool>(column, [&](ColumnString::Chars & data) { readEscapedStringInto(data, istr); return true; });
|
||||
}
|
||||
|
||||
void SerializationString::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
|
||||
{
|
||||
@ -315,7 +342,12 @@ void SerializationString::serializeTextQuoted(const IColumn & column, size_t row
|
||||
|
||||
void SerializationString::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
|
||||
{
|
||||
read(column, [&](ColumnString::Chars & data) { readQuotedStringInto<true>(data, istr); });
|
||||
read<void>(column, [&](ColumnString::Chars & data) { readQuotedStringInto<true>(data, istr); });
|
||||
}
|
||||
|
||||
bool SerializationString::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
|
||||
{
|
||||
return read<bool>(column, [&](ColumnString::Chars & data) { return tryReadQuotedStringInto<true>(data, istr); });
|
||||
}
|
||||
|
||||
|
||||
@ -329,11 +361,11 @@ void SerializationString::deserializeTextJSON(IColumn & column, ReadBuffer & ist
|
||||
{
|
||||
if (settings.json.read_objects_as_strings && !istr.eof() && *istr.position() == '{')
|
||||
{
|
||||
read(column, [&](ColumnString::Chars & data) { readJSONObjectPossiblyInvalid(data, istr); });
|
||||
read<void>(column, [&](ColumnString::Chars & data) { readJSONObjectPossiblyInvalid(data, istr); });
|
||||
}
|
||||
else if (settings.json.read_arrays_as_strings && !istr.eof() && *istr.position() == '[')
|
||||
{
|
||||
read(column, [&](ColumnString::Chars & data) { readJSONArrayInto(data, istr); });
|
||||
read<void>(column, [&](ColumnString::Chars & data) { readJSONArrayInto(data, istr); });
|
||||
}
|
||||
else if (settings.json.read_bools_as_strings && !istr.eof() && (*istr.position() == 't' || *istr.position() == 'f'))
|
||||
{
|
||||
@ -349,7 +381,7 @@ void SerializationString::deserializeTextJSON(IColumn & column, ReadBuffer & ist
|
||||
str_value = "false";
|
||||
}
|
||||
|
||||
read(column, [&](ColumnString::Chars & data) { data.insert(str_value.begin(), str_value.end()); });
|
||||
read<void>(column, [&](ColumnString::Chars & data) { data.insert(str_value.begin(), str_value.end()); });
|
||||
}
|
||||
else if (settings.json.read_numbers_as_strings && !istr.eof() && *istr.position() != '"')
|
||||
{
|
||||
@ -358,12 +390,60 @@ void SerializationString::deserializeTextJSON(IColumn & column, ReadBuffer & ist
|
||||
Float64 tmp;
|
||||
ReadBufferFromString buf(field);
|
||||
if (tryReadFloatText(tmp, buf) && buf.eof())
|
||||
read(column, [&](ColumnString::Chars & data) { data.insert(field.begin(), field.end()); });
|
||||
read<void>(column, [&](ColumnString::Chars & data) { data.insert(field.begin(), field.end()); });
|
||||
else
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON String value here: {}", field);
|
||||
}
|
||||
else
|
||||
read(column, [&](ColumnString::Chars & data) { readJSONStringInto(data, istr); });
|
||||
read<void>(column, [&](ColumnString::Chars & data) { readJSONStringInto(data, istr); });
|
||||
}
|
||||
|
||||
/// Tries to read a JSON value into a String column and reports failure through the return value.
/// Depending on the JSON settings, objects, arrays, booleans and numbers may be stored as their
/// textual representation; anything else is read as a regular JSON string.
bool SerializationString::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    const auto & json = settings.json;

    /// A JSON object kept as text.
    if (json.read_objects_as_strings && !istr.eof() && *istr.position() == '{')
    {
        auto read_object = [&](ColumnString::Chars & chars) { return readJSONObjectPossiblyInvalid<ColumnString::Chars, bool>(chars, istr); };
        return read<bool>(column, read_object);
    }

    /// A JSON array kept as text.
    if (json.read_arrays_as_strings && !istr.eof() && *istr.position() == '[')
    {
        auto read_array = [&](ColumnString::Chars & chars) { return readJSONArrayInto<ColumnString::Chars, bool>(chars, istr); };
        return read<bool>(column, read_array);
    }

    /// A boolean literal kept as the string "true" or "false".
    if (json.read_bools_as_strings && !istr.eof() && (*istr.position() == 't' || *istr.position() == 'f'))
    {
        /// The first character decides which literal has to follow.
        const bool expect_true = *istr.position() == 't';
        const bool literal_matched = expect_true ? checkString("true", istr) : checkString("false", istr);
        if (!literal_matched)
            return false;

        String literal = expect_true ? "true" : "false";
        read<void>(column, [&](ColumnString::Chars & chars) { chars.insert(literal.begin(), literal.end()); });
        return true;
    }

    /// A number kept as text: anything that does not start with a double quote is tried as a number.
    if (json.read_numbers_as_strings && !istr.eof() && *istr.position() != '"')
    {
        String raw_field;
        if (!tryReadJSONField(raw_field, istr))
            return false;

        /// The field is accepted only if it parses as a float and nothing is left over.
        Float64 parsed_number;
        ReadBufferFromString field_buf(raw_field);
        if (!tryReadFloatText(parsed_number, field_buf) || !field_buf.eof())
            return false;

        read<void>(column, [&](ColumnString::Chars & chars) { chars.insert(raw_field.begin(), raw_field.end()); });
        return true;
    }

    /// Default case: a regular JSON string.
    return read<bool>(column, [&](ColumnString::Chars & chars) { return tryReadJSONStringInto(chars, istr); });
}
|
||||
|
||||
|
||||
@ -381,7 +461,12 @@ void SerializationString::serializeTextCSV(const IColumn & column, size_t row_nu
|
||||
|
||||
/// Reads a single CSV field from `istr` into the column's character data via read<void>.
/// Exactly one field is consumed per call, matching the other deserialize* methods of this class.
void SerializationString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    read<void>(column, [&](ColumnString::Chars & data) { readCSVStringInto(data, istr, settings.csv); });
}
|
||||
|
||||
/// Bool-returning variant of deserializeTextCSV. The reader itself always reports success,
/// so the result is whatever read<bool> returns for it.
bool SerializationString::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    auto read_field = [&](ColumnString::Chars & chars)
    {
        readCSVStringInto<ColumnString::Chars, false, false>(chars, istr, settings.csv);
        return true;
    };

    return read<bool>(column, read_field);
}
|
||||
|
||||
void SerializationString::serializeTextMarkdown(
|
||||
|
@ -18,20 +18,25 @@ public:
|
||||
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
};
|
||||
|
@ -62,15 +62,38 @@ void SerializationTuple::serializeBinary(const IColumn & column, size_t row_num,
|
||||
}
|
||||
|
||||
|
||||
template <typename F>
|
||||
static void addElementSafe(size_t num_elems, IColumn & column, F && impl)
|
||||
template <typename ReturnType = void, typename F>
|
||||
static ReturnType addElementSafe(size_t num_elems, IColumn & column, F && impl)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
/// We use the assumption that tuples of zero size do not exist.
|
||||
size_t old_size = column.size();
|
||||
|
||||
auto restore_elements = [&]()
|
||||
{
|
||||
for (size_t i = 0; i < num_elems; ++i)
|
||||
{
|
||||
auto & element_column = extractElementColumn(column, i);
|
||||
if (element_column.size() > old_size)
|
||||
{
|
||||
chassert(element_column.size() - old_size == 1);
|
||||
element_column.popBack(1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
try
|
||||
{
|
||||
impl();
|
||||
if constexpr (throw_exception)
|
||||
{
|
||||
impl();
|
||||
}
|
||||
else if (!impl())
|
||||
{
|
||||
restore_elements();
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
// Check that all columns now have the same size.
|
||||
size_t new_size = column.size();
|
||||
@ -81,22 +104,23 @@ static void addElementSafe(size_t num_elems, IColumn & column, F && impl)
|
||||
{
|
||||
// This is not a logical error because it may work with
|
||||
// user-supplied data.
|
||||
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH,
|
||||
"Cannot read a tuple because not all elements are present");
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH,
|
||||
"Cannot read a tuple because not all elements are present");
|
||||
restore_elements();
|
||||
return ReturnType(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
for (size_t i = 0; i < num_elems; ++i)
|
||||
{
|
||||
auto & element_column = extractElementColumn(column, i);
|
||||
if (element_column.size() > old_size)
|
||||
element_column.popBack(1);
|
||||
}
|
||||
|
||||
throw;
|
||||
restore_elements();
|
||||
if constexpr (throw_exception)
|
||||
throw;
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
void SerializationTuple::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
@ -120,25 +144,51 @@ void SerializationTuple::serializeText(const IColumn & column, size_t row_num, W
|
||||
writeChar(')', ostr);
|
||||
}
|
||||
|
||||
void SerializationTuple::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
|
||||
template <typename ReturnType>
|
||||
ReturnType SerializationTuple::deserializeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
|
||||
{
|
||||
const size_t size = elems.size();
|
||||
assertChar('(', istr);
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
addElementSafe(elems.size(), column, [&]
|
||||
const size_t size = elems.size();
|
||||
if constexpr (throw_exception)
|
||||
assertChar('(', istr);
|
||||
else if (!checkChar('(', istr))
|
||||
return ReturnType(false);
|
||||
|
||||
auto impl = [&]()
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
skipWhitespaceIfAny(istr);
|
||||
if (i != 0)
|
||||
{
|
||||
assertChar(',', istr);
|
||||
if constexpr (throw_exception)
|
||||
assertChar(',', istr);
|
||||
else if (!checkChar(',', istr))
|
||||
return ReturnType(false);
|
||||
|
||||
skipWhitespaceIfAny(istr);
|
||||
}
|
||||
if (settings.null_as_default)
|
||||
SerializationNullable::deserializeTextQuotedImpl(extractElementColumn(column, i), istr, settings, elems[i]);
|
||||
|
||||
auto & element_column = extractElementColumn(column, i);
|
||||
if constexpr (throw_exception)
|
||||
{
|
||||
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
|
||||
SerializationNullable::deserializeNullAsDefaultOrNestedTextQuoted(element_column, istr, settings, elems[i]);
|
||||
else
|
||||
elems[i]->deserializeTextQuoted(element_column, istr, settings);
|
||||
}
|
||||
else
|
||||
elems[i]->deserializeTextQuoted(extractElementColumn(column, i), istr, settings);
|
||||
{
|
||||
bool ok;
|
||||
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
|
||||
ok = SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextQuoted(element_column, istr, settings, elems[i]);
|
||||
else
|
||||
ok = elems[i]->tryDeserializeTextQuoted(element_column, istr, settings);
|
||||
|
||||
if (!ok)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Special format for one element tuple (1,)
|
||||
@ -150,11 +200,32 @@ void SerializationTuple::deserializeText(IColumn & column, ReadBuffer & istr, co
|
||||
}
|
||||
|
||||
skipWhitespaceIfAny(istr);
|
||||
assertChar(')', istr);
|
||||
if constexpr (throw_exception)
|
||||
assertChar(')', istr);
|
||||
else if (!checkChar(')', istr))
|
||||
return ReturnType(false);
|
||||
|
||||
if (whole && !istr.eof())
|
||||
throwUnexpectedDataAfterParsedValue(column, istr, settings, "Tuple");
|
||||
});
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throwUnexpectedDataAfterParsedValue(column, istr, settings, "Tuple");
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
return ReturnType(true);
|
||||
};
|
||||
|
||||
return addElementSafe<ReturnType>(elems.size(), column, impl);
|
||||
}
|
||||
|
||||
void SerializationTuple::deserializeText(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, bool whole) const
|
||||
{
|
||||
deserializeTextImpl(column, istr, settings, whole);
|
||||
}
|
||||
|
||||
bool SerializationTuple::tryDeserializeText(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, bool whole) const
|
||||
{
|
||||
return deserializeTextImpl<bool>(column, istr, settings, whole);
|
||||
}
|
||||
|
||||
void SerializationTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
@ -239,16 +310,39 @@ void SerializationTuple::serializeTextJSONPretty(const IColumn & column, size_t
|
||||
}
|
||||
}
|
||||
|
||||
void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
template <typename ReturnType>
|
||||
ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
auto deserialize_element = [&](IColumn & element_column, size_t element_pos)
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
{
|
||||
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
|
||||
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(element_column, istr, settings, elems[element_pos]);
|
||||
else
|
||||
elems[element_pos]->deserializeTextJSON(element_column, istr, settings);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
|
||||
return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(element_column, istr, settings, elems[element_pos]);
|
||||
return elems[element_pos]->tryDeserializeTextJSON(element_column, istr, settings);
|
||||
}
|
||||
};
|
||||
|
||||
if (settings.json.read_named_tuples_as_objects
|
||||
&& have_explicit_names)
|
||||
{
|
||||
skipWhitespaceIfAny(istr);
|
||||
assertChar('{', istr);
|
||||
if constexpr (throw_exception)
|
||||
assertChar('{', istr);
|
||||
else if (!checkChar('{', istr))
|
||||
return ReturnType(false);
|
||||
skipWhitespaceIfAny(istr);
|
||||
|
||||
addElementSafe(elems.size(), column, [&]
|
||||
auto impl = [&]()
|
||||
{
|
||||
std::vector<UInt8> seen_elements(elems.size(), 0);
|
||||
size_t processed = 0;
|
||||
@ -256,18 +350,32 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr
|
||||
while (!istr.eof() && *istr.position() != '}')
|
||||
{
|
||||
if (!settings.json.ignore_unknown_keys_in_named_tuple && processed == elems.size())
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected number of elements in named tuple. Expected no more than {} (consider enabling input_format_json_ignore_unknown_keys_in_named_tuple setting)", elems.size());
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected number of elements in named tuple. Expected no more than {} (consider enabling input_format_json_ignore_unknown_keys_in_named_tuple setting)", elems.size());
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
if (processed + skipped > 0)
|
||||
{
|
||||
assertChar(',', istr);
|
||||
if constexpr (throw_exception)
|
||||
assertChar(',', istr);
|
||||
else if (!checkChar(',', istr))
|
||||
return ReturnType(false);
|
||||
skipWhitespaceIfAny(istr);
|
||||
}
|
||||
|
||||
std::string name;
|
||||
readDoubleQuotedString(name, istr);
|
||||
if constexpr (throw_exception)
|
||||
readDoubleQuotedString(name, istr);
|
||||
else if (!tryReadDoubleQuotedString(name, istr))
|
||||
return ReturnType(false);
|
||||
|
||||
skipWhitespaceIfAny(istr);
|
||||
assertChar(':', istr);
|
||||
if constexpr (throw_exception)
|
||||
assertChar(':', istr);
|
||||
else if (!checkChar(':', istr))
|
||||
return ReturnType(false);
|
||||
skipWhitespaceIfAny(istr);
|
||||
|
||||
const size_t element_pos = getPositionByName(name);
|
||||
@ -275,36 +383,52 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr
|
||||
{
|
||||
if (settings.json.ignore_unknown_keys_in_named_tuple)
|
||||
{
|
||||
skipJSONField(istr, name);
|
||||
if constexpr (throw_exception)
|
||||
skipJSONField(istr, name);
|
||||
else if (!trySkipJSONField(istr, name))
|
||||
return ReturnType(false);
|
||||
|
||||
skipWhitespaceIfAny(istr);
|
||||
++skipped;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Tuple doesn't have element with name '{}', enable setting input_format_json_ignore_unknown_keys_in_named_tuple", name);
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Tuple doesn't have element with name '{}', enable setting input_format_json_ignore_unknown_keys_in_named_tuple", name);
|
||||
return ReturnType(false);
|
||||
}
|
||||
}
|
||||
|
||||
seen_elements[element_pos] = 1;
|
||||
auto & element_column = extractElementColumn(column, element_pos);
|
||||
|
||||
try
|
||||
if constexpr (throw_exception)
|
||||
{
|
||||
if (settings.null_as_default)
|
||||
SerializationNullable::deserializeTextJSONImpl(element_column, istr, settings, elems[element_pos]);
|
||||
else
|
||||
elems[element_pos]->deserializeTextJSON(element_column, istr, settings);
|
||||
try
|
||||
{
|
||||
deserialize_element(element_column, element_pos);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("(while reading the value of nested key " + name + ")");
|
||||
throw;
|
||||
}
|
||||
}
|
||||
catch (Exception & e)
|
||||
else
|
||||
{
|
||||
e.addMessage("(while reading the value of nested key " + name + ")");
|
||||
throw;
|
||||
if (!deserialize_element(element_column, element_pos))
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
skipWhitespaceIfAny(istr);
|
||||
++processed;
|
||||
}
|
||||
|
||||
assertChar('}', istr);
|
||||
if constexpr (throw_exception)
|
||||
assertChar('}', istr);
|
||||
else if (!checkChar('}', istr))
|
||||
return ReturnType(false);
|
||||
|
||||
/// Check if we have missing elements.
|
||||
if (processed != elems.size())
|
||||
@ -315,41 +439,81 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr
|
||||
continue;
|
||||
|
||||
if (!settings.json.defaults_for_missing_elements_in_named_tuple)
|
||||
throw Exception(
|
||||
ErrorCodes::INCORRECT_DATA,
|
||||
"JSON object doesn't contain tuple element {}. If you want to insert defaults in case of missing elements, "
|
||||
"enable setting input_format_json_defaults_for_missing_elements_in_named_tuple",
|
||||
elems[element_pos]->getElementName());
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(
|
||||
ErrorCodes::INCORRECT_DATA,
|
||||
"JSON object doesn't contain tuple element {}. If you want to insert defaults in case of missing elements, "
|
||||
"enable setting input_format_json_defaults_for_missing_elements_in_named_tuple",
|
||||
elems[element_pos]->getElementName());
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
auto & element_column = extractElementColumn(column, element_pos);
|
||||
element_column.insertDefault();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return ReturnType(true);
|
||||
};
|
||||
|
||||
return addElementSafe<ReturnType>(elems.size(), column, impl);
|
||||
}
|
||||
else
|
||||
{
|
||||
assertChar('[', istr);
|
||||
skipWhitespaceIfAny(istr);
|
||||
if constexpr (throw_exception)
|
||||
assertChar('[', istr);
|
||||
else if (!checkChar('[', istr))
|
||||
return ReturnType(false);
|
||||
skipWhitespaceIfAny(istr);
|
||||
|
||||
addElementSafe(elems.size(), column, [&]
|
||||
auto impl = [&]()
|
||||
{
|
||||
for (size_t i = 0; i < elems.size(); ++i)
|
||||
{
|
||||
skipWhitespaceIfAny(istr);
|
||||
if (i != 0)
|
||||
{
|
||||
assertChar(',', istr);
|
||||
if constexpr (throw_exception)
|
||||
assertChar(',', istr);
|
||||
else if (!checkChar(',', istr))
|
||||
return ReturnType(false);
|
||||
skipWhitespaceIfAny(istr);
|
||||
}
|
||||
elems[i]->deserializeTextJSON(extractElementColumn(column, i), istr, settings);
|
||||
|
||||
auto & element_column = extractElementColumn(column, i);
|
||||
|
||||
if constexpr (throw_exception)
|
||||
deserialize_element(element_column, i);
|
||||
else if (!deserialize_element(element_column, i))
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
skipWhitespaceIfAny(istr);
|
||||
assertChar(']', istr);
|
||||
});
|
||||
if constexpr (throw_exception)
|
||||
assertChar(']', istr);
|
||||
else if (!checkChar(']', istr))
|
||||
return ReturnType(false);
|
||||
|
||||
return ReturnType(true);
|
||||
};
|
||||
|
||||
return addElementSafe<ReturnType>(elems.size(), column, impl);
|
||||
}
|
||||
}
|
||||
|
||||
void SerializationTuple::deserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
|
||||
{
|
||||
deserializeTextJSONImpl(column, istr, settings);
|
||||
}
|
||||
|
||||
bool SerializationTuple::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
|
||||
{
|
||||
return deserializeTextJSONImpl<bool>(column, istr, settings);
|
||||
}
|
||||
|
||||
|
||||
void SerializationTuple::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeCString("<tuple>", ostr);
|
||||
@ -385,14 +549,48 @@ void SerializationTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
|
||||
assertChar(settings.csv.tuple_delimiter, istr);
|
||||
skipWhitespaceIfAny(istr);
|
||||
}
|
||||
if (settings.null_as_default)
|
||||
SerializationNullable::deserializeTextCSVImpl(extractElementColumn(column, i), istr, settings, elems[i]);
|
||||
|
||||
auto & element_column = extractElementColumn(column, i);
|
||||
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
|
||||
SerializationNullable::deserializeNullAsDefaultOrNestedTextCSV(element_column, istr, settings, elems[i]);
|
||||
else
|
||||
elems[i]->deserializeTextCSV(extractElementColumn(column, i), istr, settings);
|
||||
elems[i]->deserializeTextCSV(element_column, istr, settings);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Tries to read all tuple elements from CSV, separated by settings.csv.tuple_delimiter.
/// Returns false as soon as a delimiter is missing or an element fails to parse.
/// The parsing is run through addElementSafe<bool>.
bool SerializationTuple::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    const size_t num_elems = elems.size();

    auto read_all_elements = [&]() -> bool
    {
        for (size_t pos = 0; pos != num_elems; ++pos)
        {
            /// Every element except the first must be preceded by the delimiter.
            if (pos > 0)
            {
                skipWhitespaceIfAny(istr);
                if (!checkChar(settings.csv.tuple_delimiter, istr))
                    return false;
                skipWhitespaceIfAny(istr);
            }

            auto & nested_column = extractElementColumn(column, pos);

            /// With null_as_default, a non-nullable element goes through the SerializationNullable helper.
            const bool use_null_as_default = settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column);
            const bool element_ok = use_null_as_default
                ? SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextCSV(nested_column, istr, settings, elems[pos])
                : elems[pos]->tryDeserializeTextCSV(nested_column, istr, settings);

            if (!element_ok)
                return false;
        }

        return true;
    };

    return addElementSafe<bool>(num_elems, column, read_all_elements);
}
|
||||
|
||||
void SerializationTuple::enumerateStreams(
|
||||
EnumerateStreamsSettings & settings,
|
||||
const StreamCallback & callback,
|
||||
|
@ -23,14 +23,17 @@ public:
|
||||
void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
|
||||
bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override;
|
||||
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
|
||||
/// Tuples in CSV format will be serialized as separate columns (that is, losing their nesting in the tuple).
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
/** Each sub-column in a tuple is serialized in separate stream.
|
||||
*/
|
||||
@ -73,6 +76,15 @@ private:
|
||||
bool have_explicit_names;
|
||||
|
||||
size_t getPositionByName(const String & name) const;
|
||||
|
||||
template <typename ReturnType = void>
|
||||
ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const;
|
||||
|
||||
template <typename ReturnType = void>
|
||||
ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
|
||||
|
||||
template <typename ReturnType = void>
|
||||
ReturnType deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -25,15 +25,16 @@ void SerializationUUID::deserializeText(IColumn & column, ReadBuffer & istr, con
|
||||
throwUnexpectedDataAfterParsedValue(column, istr, settings, "UUID");
|
||||
}
|
||||
|
||||
void SerializationUUID::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
bool SerializationUUID::tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const
|
||||
{
|
||||
deserializeText(column, istr, settings, false);
|
||||
UUID x;
|
||||
if (!tryReadText(x, istr) || (whole && !istr.eof()))
|
||||
return false;
|
||||
|
||||
assert_cast<ColumnUUID &>(column).getData().push_back(x);
|
||||
return true;
|
||||
}
|
||||
|
||||
void SerializationUUID::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
serializeText(column, row_num, ostr, settings);
|
||||
}
|
||||
|
||||
void SerializationUUID::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
@ -76,6 +77,17 @@ void SerializationUUID::deserializeTextQuoted(IColumn & column, ReadBuffer & ist
|
||||
assert_cast<ColumnUUID &>(column).getData().push_back(std::move(uuid)); /// It's important to do this at the end - for exception safety.
|
||||
}
|
||||
|
||||
/// Tries to read a UUID enclosed in single quotes.
/// Returns false if either quote is missing or the UUID itself cannot be read.
bool SerializationUUID::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    UUID uuid;
    if (!checkChar('\'', istr) || !tryReadText(uuid, istr) || !checkChar('\'', istr))
        return false;

    /// The value is appended only after the whole quoted token was read,
    /// so a failed attempt does not modify the column.
    assert_cast<ColumnUUID &>(column).getData().push_back(std::move(uuid));
    return true;
}
|
||||
|
||||
void SerializationUUID::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeChar('"', ostr);
|
||||
@ -92,6 +104,15 @@ void SerializationUUID::deserializeTextJSON(IColumn & column, ReadBuffer & istr,
|
||||
assert_cast<ColumnUUID &>(column).getData().push_back(x);
|
||||
}
|
||||
|
||||
/// Tries to read a UUID enclosed in double quotes.
/// On failure returns false and does not append anything to the column.
bool SerializationUUID::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    UUID parsed;
    const bool ok = checkChar('"', istr) && tryReadText(parsed, istr) && checkChar('"', istr);
    if (!ok)
        return false;

    auto & uuids = assert_cast<ColumnUUID &>(column).getData();
    uuids.push_back(parsed);
    return true;
}
|
||||
|
||||
void SerializationUUID::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
writeChar('"', ostr);
|
||||
@ -106,6 +127,14 @@ void SerializationUUID::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
|
||||
assert_cast<ColumnUUID &>(column).getData().push_back(value);
|
||||
}
|
||||
|
||||
/// Tries to read a UUID from a CSV field.
/// The value is appended to the column only when tryReadCSV succeeds.
bool SerializationUUID::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    UUID parsed;
    if (tryReadCSV(parsed, istr))
    {
        assert_cast<ColumnUUID &>(column).getData().push_back(parsed);
        return true;
    }

    return false;
}
|
||||
|
||||
void SerializationUUID::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const
|
||||
{
|
||||
|
@ -10,14 +10,16 @@ class SerializationUUID : public SimpleTextSerialization
|
||||
public:
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
|
||||
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
|
||||
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
840
src/DataTypes/Serializations/SerializationVariant.cpp
Normal file
840
src/DataTypes/Serializations/SerializationVariant.cpp
Normal file
@ -0,0 +1,840 @@
|
||||
#include <DataTypes/Serializations/SerializationVariant.h>
|
||||
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
||||
#include <DataTypes/Serializations/SerializationNumber.h>
|
||||
#include <DataTypes/Serializations/SerializationNullable.h>
|
||||
#include <DataTypes/Serializations/SerializationNamed.h>
|
||||
#include <DataTypes/DataTypeVariant.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <Columns/ColumnVariant.h>
|
||||
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int INCORRECT_DATA;
|
||||
}
|
||||
|
||||
/// Enumerates the substreams of a Variant.
/// First the discriminators substream is reported through `callback`. Then, under a
/// VariantElements path entry, every nested variant serialization enumerates its own streams.
/// `data.type` and `data.column` may be absent; everything derived from them is then nullptr.
void SerializationVariant::enumerateStreams(
    EnumerateStreamsSettings & settings,
    const StreamCallback & callback,
    const SubstreamData & data) const
{
    const auto * variant_type = data.type ? &assert_cast<const DataTypeVariant &>(*data.type) : nullptr;
    const auto * variant_column = data.column ? &assert_cast<const ColumnVariant &>(*data.column) : nullptr;

    auto discr_serialization = std::make_shared<SerializationNamed>(
        std::make_shared<SerializationNumber<ColumnVariant::Discriminator>>(), "discr", SubstreamType::NamedVariantDiscriminators);
    auto local_discriminators = variant_column ? variant_column->getLocalDiscriminatorsPtr() : nullptr;

    /// Discriminators substream: push the path entry, report it, pop it again.
    settings.path.push_back(Substream::VariantDiscriminators);
    auto discr_data = SubstreamData(discr_serialization)
                          .withType(variant_type ? std::make_shared<DataTypeNumber<ColumnVariant::Discriminator>>() : nullptr)
                          .withColumn(local_discriminators)
                          .withSerializationInfo(data.serialization_info);

    settings.path.back().data = discr_data;
    callback(settings.path);
    settings.path.pop_back();

    /// Variant elements: one shared VariantElements entry, with a per-variant entry pushed on top of it.
    settings.path.push_back(Substream::VariantElements);
    settings.path.back().data = data;

    for (size_t global_discr = 0; global_discr < variants.size(); ++global_discr)
    {
        /// The creator is set on the VariantElements entry before the per-variant entry is pushed.
        /// Without a column, the global discriminator is passed in place of the local one.
        settings.path.back().creator = std::make_shared<SerializationVariantElement::VariantSubcolumnCreator>(
            local_discriminators,
            variant_names[global_discr],
            global_discr,
            variant_column ? variant_column->localDiscriminatorByGlobal(global_discr) : global_discr);

        auto element_data = SubstreamData(variants[global_discr])
                                .withType(variant_type ? variant_type->getVariant(global_discr) : nullptr)
                                .withColumn(variant_column ? variant_column->getVariantPtrByGlobalDiscriminator(global_discr) : nullptr)
                                .withSerializationInfo(data.serialization_info);

        addVariantElementToPath(settings.path, global_discr);
        settings.path.back().data = element_data;
        variants[global_discr]->enumerateStreams(settings, callback, element_data);
        settings.path.pop_back();
    }

    settings.path.pop_back();
}
|
||||
|
||||
struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState
|
||||
{
|
||||
std::vector<ISerialization::SerializeBinaryBulkStatePtr> states;
|
||||
};
|
||||
|
||||
struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState
|
||||
{
|
||||
std::vector<ISerialization::DeserializeBinaryBulkStatePtr> states;
|
||||
};
|
||||
|
||||
void SerializationVariant::serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
|
||||
|
||||
auto variant_state = std::make_shared<SerializeBinaryBulkStateVariant>();
|
||||
variant_state->states.resize(variants.size());
|
||||
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
|
||||
for (size_t i = 0; i < variants.size(); ++i)
|
||||
{
|
||||
addVariantElementToPath(settings.path, i);
|
||||
variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->states[i]);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
settings.path.pop_back();
|
||||
state = std::move(variant_state);
|
||||
}
|
||||
|
||||
|
||||
/// Lets every variant write its serialization suffix, using the nested states
/// stored in the Variant state.
void SerializationVariant::serializeBinaryBulkStateSuffix(
    SerializeBinaryBulkSettings & settings,
    SerializeBinaryBulkStatePtr & state) const
{
    auto * variant_state = checkAndGetState<SerializeBinaryBulkStateVariant>(state);
    const size_t num_variants = variants.size();

    settings.path.push_back(Substream::VariantElements);
    for (size_t global_discr = 0; global_discr < num_variants; ++global_discr)
    {
        auto & nested_state = variant_state->states[global_discr];
        addVariantElementToPath(settings.path, global_discr);
        variants[global_discr]->serializeBinaryBulkStateSuffix(settings, nested_state);
        settings.path.pop_back();
    }
    settings.path.pop_back();
}
|
||||
|
||||
|
||||
/// Creates the deserialization state of the Variant column and lets every variant
/// read its own prefix (under VariantElements / VariantElement) into its slot of that state.
void SerializationVariant::deserializeBinaryBulkStatePrefix(
    DeserializeBinaryBulkSettings & settings,
    DeserializeBinaryBulkStatePtr & state) const
{
    const size_t num_variants = variants.size();

    auto new_state = std::make_shared<DeserializeBinaryBulkStateVariant>();
    new_state->states.resize(num_variants);

    settings.path.push_back(Substream::VariantElements);
    for (size_t global_discr = 0; global_discr < num_variants; ++global_discr)
    {
        addVariantElementToPath(settings.path, global_discr);
        variants[global_discr]->deserializeBinaryBulkStatePrefix(settings, new_state->states[global_discr]);
        settings.path.pop_back();
    }
    settings.path.pop_back();

    state = std::move(new_state);
}
|
||||
|
||||
|
||||
void SerializationVariant::serializeBinaryBulkWithMultipleStreams(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
|
||||
if (const size_t size = col.size(); limit == 0 || offset + limit > size)
|
||||
limit = size - offset;
|
||||
|
||||
settings.path.push_back(Substream::VariantDiscriminators);
|
||||
auto * discriminators_stream = settings.getter(settings.path);
|
||||
settings.path.pop_back();
|
||||
|
||||
if (!discriminators_stream)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream for VariantDiscriminators in SerializationVariant::serializeBinaryBulkWithMultipleStreams");
|
||||
|
||||
auto * variant_state = checkAndGetState<SerializeBinaryBulkStateVariant>(state);
|
||||
|
||||
/// If offset = 0 and limit == col.size() or we have only NULLs, we don't need to calculate
|
||||
/// offsets and limits for variants and need to just serialize whole columns.
|
||||
if ((offset == 0 && limit == col.size()) || col.hasOnlyNulls())
|
||||
{
|
||||
/// First, serialize discriminators.
|
||||
/// If we have only NULLs or local and global discriminators are the same, just serialize the column as is.
|
||||
if (col.hasOnlyNulls() || col.hasGlobalVariantsOrder())
|
||||
{
|
||||
SerializationNumber<ColumnVariant::Discriminator>().serializeBinaryBulk(col.getLocalDiscriminatorsColumn(), *discriminators_stream, offset, limit);
|
||||
}
|
||||
/// If local and global discriminators are different, we should convert local to global before serializing (because we don't serialize the mapping).
|
||||
else
|
||||
{
|
||||
const auto & local_discriminators = col.getLocalDiscriminators();
|
||||
for (size_t i = offset; i != offset + limit; ++i)
|
||||
writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream);
|
||||
}
|
||||
|
||||
/// Second, serialize variants in global order.
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
for (size_t i = 0; i != variants.size(); ++i)
|
||||
{
|
||||
addVariantElementToPath(settings.path, i);
|
||||
variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->states[i]);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
settings.path.pop_back();
|
||||
return;
|
||||
}
|
||||
|
||||
/// If we have only one non empty variant and no NULLs, we can use the same limit offset for this variant.
|
||||
if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
|
||||
{
|
||||
/// First, serialize discriminators.
|
||||
/// We know that all discriminators are the same, so we just need to serialize this discriminator limit times.
|
||||
auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr);
|
||||
for (size_t i = 0; i != limit; ++i)
|
||||
writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream);
|
||||
|
||||
/// Second, serialize non-empty variant (other variants are empty and we can skip their serialization).
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
addVariantElementToPath(settings.path, non_empty_global_discr);
|
||||
/// We can use the same offset/limit as for whole Variant column
|
||||
variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]);
|
||||
settings.path.pop_back();
|
||||
settings.path.pop_back();
|
||||
return;
|
||||
}
|
||||
|
||||
/// In general case we should iterate through local discriminators in range [offset, offset + limit] to serialize global discriminators and calculate offset/limit pair for each variant.
|
||||
const auto & local_discriminators = col.getLocalDiscriminators();
|
||||
const auto & offsets = col.getOffsets();
|
||||
std::vector<std::pair<size_t, size_t>> variant_offsets_and_limits(variants.size(), {0, 0});
|
||||
size_t end = offset + limit;
|
||||
for (size_t i = offset; i < end; ++i)
|
||||
{
|
||||
auto global_discr = col.globalDiscriminatorByLocal(local_discriminators[i]);
|
||||
writeBinaryLittleEndian(global_discr, *discriminators_stream);
|
||||
|
||||
if (global_discr != ColumnVariant::NULL_DISCRIMINATOR)
|
||||
{
|
||||
/// If we see this discriminator for the first time, update offset
|
||||
if (!variant_offsets_and_limits[global_discr].second)
|
||||
variant_offsets_and_limits[global_discr].first = offsets[i];
|
||||
/// Update limit for this discriminator.
|
||||
++variant_offsets_and_limits[global_discr].second;
|
||||
}
|
||||
}
|
||||
|
||||
/// Serialize variants in global order.
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
for (size_t i = 0; i != variants.size(); ++i)
|
||||
{
|
||||
/// Serialize variant only if we have its discriminator in the range.
|
||||
if (variant_offsets_and_limits[i].second)
|
||||
{
|
||||
addVariantElementToPath(settings.path, i);
|
||||
variants[i]->serializeBinaryBulkWithMultipleStreams(
|
||||
col.getVariantByGlobalDiscriminator(i),
|
||||
variant_offsets_and_limits[i].first,
|
||||
variant_offsets_and_limits[i].second,
|
||||
settings,
|
||||
variant_state->states[i]);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
}
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
|
||||
/// Deserializes up to `limit` rows of a Variant column from multiple streams:
///  1) discriminators (taken from the substreams cache when available);
///  2) values of every variant, where the number of values per variant is counted
///     from the newly read discriminators;
///  3) offsets, which are not stored and are reconstructed from the discriminators.
/// The column must have global variants order, otherwise LOGICAL_ERROR is thrown.
/// If there is no stream for discriminators, nothing is read and `settings.path`
/// is restored before returning.
void SerializationVariant::deserializeBinaryBulkWithMultipleStreams(
    ColumnPtr & column,
    size_t limit,
    DeserializeBinaryBulkSettings & settings,
    DeserializeBinaryBulkStatePtr & state,
    SubstreamsCache * cache) const
{
    auto mutable_column = column->assumeMutable();
    ColumnVariant & col = assert_cast<ColumnVariant &>(*mutable_column);
    /// We always serialize Variant column with global variants order,
    /// so during deserialization the column should always have global variants order.
    if (!col.hasGlobalVariantsOrder())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to deserialize data into Variant column with not global variants order");

    /// First, deserialize discriminators.
    settings.path.push_back(Substream::VariantDiscriminators);
    if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path))
    {
        col.getLocalDiscriminatorsPtr() = cached_discriminators;
    }
    else
    {
        auto * discriminators_stream = settings.getter(settings.path);
        if (!discriminators_stream)
        {
            /// Keep settings.path balanced: the caller reuses it for other substreams,
            /// so the element pushed above must not leak out of this function.
            settings.path.pop_back();
            return;
        }

        SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0);
        addToSubstreamsCache(cache, settings.path, col.getLocalDiscriminatorsPtr());
    }
    settings.path.pop_back();

    /// Second, calculate limits for each variant by iterating through new discriminators.
    /// NOTE(review): this assumes exactly `limit` discriminators were appended and that every
    /// non-NULL discriminator is smaller than variants.size() - confirm for truncated/corrupted streams.
    std::vector<size_t> variant_limits(variants.size(), 0);
    auto & discriminators_data = col.getLocalDiscriminators();
    size_t discriminators_offset = discriminators_data.size() - limit;
    for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
    {
        ColumnVariant::Discriminator discr = discriminators_data[i];
        if (discr != ColumnVariant::NULL_DISCRIMINATOR)
            ++variant_limits[discr];
    }

    /// Now we can deserialize variants according to their limits.
    auto * variant_state = checkAndGetState<DeserializeBinaryBulkStateVariant>(state);
    settings.path.push_back(Substream::VariantElements);
    for (size_t i = 0; i != variants.size(); ++i)
    {
        addVariantElementToPath(settings.path, i);
        variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->states[i], cache);
        settings.path.pop_back();
    }
    settings.path.pop_back();

    /// Fill offsets column.
    /// It's important to do it after deserialization of all variants, because to fill offsets we need
    /// initial variants sizes without values in current range, but some variants can be shared with
    /// other columns via substream cache and they can already contain values from this range even
    /// before we call deserialize for them. So, before deserialize we cannot know for sure if
    /// variant columns already contain values from current range or not. But after calling deserialize
    /// we know for sure that they contain these values, so we can use variant limits and their
    /// new sizes to calculate correct offsets.
    settings.path.push_back(Substream::VariantOffsets);
    if (auto cached_offsets = getFromSubstreamsCache(cache, settings.path))
    {
        col.getOffsetsPtr() = cached_offsets;
    }
    else
    {
        auto & offsets = col.getOffsets();
        offsets.reserve(offsets.size() + limit);
        /// For every variant: the index of its first value that belongs to the current range.
        std::vector<size_t> variant_offsets;
        variant_offsets.reserve(variants.size());
        for (size_t i = 0; i != variants.size(); ++i)
            variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]);

        for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
        {
            ColumnVariant::Discriminator discr = discriminators_data[i];
            if (discr == ColumnVariant::NULL_DISCRIMINATOR)
                offsets.emplace_back();
            else
                offsets.push_back(variant_offsets[discr]++);
        }

        addToSubstreamsCache(cache, settings.path, col.getOffsetsPtr());
    }
    settings.path.pop_back();
}
|
||||
|
||||
/// Appends a VariantElement substream named after the i-th variant to `path`.
/// The caller is responsible for popping it afterwards.
void SerializationVariant::addVariantElementToPath(DB::ISerialization::SubstreamPath & path, size_t i) const
{
    Substream element(Substream::VariantElement);
    element.variant_element_name = variant_names[i];
    path.push_back(std::move(element));
}
|
||||
|
||||
void SerializationVariant::serializeBinary(const Field & /*field*/, WriteBuffer & /*ostr*/, const FormatSettings & /*settings*/) const
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinary from a field is not implemented for SerializationVariant");
|
||||
}
|
||||
|
||||
/// Deserialization into a single Field is not supported for Variant: always throws NOT_IMPLEMENTED.
void SerializationVariant::deserializeBinary(Field &, ReadBuffer &, const FormatSettings &) const
{
    throw Exception(
        ErrorCodes::NOT_IMPLEMENTED,
        "Method deserializeBinary to a field is not implemented for SerializationVariant");
}
|
||||
|
||||
void SerializationVariant::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
|
||||
auto global_discr = col.globalDiscriminatorAt(row_num);
|
||||
writeBinaryLittleEndian(global_discr, ostr);
|
||||
if (global_discr != ColumnVariant::NULL_DISCRIMINATOR)
|
||||
variants[global_discr]->serializeBinary(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings);
|
||||
}
|
||||
|
||||
/// Reads one row in binary form: the global discriminator followed
/// (unless it is the NULL discriminator) by the value of the corresponding variant.
/// Throws INCORRECT_DATA if the discriminator read from `istr` does not refer
/// to any variant of this type.
void SerializationVariant::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    ColumnVariant & col = assert_cast<ColumnVariant &>(column);
    ColumnVariant::Discriminator global_discr;
    readBinaryLittleEndian(global_discr, istr);
    if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
    {
        col.insertDefault();
        return;
    }

    /// The discriminator comes from the input data and is used as an index below,
    /// so it must be validated before touching `variants` or the nested columns.
    if (global_discr >= variants.size())
        throw Exception(
            ErrorCodes::INCORRECT_DATA,
            "Cannot read binary value of type {}: unexpected variant discriminator {}, the type has only {} variants",
            variant_name,
            static_cast<size_t>(global_discr),
            variants.size());

    auto & variant_column = col.getVariantByGlobalDiscriminator(global_discr);
    variants[global_discr]->deserializeBinary(variant_column, istr, settings);
    col.getLocalDiscriminators().push_back(col.localDiscriminatorByGlobal(global_discr));
    /// The value has just been appended to the nested column, so its offset is the last index.
    col.getOffsets().push_back(variant_column.size() - 1);
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
const std::unordered_map<TypeIndex, size_t> & getTypesTextDeserializePriorityMap()
|
||||
{
|
||||
static std::unordered_map<TypeIndex, size_t> priority_map = []
|
||||
{
|
||||
static constexpr std::array priorities = {
|
||||
/// Complex types have highest priority.
|
||||
TypeIndex::Array,
|
||||
TypeIndex::Tuple,
|
||||
TypeIndex::Map,
|
||||
TypeIndex::AggregateFunction,
|
||||
|
||||
/// Enums can be parsed both from strings and numbers.
|
||||
/// So they have high enough priority.
|
||||
TypeIndex::Enum8,
|
||||
TypeIndex::Enum16,
|
||||
|
||||
/// Types that can be parsed from strings.
|
||||
TypeIndex::UUID,
|
||||
TypeIndex::IPv4,
|
||||
TypeIndex::IPv6,
|
||||
|
||||
/// Types that can be parsed from numbers.
|
||||
/// The order:
|
||||
/// 1) Integers
|
||||
/// 2) Big Integers
|
||||
/// 3) Decimals
|
||||
/// 4) Floats
|
||||
/// In each group small types have higher priority.
|
||||
TypeIndex::Int8,
|
||||
TypeIndex::UInt8,
|
||||
TypeIndex::Int16,
|
||||
TypeIndex::UInt16,
|
||||
TypeIndex::Int32,
|
||||
TypeIndex::UInt32,
|
||||
TypeIndex::Int64,
|
||||
TypeIndex::UInt64,
|
||||
TypeIndex::Int128,
|
||||
TypeIndex::UInt128,
|
||||
TypeIndex::Int256,
|
||||
TypeIndex::UInt256,
|
||||
TypeIndex::Decimal32,
|
||||
TypeIndex::Decimal64,
|
||||
TypeIndex::Decimal128,
|
||||
TypeIndex::Decimal256,
|
||||
TypeIndex::Float32,
|
||||
TypeIndex::Float64,
|
||||
|
||||
/// Dates and DateTimes. More simple Date types have higher priority.
|
||||
/// They have lower priority as numbers as some DateTimes sometimes can
|
||||
/// be also parsed from numbers, but we don't want it usually.
|
||||
TypeIndex::Date,
|
||||
TypeIndex::Date32,
|
||||
TypeIndex::DateTime,
|
||||
TypeIndex::DateTime64,
|
||||
|
||||
/// String types have almost the lowest priority,
|
||||
/// as in text formats almost all data can
|
||||
/// be deserialized into String type.
|
||||
TypeIndex::FixedString,
|
||||
TypeIndex::String,
|
||||
};
|
||||
|
||||
std::unordered_map<TypeIndex, size_t> pm;
|
||||
|
||||
pm.reserve(priorities.size());
|
||||
for (size_t i = 0; i != priorities.size(); ++i)
|
||||
pm[priorities[i]] = priorities.size() - i;
|
||||
return pm;
|
||||
}();
|
||||
|
||||
return priority_map;
|
||||
}
|
||||
|
||||
/// We want to create more or less optimal order of types in which we will try text deserializations.
|
||||
/// To do it, for each type we calculate a priority and then sort them by this priority.
|
||||
/// Above we defined priority of each data type, but types can be nested and also we can have LowCardinality and Nullable.
|
||||
/// To sort any nested types we create a priority that is a tuple of 3 elements:
|
||||
/// 1) The maximum depth of nested types like Array/Map/Tuple.
|
||||
/// 2) The combination of simple and complex types priorities.
|
||||
/// 3) The depth of nested types LowCardinality/Nullable.
|
||||
/// So, when we will sort types, first we will sort by the maximum depth of nested types, so more nested types are deserialized first,
|
||||
/// then for types with the same depth we sort by the types priority, and last we sort by the depth of LowCardinality/Nullable types,
|
||||
/// so if we have types with the same level of nesting and the same priority, we will first try to deserialize LowCardinality/Nullable types
|
||||
/// (for example if we have types Array(Array(String)) and Array(Array(Nullable(String))).
|
||||
/// This is just a batch of heuristics.
|
||||
std::tuple<size_t, size_t, size_t> getTypeTextDeserializePriority(const DataTypePtr & type, size_t nested_depth, size_t simple_nested_depth, const std::unordered_map<TypeIndex, size_t> & priority_map)
|
||||
{
|
||||
if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(type.get()))
|
||||
return getTypeTextDeserializePriority(nullable_type->getNestedType(), nested_depth, simple_nested_depth + 1, priority_map);
|
||||
|
||||
if (const auto * lc_type = typeid_cast<const DataTypeLowCardinality *>(type.get()))
|
||||
return getTypeTextDeserializePriority(lc_type->getDictionaryType(), nested_depth, simple_nested_depth + 1, priority_map);
|
||||
|
||||
if (const auto * array_type = typeid_cast<const DataTypeArray *>(type.get()))
|
||||
{
|
||||
auto [elements_nested_depth, elements_priority, elements_simple_nested_depth] = getTypeTextDeserializePriority(array_type->getNestedType(), nested_depth + 1, simple_nested_depth, priority_map);
|
||||
return {elements_nested_depth, elements_priority + priority_map.at(TypeIndex::Array), elements_simple_nested_depth};
|
||||
}
|
||||
|
||||
if (const auto * tuple_type = typeid_cast<const DataTypeTuple *>(type.get()))
|
||||
{
|
||||
size_t max_nested_depth = 0;
|
||||
size_t sum_priority = 0;
|
||||
size_t max_simple_nested_depth = 0;
|
||||
for (const auto & elem : tuple_type->getElements())
|
||||
{
|
||||
auto [elem_nested_depth, elem_priority, elem_simple_nested_depth] = getTypeTextDeserializePriority(elem, nested_depth + 1, simple_nested_depth, priority_map);
|
||||
sum_priority += elem_priority;
|
||||
if (elem_nested_depth > max_nested_depth)
|
||||
max_nested_depth = elem_nested_depth;
|
||||
if (elem_simple_nested_depth > max_simple_nested_depth)
|
||||
max_simple_nested_depth = elem_simple_nested_depth;
|
||||
}
|
||||
|
||||
return {max_nested_depth, sum_priority + priority_map.at(TypeIndex::Tuple), max_simple_nested_depth};
|
||||
}
|
||||
|
||||
if (const auto * map_type = typeid_cast<const DataTypeMap *>(type.get()))
|
||||
{
|
||||
auto [key_max_depth, key_priority, key_simple_nested_depth] = getTypeTextDeserializePriority(map_type->getKeyType(), nested_depth + 1, simple_nested_depth, priority_map);
|
||||
auto [value_max_depth, value_priority, value_simple_nested_depth] = getTypeTextDeserializePriority(map_type->getValueType(), nested_depth + 1, simple_nested_depth, priority_map);
|
||||
return {std::max(key_max_depth, value_max_depth), key_priority + value_priority + priority_map.at(TypeIndex::Map), std::max(key_simple_nested_depth, value_simple_nested_depth)};
|
||||
}
|
||||
|
||||
if (const auto * variant_type = typeid_cast<const DataTypeVariant *>(type.get()))
|
||||
{
|
||||
size_t max_priority = 0;
|
||||
size_t max_depth = 0;
|
||||
size_t max_simple_nested_depth = 0;
|
||||
for (const auto & variant : variant_type->getVariants())
|
||||
{
|
||||
auto [variant_max_depth, variant_priority, variant_simple_nested_depth] = getTypeTextDeserializePriority(variant, nested_depth, simple_nested_depth, priority_map);
|
||||
if (variant_priority > max_priority)
|
||||
max_priority = variant_priority;
|
||||
if (variant_max_depth > max_depth)
|
||||
max_depth = variant_max_depth;
|
||||
if (variant_simple_nested_depth > max_simple_nested_depth)
|
||||
max_simple_nested_depth = variant_simple_nested_depth;
|
||||
}
|
||||
|
||||
return {max_depth, max_priority, max_simple_nested_depth};
|
||||
}
|
||||
|
||||
/// Bool type should have priority higher then all integers.
|
||||
if (isBool(type))
|
||||
return {nested_depth, priority_map.at(TypeIndex::Int8) + 1, simple_nested_depth};
|
||||
|
||||
auto it = priority_map.find(type->getTypeId());
|
||||
return {nested_depth, it == priority_map.end() ? 0 : it->second, simple_nested_depth};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Returns indexes of `variant_types` in the order in which the variants should be tried
/// during text deserialization: from the highest text deserialization priority to the lowest.
std::vector<size_t> SerializationVariant::getVariantsDeserializeTextOrder(const DB::DataTypes & variant_types)
{
    const size_t num_variants = variant_types.size();
    const auto & priority_map = getTypesTextDeserializePriorityMap();

    std::vector<std::tuple<size_t, size_t, size_t>> priorities;
    priorities.reserve(num_variants);
    for (const auto & variant_type : variant_types)
        priorities.push_back(getTypeTextDeserializePriority(variant_type, 0, 0, priority_map));

    std::vector<size_t> order;
    order.reserve(num_variants);
    for (size_t index = 0; index != num_variants; ++index)
        order.push_back(index);

    /// Descending order of priorities.
    std::sort(
        order.begin(),
        order.end(),
        [&priorities](size_t lhs, size_t rhs) { return priorities[rhs] < priorities[lhs]; });
    return order;
}
|
||||
|
||||
|
||||
/// Common implementation of text deserialization of one value from an already extracted `field`.
/// First `check_for_null` is tried: if it succeeds and consumes the whole field, NULL is inserted.
/// Otherwise variants are tried in `deserialize_text_order`; the first variant for which
/// `try_deserialize_nested` succeeds and consumes the whole field wins.
/// Returns false if the field is neither NULL nor a value of any variant; the column is left unchanged then.
bool SerializationVariant::tryDeserializeImpl(
    IColumn & column,
    const String & field,
    std::function<bool(ReadBuffer &)> check_for_null,
    std::function<bool(IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer &)> try_deserialize_nested) const
{
    auto & result_column = assert_cast<ColumnVariant &>(column);

    ReadBufferFromString null_buf(field);
    const bool is_null = check_for_null(null_buf) && null_buf.eof();
    if (is_null)
    {
        result_column.insertDefault();
        return true;
    }

    for (size_t global_discr : deserialize_text_order)
    {
        /// Every attempt parses the field from the beginning.
        ReadBufferFromString candidate_buf(field);
        auto & nested_column = result_column.getVariantByGlobalDiscriminator(global_discr);
        const size_t size_before = nested_column.size();

        const bool parsed = try_deserialize_nested(nested_column, variants[global_discr], candidate_buf) && candidate_buf.eof();
        if (parsed)
        {
            result_column.getLocalDiscriminators().push_back(result_column.localDiscriminatorByGlobal(global_discr));
            result_column.getOffsets().push_back(size_before);
            return true;
        }

        /// The attempt failed but a value was inserted into the nested column
        /// (e.g. only a part of the field was consumed): roll it back.
        if (nested_column.size() > size_before)
            nested_column.popBack(1);
    }

    return false;
}
|
||||
|
||||
void SerializationVariant::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
|
||||
auto global_discr = col.globalDiscriminatorAt(row_num);
|
||||
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
|
||||
SerializationNullable::serializeNullEscaped(ostr, settings);
|
||||
else
|
||||
variants[global_discr]->serializeTextEscaped(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings);
|
||||
}
|
||||
|
||||
/// Reads an escaped string from `istr` and tries to parse it as NULL or as a value of one of the variants.
/// Returns false if nothing matched.
bool SerializationVariant::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String escaped_field;
    readEscapedString(escaped_field, istr);
    const bool parsed = tryDeserializeTextEscapedImpl(column, escaped_field, settings);
    return parsed;
}
|
||||
|
||||
/// Reads an escaped string from `istr` and parses it as NULL or as a value of one of the variants.
/// Throws INCORRECT_DATA if nothing matched.
void SerializationVariant::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String field;
    readEscapedString(field, istr);
    if (tryDeserializeTextEscapedImpl(column, field, settings))
        return;

    throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse escaped value of type {} here: {}", variant_name, field);
}
|
||||
|
||||
/// Tries to parse an already extracted escaped `field` via tryDeserializeImpl,
/// using escaped NULL detection and escaped deserialization of variants.
bool SerializationVariant::tryDeserializeTextEscapedImpl(DB::IColumn & column, const String & field, const DB::FormatSettings & settings) const
{
    return tryDeserializeImpl(
        column,
        field,
        [&settings](ReadBuffer & buf) { return SerializationNullable::tryDeserializeNullEscaped(buf, settings); },
        [&settings](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf)
        {
            return variant_serialization->tryDeserializeTextEscaped(variant_column, buf, settings);
        });
}
|
||||
|
||||
void SerializationVariant::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
|
||||
auto global_discr = col.globalDiscriminatorAt(row_num);
|
||||
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
|
||||
SerializationNullable::serializeNullRaw(ostr, settings);
|
||||
else
|
||||
variants[global_discr]->serializeTextRaw(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings);
|
||||
}
|
||||
|
||||
/// Reads a raw string from `istr` and tries to parse it as NULL or as a value of one of the variants.
/// Returns false if nothing matched.
bool SerializationVariant::tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String raw_field;
    readString(raw_field, istr);
    const bool parsed = tryDeserializeTextRawImpl(column, raw_field, settings);
    return parsed;
}
|
||||
|
||||
/// Reads a raw string from `istr` and parses it as NULL or as a value of one of the variants.
/// Throws INCORRECT_DATA if nothing matched.
void SerializationVariant::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String field;
    readString(field, istr);
    if (tryDeserializeTextRawImpl(column, field, settings))
        return;

    throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse raw value of type {} here: {}", variant_name, field);
}
|
||||
|
||||
/// Tries to parse an already extracted raw `field` via tryDeserializeImpl,
/// using raw NULL detection and raw deserialization of variants.
bool SerializationVariant::tryDeserializeTextRawImpl(DB::IColumn & column, const String & field, const DB::FormatSettings & settings) const
{
    return tryDeserializeImpl(
        column,
        field,
        [&settings](ReadBuffer & buf) { return SerializationNullable::tryDeserializeNullRaw(buf, settings); },
        [&settings](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf)
        {
            return variant_serialization->tryDeserializeTextRaw(variant_column, buf, settings);
        });
}
|
||||
|
||||
void SerializationVariant::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
|
||||
auto global_discr = col.globalDiscriminatorAt(row_num);
|
||||
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
|
||||
SerializationNullable::serializeNullQuoted(ostr);
|
||||
else
|
||||
variants[global_discr]->serializeTextQuoted(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings);
|
||||
}
|
||||
|
||||
/// Tries to read a quoted field from `istr` and to parse it as NULL or as a value of one of the variants.
/// Returns false if the field cannot be read or nothing matched.
bool SerializationVariant::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String quoted_field;
    return tryReadQuotedField(quoted_field, istr) && tryDeserializeTextQuotedImpl(column, quoted_field, settings);
}
|
||||
|
||||
/// Reads a quoted field from `istr` and parses it as NULL or as a value of one of the variants.
/// Throws INCORRECT_DATA if nothing matched.
void SerializationVariant::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String field;
    readQuotedField(field, istr);
    if (tryDeserializeTextQuotedImpl(column, field, settings))
        return;

    throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse quoted value of type {} here: {}", variant_name, field);
}
|
||||
|
||||
/// Tries to parse an already extracted quoted `field` via tryDeserializeImpl,
/// using quoted NULL detection and quoted deserialization of variants.
bool SerializationVariant::tryDeserializeTextQuotedImpl(DB::IColumn & column, const String & field, const DB::FormatSettings & settings) const
{
    return tryDeserializeImpl(
        column,
        field,
        [](ReadBuffer & buf) { return SerializationNullable::tryDeserializeNullQuoted(buf); },
        [&settings](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf)
        {
            return variant_serialization->tryDeserializeTextQuoted(variant_column, buf, settings);
        });
}
|
||||
|
||||
void SerializationVariant::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
|
||||
auto global_discr = col.globalDiscriminatorAt(row_num);
|
||||
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
|
||||
SerializationNullable::serializeNullCSV(ostr, settings);
|
||||
else
|
||||
variants[global_discr]->serializeTextCSV(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings);
|
||||
}
|
||||
|
||||
/// Non-throwing counterpart of deserializeTextCSV: reads one CSV value from `istr`
/// and returns the result of tryDeserializeTextCSVImpl on it.
/// NOTE(review): the value is read with readCSVStringInto<String, true, false>, whereas
/// deserializeTextCSV uses readCSVField — confirm both accept the same inputs.
bool SerializationVariant::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String csv_value;
    readCSVStringInto<String, true, false>(csv_value, istr, settings.csv);

    const bool parsed = tryDeserializeTextCSVImpl(column, csv_value, settings);
    return parsed;
}
|
||||
|
||||
/// Reads a single CSV field from `istr` and parses it into the Variant column.
/// Throws INCORRECT_DATA when tryDeserializeTextCSVImpl reports that the field could not be parsed.
void SerializationVariant::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String csv_value;
    readCSVField(csv_value, istr, settings.csv);

    const bool parsed = tryDeserializeTextCSVImpl(column, csv_value, settings);
    if (parsed)
        return;

    throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse CSV value of type {} here: {}", variant_name, csv_value);
}
|
||||
|
||||
/// Tries to parse an already extracted CSV field.
/// Supplies tryDeserializeImpl with the CSV NULL check and the CSV per-variant parser
/// and returns its result.
bool SerializationVariant::tryDeserializeTextCSVImpl(DB::IColumn & column, const String & field, const DB::FormatSettings & settings) const
{
    /// NULL detection for CSV takes the format settings as well.
    const auto is_null = [&settings](ReadBuffer & in)
    {
        return SerializationNullable::tryDeserializeNullCSV(in, settings);
    };

    /// Attempt to parse the buffer into a single candidate variant column.
    const auto parse_variant = [&settings](IColumn & target, const SerializationPtr & serialization, ReadBuffer & in)
    {
        return serialization->tryDeserializeTextCSV(target, in, settings);
    };

    return tryDeserializeImpl(column, field, is_null, parse_variant);
}
|
||||
|
||||
void SerializationVariant::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
|
||||
auto global_discr = col.globalDiscriminatorAt(row_num);
|
||||
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
|
||||
SerializationNullable::serializeNullText(ostr, settings);
|
||||
else
|
||||
variants[global_discr]->serializeText(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings);
|
||||
}
|
||||
|
||||
/// Non-throwing counterpart of deserializeWholeText: consumes `istr` up to EOF
/// and returns the result of tryDeserializeWholeTextImpl on the collected text.
bool SerializationVariant::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String whole_text;
    readStringUntilEOF(whole_text, istr);

    const bool parsed = tryDeserializeWholeTextImpl(column, whole_text, settings);
    return parsed;
}
|
||||
|
||||
/// Consumes `istr` up to EOF and parses the collected text into the Variant column.
/// Throws INCORRECT_DATA when tryDeserializeWholeTextImpl reports that the text could not be parsed.
void SerializationVariant::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String whole_text;
    readStringUntilEOF(whole_text, istr);

    const bool parsed = tryDeserializeWholeTextImpl(column, whole_text, settings);
    if (parsed)
        return;

    throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse text value of type {} here: {}", variant_name, whole_text);
}
|
||||
|
||||
/// Tries to parse an already extracted plain-text value.
/// Supplies tryDeserializeImpl with the plain-text NULL check and the whole-text
/// per-variant parser and returns its result.
bool SerializationVariant::tryDeserializeWholeTextImpl(DB::IColumn & column, const String & field, const DB::FormatSettings & settings) const
{
    /// NULL detection for plain text.
    const auto is_null = [](ReadBuffer & in)
    {
        return SerializationNullable::tryDeserializeNullText(in);
    };

    /// Attempt to parse the buffer into a single candidate variant column.
    const auto parse_variant = [&settings](IColumn & target, const SerializationPtr & serialization, ReadBuffer & in)
    {
        return serialization->tryDeserializeWholeText(target, in, settings);
    };

    return tryDeserializeImpl(column, field, is_null, parse_variant);
}
|
||||
|
||||
void SerializationVariant::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
|
||||
auto global_discr = col.globalDiscriminatorAt(row_num);
|
||||
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
|
||||
SerializationNullable::serializeNullJSON(ostr);
|
||||
else
|
||||
variants[global_discr]->serializeTextJSON(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings);
|
||||
}
|
||||
|
||||
/// Non-throwing counterpart of deserializeTextJSON.
/// Reads a single JSON field from `istr` and forwards it to tryDeserializeTextJSONImpl.
/// Returns false when the field cannot be read or when the Impl rejects it.
bool SerializationVariant::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String json_value;
    /// && short-circuits, so the Impl only runs on a successfully read field.
    return tryReadJSONField(json_value, istr) && tryDeserializeTextJSONImpl(column, json_value, settings);
}
|
||||
|
||||
/// Reads a single JSON field from `istr` and parses it into the Variant column.
/// Throws INCORRECT_DATA when tryDeserializeTextJSONImpl reports that the field could not be parsed.
void SerializationVariant::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String json_value;
    readJSONField(json_value, istr);

    const bool parsed = tryDeserializeTextJSONImpl(column, json_value, settings);
    if (parsed)
        return;

    throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON value of type {} here: {}", variant_name, json_value);
}
|
||||
|
||||
/// Tries to parse an already extracted JSON field.
/// Supplies tryDeserializeImpl with the JSON NULL check and the JSON per-variant parser
/// and returns its result.
bool SerializationVariant::tryDeserializeTextJSONImpl(DB::IColumn & column, const String & field, const DB::FormatSettings & settings) const
{
    /// NULL detection for JSON.
    const auto is_null = [](ReadBuffer & in)
    {
        return SerializationNullable::tryDeserializeNullJSON(in);
    };

    /// Attempt to parse the buffer into a single candidate variant column.
    const auto parse_variant = [&settings](IColumn & target, const SerializationPtr & serialization, ReadBuffer & in)
    {
        return serialization->tryDeserializeTextJSON(target, in, settings);
    };

    return tryDeserializeImpl(column, field, is_null, parse_variant);
}
|
||||
|
||||
void SerializationVariant::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
|
||||
auto global_discr = col.globalDiscriminatorAt(row_num);
|
||||
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
|
||||
SerializationNullable::serializeNullXML(ostr);
|
||||
else
|
||||
variants[global_discr]->serializeTextXML(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings);
|
||||
}
|
||||
|
||||
}
|
139
src/DataTypes/Serializations/SerializationVariant.h
Normal file
139
src/DataTypes/Serializations/SerializationVariant.h
Normal file
@ -0,0 +1,139 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataTypes/Serializations/ISerialization.h>
|
||||
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Class for serializing/deserializing column with Variant type.
|
||||
/// It supports both text and binary bulk serializations/deserializations.
|
||||
///
|
||||
/// During text serialization it checks discriminator of the current row and
|
||||
/// uses corresponding text serialization of this variant.
|
||||
///
|
||||
/// During text deserialization it tries all variants deserializations
|
||||
/// (using tryDeserializeText* methods of ISerialization) in predefined order
|
||||
/// and inserts data in the first variant with succeeded deserialization.
|
||||
///
|
||||
/// During binary bulk serialization it transforms local discriminators
|
||||
/// to global and serializes them into a separate stream VariantDiscriminators.
|
||||
/// Each variant is serialized into a separate stream with path VariantElements/VariantElement
|
||||
/// (VariantElements stream is needed for correct sub-columns creation). We store and serialize
|
||||
/// variants in a sparse form (the size of a variant column equals the number of occurrences of its discriminator
|
||||
/// in the discriminators column), so during deserialization the limit for each variant is
|
||||
/// calculated according to discriminators column.
|
||||
/// Offsets column is not serialized and stored only in memory.
|
||||
///
|
||||
/// During binary bulk deserialization we first deserialize discriminators from corresponding stream
|
||||
/// and use them to calculate the limit for each variant. Each variant is deserialized from
|
||||
/// corresponding stream using calculated limit. Offsets column is not deserialized and constructed
|
||||
/// according to discriminators.
|
||||
class SerializationVariant : public ISerialization
|
||||
{
|
||||
public:
|
||||
using VariantSerializations = std::vector<SerializationPtr>;
|
||||
|
||||
explicit SerializationVariant(
|
||||
const VariantSerializations & variants_,
|
||||
const std::vector<String> & variant_names_,
|
||||
const std::vector<size_t> & deserialize_text_order_,
|
||||
const String & variant_name_)
|
||||
: variants(variants_), variant_names(variant_names_), deserialize_text_order(deserialize_text_order_), variant_name(variant_name_)
|
||||
{
|
||||
}
|
||||
|
||||
void enumerateStreams(
|
||||
EnumerateStreamsSettings & settings,
|
||||
const StreamCallback & callback,
|
||||
const SubstreamData & data) const override;
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void serializeBinaryBulkStateSuffix(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkStatePrefix(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void serializeBinaryBulkWithMultipleStreams(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkWithMultipleStreams(
|
||||
ColumnPtr & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const override;
|
||||
|
||||
void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
bool tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
|
||||
/// Determine the order in which we should try to deserialize variants.
|
||||
/// In some cases the text representation of a value can be deserialized
|
||||
/// into several types (for example, almost all text values can be deserialized
|
||||
/// into String type), so we uses some heuristics to determine the more optimal order.
|
||||
static std::vector<size_t> getVariantsDeserializeTextOrder(const DataTypes & variant_types);
|
||||
|
||||
private:
|
||||
void addVariantElementToPath(SubstreamPath & path, size_t i) const;
|
||||
|
||||
bool tryDeserializeTextEscapedImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
|
||||
bool tryDeserializeTextQuotedImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
|
||||
bool tryDeserializeWholeTextImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
|
||||
bool tryDeserializeTextCSVImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
|
||||
bool tryDeserializeTextJSONImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
|
||||
bool tryDeserializeTextRawImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
|
||||
|
||||
bool tryDeserializeImpl(
|
||||
IColumn & column,
|
||||
const String & field,
|
||||
std::function<bool(ReadBuffer &)> check_for_null,
|
||||
std::function<bool(IColumn & variant_columm, const SerializationPtr & nested, ReadBuffer &)> try_deserialize_nested) const;
|
||||
|
||||
VariantSerializations variants;
|
||||
std::vector<String> variant_names;
|
||||
std::vector<size_t> deserialize_text_order;
|
||||
/// Name of Variant data type for better exception messages.
|
||||
String variant_name;
|
||||
};
|
||||
|
||||
}
|
271
src/DataTypes/Serializations/SerializationVariantElement.cpp
Normal file
271
src/DataTypes/Serializations/SerializationVariantElement.cpp
Normal file
@ -0,0 +1,271 @@
|
||||
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
||||
#include <DataTypes/Serializations/SerializationNumber.h>
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
/// Enumerates the streams needed to read this variant element as a subcolumn:
/// the shared discriminators stream first, then the streams of the nested serialization
/// under VariantElements/VariantElement. `settings.path` is restored before returning.
void SerializationVariantElement::enumerateStreams(
    DB::ISerialization::EnumerateStreamsSettings & settings,
    const DB::ISerialization::StreamCallback & callback,
    const DB::ISerialization::SubstreamData & data) const
{
    auto & path = settings.path;

    /// We will need stream for discriminators during deserialization.
    path.push_back(Substream::VariantDiscriminators);
    callback(path);
    path.pop_back();

    /// Streams of the variant itself live under the variant-element path.
    addVariantToPath(path);
    path.back().data = data;
    nested_serialization->enumerateStreams(settings, callback, data);
    removeVariantFromPath(path);
}
|
||||
|
||||
void SerializationVariantElement::serializeBinaryBulkStatePrefix(const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationVariantElement");
|
||||
}
|
||||
|
||||
/// Writing through SerializationVariantElement is not supported: always throws NOT_IMPLEMENTED.
void SerializationVariantElement::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const
{
    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationVariantElement");
}
|
||||
|
||||
struct DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState
|
||||
{
|
||||
/// During deserialization discriminators and variant streams can be shared.
|
||||
/// For example we can read several variant elements together: "select v.UInt32, v.String from table",
|
||||
/// or we can read the whole variant and some of variant elements: "select v, v.UInt32 from table".
|
||||
/// To read the same column from the same stream more than once we use substream cache,
|
||||
/// but this cache stores the whole column, not only the current range.
|
||||
/// During deserialization of variant element discriminators and variant columns are not stored
|
||||
/// in the result column, so we need to store them inside deserialization state, so we can use
|
||||
/// substream cache correctly.
|
||||
ColumnPtr discriminators;
|
||||
ColumnPtr variant;
|
||||
|
||||
ISerialization::DeserializeBinaryBulkStatePtr variant_element_state;
|
||||
};
|
||||
|
||||
/// Creates the deserialization state for this variant element and lets the nested
/// serialization initialise its own state under the variant-element path.
void SerializationVariantElement::deserializeBinaryBulkStatePrefix(DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const
{
    auto element_state = std::make_shared<DeserializeBinaryBulkStateVariantElement>();
    auto & nested_state = element_state->variant_element_state;

    addVariantToPath(settings.path);
    nested_serialization->deserializeBinaryBulkStatePrefix(settings, nested_state);
    removeVariantFromPath(settings.path);

    state = std::move(element_state);
}
|
||||
|
||||
void SerializationVariantElement::serializeBinaryBulkWithMultipleStreams(const IColumn &, size_t, size_t, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkWithMultipleStreams is not implemented for SerializationVariantElement");
|
||||
}
|
||||
|
||||
/// Reads `limit` rows of one Variant element as a subcolumn and appends them to `result_column`.
///
/// Steps:
///  1. Obtain the discriminators, either from the substreams cache or from the VariantDiscriminators stream.
///  2. Count how many of the last `limit` discriminators equal variant_discriminator.
///  3. If the result column is Nullable, extend its null map and continue with its nested column.
///  4. Deserialize that many values of the variant through nested_serialization into the state.
///  5. Append variant values or defaults to the result, row by row according to the discriminators.
///
/// Returns without modifying `result_column` when no discriminators stream is available.
/// `settings.path` is left exactly as it was passed in on every exit.
void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
    ColumnPtr & result_column,
    size_t limit,
    DeserializeBinaryBulkSettings & settings,
    DeserializeBinaryBulkStatePtr & state,
    SubstreamsCache * cache) const
{
    auto * variant_element_state = checkAndGetState<DeserializeBinaryBulkStateVariantElement>(state);

    /// First, deserialize discriminators from Variant column.
    settings.path.push_back(Substream::VariantDiscriminators);
    if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path))
    {
        variant_element_state->discriminators = cached_discriminators;
    }
    else
    {
        auto * discriminators_stream = settings.getter(settings.path);
        if (!discriminators_stream)
        {
            /// Undo the push above so that the path is restored, as on every other exit.
            settings.path.pop_back();
            return;
        }

        /// If we started to read a new column, reinitialize discriminators column in deserialization state.
        if (!variant_element_state->discriminators || result_column->empty())
            variant_element_state->discriminators = ColumnVariant::ColumnDiscriminators::create();

        SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0);
        addToSubstreamsCache(cache, settings.path, variant_element_state->discriminators);
    }
    settings.path.pop_back();

    /// Iterate through new discriminators to calculate the limit for our variant.
    const auto & discriminators_data = assert_cast<const ColumnVariant::ColumnDiscriminators &>(*variant_element_state->discriminators).getData();
    /// NOTE(review): assumes the discriminators column holds at least `limit` rows;
    /// otherwise this subtraction wraps around — confirm against callers.
    size_t discriminators_offset = variant_element_state->discriminators->size() - limit;
    size_t variant_limit = 0;
    for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
        variant_limit += (discriminators_data[i] == variant_discriminator);

    /// Now we know the limit for our variant and can deserialize it.

    /// If result column is Nullable, fill null map and extract nested column.
    MutableColumnPtr mutable_column = result_column->assumeMutable();
    if (isColumnNullable(*mutable_column))
    {
        auto & nullable_column = assert_cast<ColumnNullable &>(*mutable_column);
        NullMap & null_map = nullable_column.getNullMapData();
        /// If we have only our discriminator in range, fill null map with 0.
        if (variant_limit == limit)
        {
            null_map.resize_fill(null_map.size() + limit, 0);
        }
        /// If no our discriminator in current range, fill null map with 1.
        else if (variant_limit == 0)
        {
            null_map.resize_fill(null_map.size() + limit, 1);
        }
        /// Otherwise we should iterate through discriminators to fill null map.
        else
        {
            null_map.reserve(null_map.size() + limit);
            for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
                null_map.push_back(discriminators_data[i] != variant_discriminator);
        }

        mutable_column = nullable_column.getNestedColumnPtr()->assumeMutable();
    }

    /// If we started to read a new column, reinitialize variant column in deserialization state.
    if (!variant_element_state->variant || result_column->empty())
    {
        variant_element_state->variant = mutable_column->cloneEmpty();

        /// When result column is LowCardinality(Nullable(T)) we should
        /// remove Nullable from variant column before deserialization.
        if (isColumnLowCardinalityNullable(*mutable_column))
            assert_cast<ColumnLowCardinality &>(*variant_element_state->variant->assumeMutable()).nestedRemoveNullable();
    }

    /// If nothing to deserialize, just insert defaults.
    if (variant_limit == 0)
    {
        mutable_column->insertManyDefaults(limit);
        return;
    }

    addVariantToPath(settings.path);
    nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, variant_limit, settings, variant_element_state->variant_element_state, cache);
    removeVariantFromPath(settings.path);

    /// If nothing was deserialized when variant_limit > 0
    /// it means that we don't have a stream for such sub-column.
    /// It may happen during ALTER MODIFY column with Variant extension.
    /// In this case we should just insert default values.
    if (variant_element_state->variant->empty())
    {
        mutable_column->insertManyDefaults(limit);
        return;
    }

    /// Position of the first freshly deserialized value inside the accumulated variant column.
    size_t variant_offset = variant_element_state->variant->size() - variant_limit;

    /// If we have only our discriminator in range, insert the whole range to result column.
    if (variant_limit == limit)
    {
        mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, variant_limit);
    }
    /// Otherwise iterate through discriminators and insert value from variant or default value depending on the discriminator.
    else
    {
        for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
        {
            if (discriminators_data[i] == variant_discriminator)
                mutable_column->insertFrom(*variant_element_state->variant, variant_offset++);
            else
                mutable_column->insertDefault();
        }
    }
}
|
||||
|
||||
/// Appends the two path components that address this variant's streams:
/// VariantElements followed by VariantElement tagged with variant_element_name.
/// Must be paired with removeVariantFromPath.
void SerializationVariantElement::addVariantToPath(DB::ISerialization::SubstreamPath & path) const
{
    Substream element_substream = Substream::VariantElement;
    element_substream.variant_element_name = variant_element_name;

    path.push_back(Substream::VariantElements);
    path.push_back(std::move(element_substream));
}
|
||||
|
||||
/// Drops the two path components appended by addVariantToPath.
void SerializationVariantElement::removeVariantFromPath(DB::ISerialization::SubstreamPath & path) const
{
    /// One pop for VariantElement, one for VariantElements.
    for (int popped = 0; popped != 2; ++popped)
        path.pop_back();
}
|
||||
|
||||
/// Stores everything needed to build the subcolumn of one variant:
/// the local discriminators of the Variant column, the variant's name,
/// and both its global and local discriminators.
SerializationVariantElement::VariantSubcolumnCreator::VariantSubcolumnCreator(
    const ColumnPtr & local_discriminators_,
    const String & variant_element_name_,
    const ColumnVariant::Discriminator global_variant_discriminator_,
    const ColumnVariant::Discriminator local_variant_discriminator_)
    : local_discriminators(local_discriminators_),
      variant_element_name(variant_element_name_),
      global_variant_discriminator(global_variant_discriminator_),
      local_variant_discriminator(local_variant_discriminator_)
{
}
|
||||
|
||||
/// Type of the subcolumn: the variant type passed through makeNullableOrLowCardinalityNullableSafe.
DataTypePtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::DataTypePtr & prev) const
{
    DataTypePtr subcolumn_type = makeNullableOrLowCardinalityNullableSafe(prev);
    return subcolumn_type;
}
|
||||
|
||||
/// Serialization of the subcolumn: wraps the variant serialization into SerializationVariantElement,
/// identified by the variant name and its global discriminator.
SerializationPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::SerializationPtr & prev) const
{
    auto element_serialization = std::make_shared<SerializationVariantElement>(prev, variant_element_name, global_variant_discriminator);
    return element_serialization;
}
|
||||
|
||||
/// Builds the subcolumn of one variant from the variant column `prev`.
/// The result has one row per discriminator; rows that belong to other variants (or NULL)
/// are filled with defaults and, where the column can be inside Nullable, marked in the null map.
ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::ColumnPtr & prev) const
{
    const size_t total_rows = local_discriminators->size();

    /// Case when original Variant column contained only one non-empty variant and no NULLs.
    /// In this case just use this variant.
    if (prev->size() == total_rows)
        return makeNullableOrLowCardinalityNullableSafe(prev);

    /// If this variant is empty, fill result column with default values.
    if (prev->empty())
    {
        auto defaults_only = makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty();
        defaults_only->insertManyDefaults(total_rows);
        return defaults_only;
    }

    /// General case: derive the null-map from discriminators (1 for every row that is not our variant).
    const auto & discriminators = assert_cast<const ColumnVariant::ColumnDiscriminators &>(*local_discriminators).getData();
    NullMap null_map;
    null_map.reserve(total_rows);
    for (size_t row = 0; row != discriminators.size(); ++row)
        null_map.push_back(discriminators[row] != local_variant_discriminator);

    /// Now we can create new column from null-map and variant column using IColumn::expand.
    auto expanded = IColumn::mutate(prev);

    /// Special case for LowCardinality. We want the result to be LowCardinality(Nullable),
    /// but we don't have a good way to apply null-mask for LowCardinality(), so, we first
    /// convert our column to LowCardinality(Nullable()) and then use expand which will
    /// fill rows with 0 in mask with default value (that is NULL).
    if (prev->lowCardinality())
        expanded = assert_cast<ColumnLowCardinality &>(*expanded).cloneNullable();

    expanded->expand(null_map, /*inverted = */ true);

    /// Columns that cannot be wrapped into Nullable are returned as is.
    if (!expanded->canBeInsideNullable())
        return expanded;

    auto null_map_column = ColumnUInt8::create();
    null_map_column->getData() = std::move(null_map);
    return ColumnNullable::create(std::move(expanded), std::move(null_map_column));
}
|
||||
|
||||
}
|
87
src/DataTypes/Serializations/SerializationVariantElement.h
Normal file
87
src/DataTypes/Serializations/SerializationVariantElement.h
Normal file
@ -0,0 +1,87 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataTypes/Serializations/SerializationWrapper.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnVariant.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class SerializationVariant;
|
||||
|
||||
/// Serialization for Variant element when we read it as a subcolumn.
|
||||
class SerializationVariantElement final : public SerializationWrapper
|
||||
{
|
||||
private:
|
||||
/// To be able to deserialize Variant element as a subcolumn
|
||||
/// we need its type name and global discriminator.
|
||||
String variant_element_name;
|
||||
ColumnVariant::Discriminator variant_discriminator;
|
||||
|
||||
public:
|
||||
SerializationVariantElement(const SerializationPtr & nested_, const String & variant_element_name_, ColumnVariant::Discriminator variant_discriminator_)
|
||||
: SerializationWrapper(nested_)
|
||||
, variant_element_name(variant_element_name_)
|
||||
, variant_discriminator(variant_discriminator_)
|
||||
{
|
||||
}
|
||||
|
||||
void enumerateStreams(
|
||||
EnumerateStreamsSettings & settings,
|
||||
const StreamCallback & callback,
|
||||
const SubstreamData & data) const override;
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void serializeBinaryBulkStateSuffix(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkStatePrefix(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void serializeBinaryBulkWithMultipleStreams(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkWithMultipleStreams(
|
||||
ColumnPtr & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const override;
|
||||
|
||||
private:
|
||||
friend SerializationVariant;
|
||||
|
||||
void addVariantToPath(SubstreamPath & path) const;
|
||||
void removeVariantFromPath(SubstreamPath & path) const;
|
||||
|
||||
struct VariantSubcolumnCreator : public ISubcolumnCreator
|
||||
{
|
||||
const ColumnPtr local_discriminators;
|
||||
const String variant_element_name;
|
||||
const ColumnVariant::Discriminator global_variant_discriminator;
|
||||
const ColumnVariant::Discriminator local_variant_discriminator;
|
||||
|
||||
VariantSubcolumnCreator(
|
||||
const ColumnPtr & local_discriminators_,
|
||||
const String & variant_element_name_,
|
||||
const ColumnVariant::Discriminator global_variant_discriminator_,
|
||||
const ColumnVariant::Discriminator local_variant_discriminator_);
|
||||
|
||||
DataTypePtr create(const DataTypePtr & prev) const override;
|
||||
ColumnPtr create(const ColumnPtr & prev) const override;
|
||||
SerializationPtr create(const SerializationPtr & prev) const override;
|
||||
};
|
||||
};
|
||||
|
||||
}
|
@ -96,6 +96,11 @@ void SerializationWrapper::deserializeTextEscaped(IColumn & column, ReadBuffer &
|
||||
nested_serialization->deserializeTextEscaped(column, istr, settings);
|
||||
}
|
||||
|
||||
/// Forwards the non-throwing escaped-text deserialization to the wrapped serialization.
bool SerializationWrapper::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    const bool parsed = nested_serialization->tryDeserializeTextEscaped(column, istr, settings);
    return parsed;
}
|
||||
|
||||
void SerializationWrapper::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
nested_serialization->serializeTextQuoted(column, row_num, ostr, settings);
|
||||
@ -106,6 +111,11 @@ void SerializationWrapper::deserializeTextQuoted(IColumn & column, ReadBuffer &
|
||||
nested_serialization->deserializeTextQuoted(column, istr, settings);
|
||||
}
|
||||
|
||||
/// Forwards the non-throwing quoted-text deserialization to the wrapped serialization.
bool SerializationWrapper::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    const bool parsed = nested_serialization->tryDeserializeTextQuoted(column, istr, settings);
    return parsed;
}
|
||||
|
||||
void SerializationWrapper::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
nested_serialization->serializeTextCSV(column, row_num, ostr, settings);
|
||||
@ -116,6 +126,11 @@ void SerializationWrapper::deserializeTextCSV(IColumn & column, ReadBuffer & ist
|
||||
nested_serialization->deserializeTextCSV(column, istr, settings);
|
||||
}
|
||||
|
||||
/// Forwards to the wrapped serialization's tryDeserializeTextCSV and returns its result unchanged.
bool SerializationWrapper::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
return nested_serialization->tryDeserializeTextCSV(column, istr, settings);
|
||||
}
|
||||
|
||||
void SerializationWrapper::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
nested_serialization->serializeText(column, row_num, ostr, settings);
|
||||
@ -126,6 +141,11 @@ void SerializationWrapper::deserializeWholeText(IColumn & column, ReadBuffer & i
|
||||
nested_serialization->deserializeWholeText(column, istr, settings);
|
||||
}
|
||||
|
||||
/// Forwards to the wrapped serialization's tryDeserializeWholeText and returns its result unchanged.
bool SerializationWrapper::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
return nested_serialization->tryDeserializeWholeText(column, istr, settings);
|
||||
}
|
||||
|
||||
void SerializationWrapper::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
nested_serialization->serializeTextJSON(column, row_num, ostr, settings);
|
||||
@ -136,6 +156,11 @@ void SerializationWrapper::deserializeTextJSON(IColumn & column, ReadBuffer & is
|
||||
nested_serialization->deserializeTextJSON(column, istr, settings);
|
||||
}
|
||||
|
||||
/// Forwards to the wrapped serialization's tryDeserializeTextJSON and returns its result unchanged.
bool SerializationWrapper::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
return nested_serialization->tryDeserializeTextJSON(column, istr, settings);
|
||||
}
|
||||
|
||||
void SerializationWrapper::serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const
|
||||
{
|
||||
nested_serialization->serializeTextJSONPretty(column, row_num, ostr, settings, indent);
|
||||
|
@ -63,18 +63,23 @@ public:
|
||||
|
||||
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override;
|
||||
|
||||
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
|
@ -36,29 +36,67 @@ protected:
|
||||
deserializeText(column, istr, settings, true);
|
||||
}
|
||||
|
||||
/// Whole-text entry point of the "try" family: delegates to tryDeserializeText with whole = true
/// and returns its result.
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
|
||||
{
|
||||
return tryDeserializeText(column, istr, settings, true);
|
||||
}
|
||||
|
||||
/// Escaped-text entry point: delegates to deserializeText with whole = false.
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
|
||||
{
|
||||
deserializeText(column, istr, settings, false);
|
||||
}
|
||||
|
||||
/// Escaped-text entry point of the "try" family: delegates to tryDeserializeText with whole = false
/// and returns its result.
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
|
||||
{
|
||||
return tryDeserializeText(column, istr, settings, false);
|
||||
}
|
||||
|
||||
/// Quoted-text entry point: delegates to deserializeText with whole = false.
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
|
||||
{
|
||||
deserializeText(column, istr, settings, false);
|
||||
}
|
||||
|
||||
/// Quoted-text entry point of the "try" family: delegates to tryDeserializeText with whole = false
/// and returns its result.
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
|
||||
{
|
||||
return tryDeserializeText(column, istr, settings, false);
|
||||
}
|
||||
|
||||
/// JSON-text entry point: delegates to deserializeText with whole = false.
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
|
||||
{
|
||||
deserializeText(column, istr, settings, false);
|
||||
}
|
||||
|
||||
/// JSON-text entry point of the "try" family: delegates to tryDeserializeText with whole = false
/// and returns its result.
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
|
||||
{
|
||||
return tryDeserializeText(column, istr, settings, false);
|
||||
}
|
||||
|
||||
/// CSV-text entry point: delegates to deserializeText with whole = false.
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
|
||||
{
|
||||
deserializeText(column, istr, settings, false);
|
||||
}
|
||||
|
||||
/// CSV-text entry point of the "try" family: delegates to tryDeserializeText with whole = false
/// and returns its result.
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
|
||||
{
|
||||
return tryDeserializeText(column, istr, settings, false);
|
||||
}
|
||||
|
||||
/// whole = true means that buffer contains only one value, so we should read until EOF.
|
||||
/// It's needed to check if there is garbage after parsed field.
|
||||
virtual void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const = 0;
|
||||
|
||||
/// Default "try" implementation built on top of deserializeText.
/// Returns true if deserializeText completed, and false if it threw any exception
/// (the exception is swallowed here and is not rethrown or logged).
/// Derived classes may override this method with their own implementation.
/// NOTE(review): nothing here rolls back `column` or `istr` if deserializeText fails midway —
/// confirm that callers/overrides do not rely on them being unchanged after a false result.
virtual bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
|
||||
{
|
||||
try
|
||||
{
|
||||
deserializeText(column, istr, settings, whole);
|
||||
return true;
|
||||
}
|
||||
/// Any failure, whatever its type, is reported as "could not deserialize".
catch (...)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -223,6 +223,7 @@ bool canBeSafelyCasted(const DataTypePtr & from_type, const DataTypePtr & to_typ
|
||||
case TypeIndex::AggregateFunction:
|
||||
case TypeIndex::Nothing:
|
||||
case TypeIndex::JSONPaths:
|
||||
case TypeIndex::Variant:
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeVariant.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -383,6 +384,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types)
|
||||
return throwOrReturn<on_error>(types, "because some of them are Maps and some of them are not", ErrorCodes::NO_COMMON_TYPE);
|
||||
|
||||
auto keys_common_type = getLeastSupertype<on_error>(key_types);
|
||||
|
||||
auto values_common_type = getLeastSupertype<on_error>(value_types);
|
||||
/// When on_error == LeastSupertypeOnError::Null and we cannot get least supertype for keys or values,
|
||||
/// keys_common_type or values_common_type will be nullptr, we should return nullptr in this case.
|
||||
@ -424,6 +426,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types)
|
||||
else
|
||||
{
|
||||
auto nested_type = getLeastSupertype<on_error>(nested_types);
|
||||
|
||||
/// When on_error == LeastSupertypeOnError::Null and we cannot get least supertype,
|
||||
/// nested_type will be nullptr, we should return nullptr in this case.
|
||||
if (!nested_type)
|
||||
@ -637,6 +640,32 @@ DataTypePtr getLeastSupertypeOrString(const DataTypes & types)
|
||||
return getLeastSupertype<LeastSupertypeOnError::String>(types);
|
||||
}
|
||||
|
||||
/// Returns the least supertype of `types` when it exists; otherwise falls back to a Variant
/// whose variants are built from the provided types.
DataTypePtr getLeastSupertypeOrVariant(const DataTypes & types)
{
    /// A regular common type always wins over the Variant fallback.
    if (auto common_type = getLeastSupertype<LeastSupertypeOnError::Null>(types))
        return common_type;

    /// No common type: collect the arguments as variants of the resulting Variant.
    DataTypes variants;
    for (const auto & type : types)
    {
        if (!isVariant(type))
        {
            /// The type is added after removeNullableOrLowCardinalityNullable is applied to it.
            variants.push_back(removeNullableOrLowCardinalityNullable(type));
            continue;
        }

        /// Nested Variant types are not supported, so a Variant argument
        /// contributes all of its own variants to the result instead of itself.
        const DataTypes & flattened = assert_cast<const DataTypeVariant &>(*type).getVariants();
        variants.insert(variants.end(), flattened.begin(), flattened.end());
    }

    return std::make_shared<DataTypeVariant>(variants);
}
|
||||
|
||||
DataTypePtr tryGetLeastSupertype(const DataTypes & types)
|
||||
{
|
||||
return getLeastSupertype<LeastSupertypeOnError::Null>(types);
|
||||
|
@ -24,6 +24,17 @@ DataTypePtr getLeastSupertype(const DataTypes & types);
|
||||
/// All types can be casted to String, because they can be serialized to String.
|
||||
DataTypePtr getLeastSupertypeOrString(const DataTypes & types);
|
||||
|
||||
/// Same as getLeastSupertype but in case when there is no supertype for provided types
|
||||
/// it uses Variant of these types as a supertype. Any type can be casted to a Variant
|
||||
/// that contains this type.
|
||||
/// As nested Variants are not allowed, if one of the types is Variant, its variants
|
||||
/// are used in the resulting Variant.
|
||||
/// Examples:
|
||||
/// (UInt64, String) -> Variant(UInt64, String)
|
||||
/// (Array(UInt64), Array(String)) -> Variant(Array(UInt64), Array(String))
|
||||
/// (Variant(UInt64, String), Array(UInt32)) -> Variant(UInt64, String, Array(UInt32))
|
||||
DataTypePtr getLeastSupertypeOrVariant(const DataTypes & types);
|
||||
|
||||
/// Same as above but return nullptr instead of throwing exception.
|
||||
DataTypePtr tryGetLeastSupertype(const DataTypes & types);
|
||||
|
||||
|
@ -925,6 +925,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
|
||||
query_context->setSetting("allow_experimental_nlp_functions", 1);
|
||||
query_context->setSetting("allow_experimental_hash_functions", 1);
|
||||
query_context->setSetting("allow_experimental_object_type", 1);
|
||||
query_context->setSetting("allow_experimental_variant_type", 1);
|
||||
query_context->setSetting("allow_experimental_annoy_index", 1);
|
||||
query_context->setSetting("allow_experimental_usearch_index", 1);
|
||||
query_context->setSetting("allow_experimental_bigint_types", 1);
|
||||
|
@ -109,31 +109,31 @@ bool deserializeFieldByEscapingRule(
|
||||
{
|
||||
case FormatSettings::EscapingRule::Escaped:
|
||||
if (parse_as_nullable)
|
||||
read = SerializationNullable::deserializeTextEscapedImpl(column, buf, format_settings, serialization);
|
||||
read = SerializationNullable::deserializeNullAsDefaultOrNestedTextEscaped(column, buf, format_settings, serialization);
|
||||
else
|
||||
serialization->deserializeTextEscaped(column, buf, format_settings);
|
||||
break;
|
||||
case FormatSettings::EscapingRule::Quoted:
|
||||
if (parse_as_nullable)
|
||||
read = SerializationNullable::deserializeTextQuotedImpl(column, buf, format_settings, serialization);
|
||||
read = SerializationNullable::deserializeNullAsDefaultOrNestedTextQuoted(column, buf, format_settings, serialization);
|
||||
else
|
||||
serialization->deserializeTextQuoted(column, buf, format_settings);
|
||||
break;
|
||||
case FormatSettings::EscapingRule::CSV:
|
||||
if (parse_as_nullable)
|
||||
read = SerializationNullable::deserializeTextCSVImpl(column, buf, format_settings, serialization);
|
||||
read = SerializationNullable::deserializeNullAsDefaultOrNestedTextCSV(column, buf, format_settings, serialization);
|
||||
else
|
||||
serialization->deserializeTextCSV(column, buf, format_settings);
|
||||
break;
|
||||
case FormatSettings::EscapingRule::JSON:
|
||||
if (parse_as_nullable)
|
||||
read = SerializationNullable::deserializeTextJSONImpl(column, buf, format_settings, serialization);
|
||||
read = SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column, buf, format_settings, serialization);
|
||||
else
|
||||
serialization->deserializeTextJSON(column, buf, format_settings);
|
||||
break;
|
||||
case FormatSettings::EscapingRule::Raw:
|
||||
if (parse_as_nullable)
|
||||
read = SerializationNullable::deserializeTextRawImpl(column, buf, format_settings, serialization);
|
||||
read = SerializationNullable::deserializeNullAsDefaultOrNestedTextRaw(column, buf, format_settings, serialization);
|
||||
else
|
||||
serialization->deserializeTextRaw(column, buf, format_settings);
|
||||
break;
|
||||
|
@ -282,14 +282,14 @@ namespace JSONUtils
|
||||
ReadBufferFromString buf(str);
|
||||
|
||||
if (as_nullable)
|
||||
return SerializationNullable::deserializeWholeTextImpl(column, buf, format_settings, serialization);
|
||||
return SerializationNullable::deserializeNullAsDefaultOrNestedWholeText(column, buf, format_settings, serialization);
|
||||
|
||||
serialization->deserializeWholeText(column, buf, format_settings);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (as_nullable)
|
||||
return SerializationNullable::deserializeTextJSONImpl(column, in, format_settings, serialization);
|
||||
return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column, in, format_settings, serialization);
|
||||
|
||||
serialization->deserializeTextJSON(column, in, format_settings);
|
||||
return true;
|
||||
|
@ -966,7 +966,7 @@ namespace
|
||||
if constexpr (is_json)
|
||||
ok = tryReadJSONStringInto(field, buf);
|
||||
else
|
||||
ok = tryReadQuotedStringInto(field, buf);
|
||||
ok = tryReadQuotedString(field, buf);
|
||||
|
||||
if (!ok)
|
||||
return nullptr;
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include <DataTypes/DataTypeObject.h>
|
||||
#include <DataTypes/ObjectUtils.h>
|
||||
#include <DataTypes/DataTypeNested.h>
|
||||
#include <DataTypes/DataTypeVariant.h>
|
||||
#include <DataTypes/Serializations/SerializationDecimal.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
@ -40,6 +41,7 @@
|
||||
#include <Columns/ColumnMap.h>
|
||||
#include <Columns/ColumnObject.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <Columns/ColumnVariant.h>
|
||||
#include <Columns/ColumnStringHelpers.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/Concepts.h>
|
||||
@ -4066,6 +4068,259 @@ arguments, result_type, input_rows_count); \
|
||||
"Cast to Object can be performed only from flatten named Tuple, Map or String. Got: {}", from_type->getName());
|
||||
}
|
||||
|
||||
/// Builds a wrapper that converts a Variant column into an extended Variant column.
/// Throws CANNOT_CONVERT_TYPE when some variant of `from_variant` is missing in `to_variant`.
WrapperType createVariantToVariantWrapper(const DataTypeVariant & from_variant, const DataTypeVariant & to_variant) const
{
    /// Only extension of a Variant type is supported, i.e. only new types can be added.
    /// For example: Variant(T1, T2) -> Variant(T1, T2, T3) is supported, but Variant(T1, T2) -> Variant(T1, T3) is not.
    /// The extension should be free (no rewrite of the data). However, data types inside Variant are sorted during
    /// type creation (so that Variant(T1, T2) is the same as Variant(T2, T1)), which means that after the extension
    /// the order of variant types (and so their discriminators) can change, e.g. Variant(T1, T3) -> Variant(T1, T2, T3).
    /// To avoid a full rewrite of the discriminators column, ColumnVariant supports its own local order of variant
    /// columns (and so local discriminators) and stores the mapping global order -> local order.
    /// So, to extend a Variant for free, we keep the old local order for old variants, append the new variants
    /// and change the mapping global order -> local order according to the new global order.

    const auto & source_types = from_variant.getVariants();
    const auto & target_types = to_variant.getVariants();

    /// Name of each target variant type -> its global discriminator in the target order.
    std::unordered_map<String, ColumnVariant::Discriminator> target_discriminator_by_name;
    target_discriminator_by_name.reserve(target_types.size());
    for (size_t pos = 0; pos != target_types.size(); ++pos)
        target_discriminator_by_name[target_types[pos]->getName()] = pos;

    /// Name of each source variant type -> its global discriminator in the source order.
    std::unordered_map<String, ColumnVariant::Discriminator> source_discriminator_by_name;
    source_discriminator_by_name.reserve(source_types.size());
    for (size_t pos = 0; pos != source_types.size(); ++pos)
        source_discriminator_by_name[source_types[pos]->getName()] = pos;

    /// Verify that the source variants are a subset of the target variants and, along the way,
    /// remember the target global discriminator of every source global discriminator.
    std::unordered_map<ColumnVariant::Discriminator, ColumnVariant::Discriminator> source_to_target_discriminator;
    source_to_target_discriminator.reserve(source_types.size());
    for (const auto & [type_name, source_discriminator] : source_discriminator_by_name)
    {
        const auto target_it = target_discriminator_by_name.find(type_name);
        if (target_it == target_discriminator_by_name.end())
            throw Exception(
                ErrorCodes::CANNOT_CONVERT_TYPE,
                "Cannot convert type {} to {}. Conversion between Variant types is allowed only when new Variant type is an extension "
                "of an initial one", from_variant.getName(), to_variant.getName());
        source_to_target_discriminator[source_discriminator] = target_it->second;
    }

    /// Variant types (with their target global discriminators) that are present only in the target Variant.
    std::vector<std::pair<DataTypePtr, ColumnVariant::Discriminator>> types_to_append;
    types_to_append.reserve(target_types.size() - source_types.size());
    for (size_t pos = 0; pos != target_types.size(); ++pos)
    {
        if (source_discriminator_by_name.contains(target_types[pos]->getName()))
            continue;
        types_to_append.emplace_back(target_types[pos], pos);
    }

    return [source_to_target_discriminator, types_to_append]
        (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr
    {
        const auto & source_column = assert_cast<const ColumnVariant &>(*arguments.front().column.get());
        const size_t source_variants_count = source_column.getNumVariants();
        const size_t result_variants_count = source_variants_count + types_to_append.size();

        Columns result_variants;
        result_variants.reserve(result_variants_count);
        std::vector<ColumnVariant::Discriminator> result_local_to_global;
        result_local_to_global.reserve(result_variants_count);

        /// Old variants keep their local positions; only their global discriminators are remapped.
        for (size_t local = 0; local != source_variants_count; ++local)
        {
            result_variants.push_back(source_column.getVariantPtrByLocalDiscriminator(local));
            result_local_to_global.push_back(source_to_target_discriminator.at(source_column.globalDiscriminatorByLocal(local)));
        }

        /// New variants are appended as freshly created columns after the old ones.
        for (const auto & [appended_type, appended_global_discriminator] : types_to_append)
        {
            result_variants.push_back(appended_type->createColumn());
            result_local_to_global.push_back(appended_global_discriminator);
        }

        /// Local discriminators and offsets are reused as is.
        return ColumnVariant::create(source_column.getLocalDiscriminatorsPtr(), source_column.getOffsetsPtr(), result_variants, result_local_to_global);
    };
}
|
||||
|
||||
/// Builds a wrapper that converts a Variant column into a column of `to_type`.
/// Every variant is cast to the result type with its own conversion wrapper, and the result is then
/// assembled row by row according to the discriminators; rows with NULL_DISCRIMINATOR get the default value.
WrapperType createVariantToColumnWrapper(const DataTypeVariant & from_variant, const DataTypePtr & to_type) const
{
    /// Variant types and their wrappers are both stored in the global order of `from_variant`.
    const auto & variant_types = from_variant.getVariants();
    std::vector<WrapperType> variant_wrappers;
    variant_wrappers.reserve(variant_types.size());

    /// Create conversion wrapper for each variant.
    for (const auto & variant_type : variant_types)
        variant_wrappers.push_back(prepareUnpackDictionaries(variant_type, to_type));

    return [variant_wrappers, variant_types, to_type]
        (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr
    {
        const auto & column_variant = assert_cast<const ColumnVariant &>(*arguments.front().column.get());

        /// First, cast each variant to the result type.
        /// The casted columns are indexed by LOCAL discriminator, because rows are dispatched by it below.
        std::vector<ColumnPtr> casted_variant_columns;
        casted_variant_columns.reserve(variant_types.size());
        for (size_t local_discr = 0; local_discr != variant_types.size(); ++local_discr)
        {
            /// The column may keep its variants in a local order that differs from the global order of the type,
            /// so both the data type and the wrapper of a variant must be looked up by its GLOBAL discriminator.
            const auto global_discr = column_variant.globalDiscriminatorByLocal(local_discr);
            auto variant_col = column_variant.getVariantPtrByLocalDiscriminator(local_discr);
            ColumnsWithTypeAndName variant = {{variant_col, variant_types[global_discr], "" }};
            const auto & variant_wrapper = variant_wrappers[global_discr];
            casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size()));
        }

        /// Second, construct resulting column from casted variant columns according to discriminators.
        const auto & local_discriminators = column_variant.getLocalDiscriminators();
        auto res = result_type->createColumn();
        res->reserve(input_rows_count);
        for (size_t i = 0; i != input_rows_count; ++i)
        {
            auto local_discr = local_discriminators[i];
            if (local_discr == ColumnVariant::NULL_DISCRIMINATOR)
                res->insertDefault();
            else
                res->insertFrom(*casted_variant_columns[local_discr], column_variant.offsetAt(i));
        }

        return res;
    };
}
|
||||
|
||||
/// Assembles a ColumnVariant from the given discriminators where only the variant with
/// discriminator `variant_discr` has data (`variant`); every other variant is a newly created column of its type.
static ColumnPtr createVariantFromDescriptorsAndOneNonEmptyVariant(const DataTypes & variant_types, const ColumnPtr & discriminators, const ColumnPtr & variant, ColumnVariant::Discriminator variant_discr)
{
    Columns variants;
    variants.reserve(variant_types.size());

    for (size_t pos = 0; pos != variant_types.size(); ++pos)
    {
        if (pos != variant_discr)
        {
            variants.push_back(variant_types[pos]->createColumn());
            continue;
        }

        /// The only variant that carries data.
        variants.emplace_back(variant);
    }

    return ColumnVariant::create(discriminators, variants);
}
|
||||
|
||||
/// Builds a wrapper that converts a column of `from_type` into a Variant column.
/// The source type (after removeNullableOrLowCardinalityNullable) must be one of the variants of `to_variant`,
/// otherwise CANNOT_CONVERT_TYPE is thrown. A type that is only NULL is converted to a column of defaults.
WrapperType createColumnToVariantWrapper(const DataTypePtr & from_type, const DataTypeVariant & to_variant) const
{
    /// We allow converting NULL to Variant(...) as Variant can store NULLs.
    if (from_type->onlyNull())
    {
        return [](ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr
        {
            auto all_defaults = result_type->createColumn();
            all_defaults->insertManyDefaults(input_rows_count);
            return all_defaults;
        };
    }

    const auto target_discriminator = to_variant.tryGetVariantDiscriminator(removeNullableOrLowCardinalityNullable(from_type));
    if (!target_discriminator)
        throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert type {} to {}. Conversion to Variant allowed only for types from this Variant", from_type->getName(), to_variant.getName());

    return [variant_discr = *target_discriminator]
        (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t) -> ColumnPtr
    {
        const auto & variant_types = assert_cast<const DataTypeVariant &>(*result_type).getVariants();
        const auto & source_column = arguments.front().column;

        /// Case 1: Nullable(T). NULL rows get NULL_DISCRIMINATOR and are filtered out of the variant data.
        if (const ColumnNullable * nullable_column = typeid_cast<const ColumnNullable *>(source_column.get()))
        {
            const auto & nested_column = nullable_column->getNestedColumnPtr();
            const auto & null_map = nullable_column->getNullMapData();
            const size_t rows = nested_column->size();

            IColumn::Filter keep_row;
            keep_row.reserve(rows);
            auto discriminators = ColumnVariant::ColumnDiscriminators::create();
            auto & discriminators_data = discriminators->getData();
            discriminators_data.reserve(rows);

            size_t non_null_rows = 0;
            for (size_t row = 0; row != rows; ++row)
            {
                if (!null_map[row])
                {
                    discriminators_data.push_back(variant_discr);
                    keep_row.push_back(1);
                    ++non_null_rows;
                    continue;
                }

                discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR);
                keep_row.push_back(0);
            }

            /// Without NULLs the nested column is used as is, otherwise its filtered version is used.
            ColumnPtr variant_column = (non_null_rows == rows) ? nested_column : nested_column->filter(keep_row, non_null_rows);
            return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), variant_column, variant_discr);
        }

        /// Case 2: LowCardinality(Nullable(T)).
        /// Variant column cannot have LowCardinality(Nullable(...)) variant, as Variant column stores NULLs itself.
        /// So NULL rows get NULL_DISCRIMINATOR, the column is filtered and the nullability of its nested part is removed.
        if (isColumnLowCardinalityNullable(*source_column))
        {
            const auto & low_cardinality_column = assert_cast<const ColumnLowCardinality &>(*source_column);
            const auto & indexes = low_cardinality_column.getIndexes();
            const auto null_index = low_cardinality_column.getDictionary().getNullValueIndex();
            const size_t rows = low_cardinality_column.size();

            IColumn::Filter keep_row;
            keep_row.reserve(rows);
            auto discriminators = ColumnVariant::ColumnDiscriminators::create();
            auto & discriminators_data = discriminators->getData();
            discriminators_data.reserve(rows);

            size_t non_null_rows = 0;
            for (size_t row = 0; row != rows; ++row)
            {
                if (indexes.getUInt(row) != null_index)
                {
                    discriminators_data.push_back(variant_discr);
                    keep_row.push_back(1);
                    ++non_null_rows;
                    continue;
                }

                discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR);
                keep_row.push_back(0);
            }

            /// Without NULLs the column is just cloned, otherwise it is filtered.
            MutableColumnPtr variant_column = (non_null_rows == rows)
                ? IColumn::mutate(source_column)
                : source_column->filter(keep_row, non_null_rows)->assumeMutable();

            assert_cast<ColumnLowCardinality &>(*variant_column).nestedRemoveNullable();
            return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), std::move(variant_column), variant_discr);
        }

        /// Case 3: any other column. Every row belongs to the target variant.
        auto discriminators = ColumnVariant::ColumnDiscriminators::create();
        discriminators->getData().resize_fill(source_column->size(), variant_discr);
        return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), source_column, variant_discr);
    };
}
|
||||
|
||||
/// Wrapper for conversion to/from Variant type
|
||||
WrapperType createVariantWrapper(const DataTypePtr & from_type, const DataTypePtr & to_type) const
|
||||
{
|
||||
if (const auto * from_variant = checkAndGetDataType<DataTypeVariant>(from_type.get()))
|
||||
{
|
||||
if (const auto * to_variant = checkAndGetDataType<DataTypeVariant>(to_type.get()))
|
||||
return createVariantToVariantWrapper(*from_variant, *to_variant);
|
||||
|
||||
return createVariantToColumnWrapper(*from_variant, to_type);
|
||||
}
|
||||
|
||||
return createColumnToVariantWrapper(from_type, assert_cast<const DataTypeVariant &>(*to_type));
|
||||
}
|
||||
|
||||
template <typename FieldType>
|
||||
WrapperType createEnumWrapper(const DataTypePtr & from_type, const DataTypeEnum<FieldType> * to_type) const
|
||||
{
|
||||
@ -4245,6 +4500,11 @@ arguments, result_type, input_rows_count); \
|
||||
|
||||
WrapperType prepareUnpackDictionaries(const DataTypePtr & from_type, const DataTypePtr & to_type) const
|
||||
{
|
||||
/// Conversion from/to Variant data type is processed in a special way.
|
||||
/// We don't need to remove LowCardinality/Nullable.
|
||||
if (isVariant(to_type) || isVariant(from_type))
|
||||
return createVariantWrapper(from_type, to_type);
|
||||
|
||||
const auto * from_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(from_type.get());
|
||||
const auto * to_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(to_type.get());
|
||||
const auto & from_nested = from_low_cardinality ? from_low_cardinality->getDictionaryType() : from_type;
|
||||
@ -4252,7 +4512,7 @@ arguments, result_type, input_rows_count); \
|
||||
|
||||
if (from_type->onlyNull())
|
||||
{
|
||||
if (!to_nested->isNullable())
|
||||
if (!to_nested->isNullable() && !isVariant(to_type))
|
||||
{
|
||||
if (cast_type == CastType::accurateOrNull)
|
||||
{
|
||||
|
@ -313,7 +313,7 @@ ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments,
|
||||
{
|
||||
bool use_default_implementation_for_sparse_columns = useDefaultImplementationForSparseColumns();
|
||||
/// DataTypeFunction does not support obtaining default (isDefaultAt())
|
||||
/// ColumnFunction does not support getting specific values
|
||||
/// ColumnFunction does not support getting specific values.
|
||||
if (result_type->getTypeId() != TypeIndex::Function && use_default_implementation_for_sparse_columns)
|
||||
{
|
||||
size_t num_sparse_columns = 0;
|
||||
@ -368,7 +368,7 @@ ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments,
|
||||
if (!result_type->canBeInsideSparseColumns() || !res->isDefaultAt(0) || res->getNumberOfDefaultRows() != 1)
|
||||
{
|
||||
const auto & offsets_data = assert_cast<const ColumnVector<UInt64> &>(*sparse_offsets).getData();
|
||||
return res->createWithOffsets(offsets_data, (*res)[0], input_rows_count, /*shift=*/ 1);
|
||||
return res->createWithOffsets(offsets_data, *createColumnConst(res, 0), input_rows_count, /*shift=*/ 1);
|
||||
}
|
||||
|
||||
return ColumnSparse::create(res, sparse_offsets, input_rows_count);
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -14,9 +15,12 @@ class FunctionArray : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "array";
|
||||
static FunctionPtr create(ContextPtr)
|
||||
|
||||
explicit FunctionArray(bool use_variant_as_common_type_ = false) : use_variant_as_common_type(use_variant_as_common_type_) {}
|
||||
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
return std::make_shared<FunctionArray>();
|
||||
return std::make_shared<FunctionArray>(context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type);
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
@ -31,6 +35,9 @@ public:
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (use_variant_as_common_type)
|
||||
return std::make_shared<DataTypeArray>(getLeastSupertypeOrVariant(arguments));
|
||||
|
||||
return std::make_shared<DataTypeArray>(getLeastSupertype(arguments));
|
||||
}
|
||||
|
||||
@ -97,6 +104,8 @@ private:
|
||||
}
|
||||
|
||||
bool addField(DataTypePtr type_res, const Field & f, Array & arr) const;
|
||||
|
||||
bool use_variant_as_common_type = false;
|
||||
};
|
||||
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <DataTypes/DataTypeFixedString.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeVariant.h>
|
||||
#include <DataTypes/NumberTraits.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
@ -14,6 +15,7 @@
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnVariant.h>
|
||||
#include <Columns/MaskOperations.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/assert_cast.h>
|
||||
@ -22,6 +24,8 @@
|
||||
#include <Functions/GatherUtils/Algorithms.h>
|
||||
#include <Functions/FunctionIfBase.h>
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <type_traits>
|
||||
|
||||
@ -258,9 +262,16 @@ class FunctionIf : public FunctionIfBase
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "if";
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionIf>(); }
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
return std::make_shared<FunctionIf>(context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type);
|
||||
}
|
||||
|
||||
explicit FunctionIf(bool use_variant_when_no_common_type_ = false) : FunctionIfBase(), use_variant_when_no_common_type(use_variant_when_no_common_type_) {}
|
||||
|
||||
private:
|
||||
bool use_variant_when_no_common_type = false;
|
||||
|
||||
template <typename T0, typename T1>
|
||||
static UInt32 decimalScale(const ColumnsWithTypeAndName & arguments [[maybe_unused]])
|
||||
{
|
||||
@ -669,13 +680,17 @@ private:
|
||||
}
|
||||
|
||||
static ColumnPtr executeGeneric(
|
||||
const ColumnUInt8 * cond_col, const ColumnsWithTypeAndName & arguments, size_t input_rows_count)
|
||||
const ColumnUInt8 * cond_col, const ColumnsWithTypeAndName & arguments, size_t input_rows_count, bool use_variant_when_no_common_type)
|
||||
{
|
||||
/// Convert both columns to the common type (if needed).
|
||||
const ColumnWithTypeAndName & arg1 = arguments[1];
|
||||
const ColumnWithTypeAndName & arg2 = arguments[2];
|
||||
|
||||
DataTypePtr common_type = getLeastSupertype(DataTypes{arg1.type, arg2.type});
|
||||
DataTypePtr common_type;
|
||||
if (use_variant_when_no_common_type)
|
||||
common_type = getLeastSupertypeOrVariant(DataTypes{arg1.type, arg2.type});
|
||||
else
|
||||
common_type = getLeastSupertype(DataTypes{arg1.type, arg2.type});
|
||||
|
||||
ColumnPtr col_then = castColumn(arg1, common_type);
|
||||
ColumnPtr col_else = castColumn(arg2, common_type);
|
||||
@ -850,6 +865,10 @@ private:
|
||||
|
||||
ColumnPtr executeForNullableThenElse(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
|
||||
{
|
||||
/// If result type is Variant, we don't need to remove Nullable.
|
||||
if (isVariant(result_type))
|
||||
return nullptr;
|
||||
|
||||
const ColumnWithTypeAndName & arg_cond = arguments[0];
|
||||
const ColumnWithTypeAndName & arg_then = arguments[1];
|
||||
const ColumnWithTypeAndName & arg_else = arguments[2];
|
||||
@ -955,6 +974,11 @@ private:
|
||||
assert_cast<ColumnNullable &>(*result_column).applyNullMap(assert_cast<const ColumnUInt8 &>(*arg_cond.column));
|
||||
return result_column;
|
||||
}
|
||||
else if (auto * variant_column = typeid_cast<ColumnVariant *>(result_column.get()))
|
||||
{
|
||||
variant_column->applyNullMap(assert_cast<const ColumnUInt8 &>(*arg_cond.column).getData());
|
||||
return result_column;
|
||||
}
|
||||
else
|
||||
return ColumnNullable::create(materializeColumnIfConst(result_column), arg_cond.column);
|
||||
}
|
||||
@ -993,6 +1017,11 @@ private:
|
||||
assert_cast<ColumnNullable &>(*result_column).applyNegatedNullMap(assert_cast<const ColumnUInt8 &>(*arg_cond.column));
|
||||
return result_column;
|
||||
}
|
||||
else if (auto * variant_column = typeid_cast<ColumnVariant *>(result_column.get()))
|
||||
{
|
||||
variant_column->applyNegatedNullMap(assert_cast<const ColumnUInt8 &>(*arg_cond.column).getData());
|
||||
return result_column;
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t size = input_rows_count;
|
||||
@ -1082,6 +1111,9 @@ public:
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument (condition) of function if. "
|
||||
"Must be UInt8.", arguments[0]->getName());
|
||||
|
||||
if (use_variant_when_no_common_type)
|
||||
return getLeastSupertypeOrVariant(DataTypes{arguments[1], arguments[2]});
|
||||
|
||||
return getLeastSupertype(DataTypes{arguments[1], arguments[2]});
|
||||
}
|
||||
|
||||
@ -1165,7 +1197,7 @@ public:
|
||||
|| (res = executeGenericArray(cond_col, arguments, result_type))
|
||||
|| (res = executeTuple(arguments, result_type, input_rows_count))))
|
||||
{
|
||||
return executeGeneric(cond_col, arguments, input_rows_count);
|
||||
return executeGeneric(cond_col, arguments, input_rows_count, use_variant_when_no_common_type);
|
||||
}
|
||||
|
||||
return res;
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Core/ColumnNumbers.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
#include <Columns/ColumnVariant.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
|
||||
@ -45,6 +46,18 @@ public:
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
const ColumnWithTypeAndName & elem = arguments[0];
|
||||
|
||||
if (isVariant(elem.type))
|
||||
{
|
||||
const auto & discriminators = checkAndGetColumn<ColumnVariant>(*elem.column)->getLocalDiscriminators();
|
||||
auto res = DataTypeUInt8().createColumn();
|
||||
auto & data = typeid_cast<ColumnUInt8 &>(*res).getData();
|
||||
data.reserve(discriminators.size());
|
||||
for (auto discr : discriminators)
|
||||
data.push_back(discr != ColumnVariant::NULL_DISCRIMINATOR);
|
||||
return res;
|
||||
}
|
||||
|
||||
if (elem.type->isLowCardinalityNullable())
|
||||
{
|
||||
const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(*elem.column);
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Core/ColumnNumbers.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
#include <Columns/ColumnVariant.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -44,6 +45,18 @@ public:
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
|
||||
{
|
||||
const ColumnWithTypeAndName & elem = arguments[0];
|
||||
|
||||
if (isVariant(elem.type))
|
||||
{
|
||||
const auto & discriminators = checkAndGetColumn<ColumnVariant>(*elem.column)->getLocalDiscriminators();
|
||||
auto res = DataTypeUInt8().createColumn();
|
||||
auto & data = typeid_cast<ColumnUInt8 &>(*res).getData();
|
||||
data.reserve(discriminators.size());
|
||||
for (auto discr : discriminators)
|
||||
data.push_back(discr == ColumnVariant::NULL_DISCRIMINATOR);
|
||||
return res;
|
||||
}
|
||||
|
||||
if (elem.type->isLowCardinalityNullable())
|
||||
{
|
||||
const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(*elem.column);
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <Columns/ColumnMap.h>
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
|
||||
|
||||
@ -30,9 +31,11 @@ class FunctionMap : public IFunction
|
||||
public:
|
||||
static constexpr auto name = "map";
|
||||
|
||||
static FunctionPtr create(ContextPtr)
|
||||
explicit FunctionMap(bool use_variant_as_common_type_) : use_variant_as_common_type(use_variant_as_common_type_) {}
|
||||
|
||||
static FunctionPtr create(ContextPtr context)
|
||||
{
|
||||
return std::make_shared<FunctionMap>();
|
||||
return std::make_shared<FunctionMap>(context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type);
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
@ -77,8 +80,16 @@ public:
|
||||
}
|
||||
|
||||
DataTypes tmp;
|
||||
tmp.emplace_back(getLeastSupertype(keys));
|
||||
tmp.emplace_back(getLeastSupertype(values));
|
||||
if (use_variant_as_common_type)
|
||||
{
|
||||
tmp.emplace_back(getLeastSupertypeOrVariant(keys));
|
||||
tmp.emplace_back(getLeastSupertypeOrVariant(values));
|
||||
}
|
||||
else
|
||||
{
|
||||
tmp.emplace_back(getLeastSupertype(keys));
|
||||
tmp.emplace_back(getLeastSupertype(values));
|
||||
}
|
||||
return std::make_shared<DataTypeMap>(tmp);
|
||||
}
|
||||
|
||||
@ -138,6 +149,9 @@ public:
|
||||
|
||||
return ColumnMap::create(nested_column);
|
||||
}
|
||||
|
||||
private:
|
||||
bool use_variant_as_common_type = false;
|
||||
};
|
||||
|
||||
/// mapFromArrays(keys, values) is a function that allows you to make key-value pair from a pair of arrays
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeVariant.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
|
||||
|
||||
@ -117,6 +118,9 @@ public:
|
||||
types_of_branches.emplace_back(arg);
|
||||
});
|
||||
|
||||
if (context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type)
|
||||
return getLeastSupertypeOrVariant(types_of_branches);
|
||||
|
||||
return getLeastSupertype(types_of_branches);
|
||||
}
|
||||
|
||||
|
238
src/Functions/variantElement.cpp
Normal file
238
src/Functions/variantElement.cpp
Normal file
@ -0,0 +1,238 @@
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeVariant.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnVariant.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <memory>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/** Extract element of Variant by variant type name.
|
||||
* Also the function looks through Arrays: you can get Array of Variant elements from Array of Variants.
|
||||
*/
|
||||
class FunctionVariantElement : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "variantElement";
|
||||
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionVariantElement>(); }
|
||||
String getName() const override { return name; }
|
||||
bool isVariadic() const override { return true; }
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
const size_t number_of_arguments = arguments.size();
|
||||
|
||||
if (number_of_arguments < 2 || number_of_arguments > 3)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Number of arguments for function {} doesn't match: passed {}, should be 2 or 3",
|
||||
getName(), number_of_arguments);
|
||||
|
||||
size_t count_arrays = 0;
|
||||
const IDataType * input_type = arguments[0].type.get();
|
||||
while (const DataTypeArray * array = checkAndGetDataType<DataTypeArray>(input_type))
|
||||
{
|
||||
input_type = array->getNestedType().get();
|
||||
++count_arrays;
|
||||
}
|
||||
|
||||
const DataTypeVariant * variant_type = checkAndGetDataType<DataTypeVariant>(input_type);
|
||||
if (!variant_type)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"First argument for function {} must be Variant or Array of Variant. Actual {}",
|
||||
getName(),
|
||||
arguments[0].type->getName());
|
||||
|
||||
std::optional<size_t> variant_global_discr = getVariantGlobalDiscriminator(arguments[1].column, *variant_type, number_of_arguments);
|
||||
if (variant_global_discr.has_value())
|
||||
{
|
||||
DataTypePtr return_type = makeNullableOrLowCardinalityNullableSafe(variant_type->getVariant(variant_global_discr.value()));
|
||||
|
||||
for (; count_arrays; --count_arrays)
|
||||
return_type = std::make_shared<DataTypeArray>(return_type);
|
||||
|
||||
return return_type;
|
||||
}
|
||||
else
|
||||
return arguments[2].type;
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
const auto & input_arg = arguments[0];
|
||||
const IDataType * input_type = input_arg.type.get();
|
||||
const IColumn * input_col = input_arg.column.get();
|
||||
|
||||
bool input_arg_is_const = false;
|
||||
if (typeid_cast<const ColumnConst *>(input_col))
|
||||
{
|
||||
input_col = assert_cast<const ColumnConst *>(input_col)->getDataColumnPtr().get();
|
||||
input_arg_is_const = true;
|
||||
}
|
||||
|
||||
Columns array_offsets;
|
||||
while (const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(input_type))
|
||||
{
|
||||
const ColumnArray * array_col = assert_cast<const ColumnArray *>(input_col);
|
||||
|
||||
input_type = array_type->getNestedType().get();
|
||||
input_col = &array_col->getData();
|
||||
array_offsets.push_back(array_col->getOffsetsPtr());
|
||||
}
|
||||
|
||||
const DataTypeVariant * input_type_as_variant = checkAndGetDataType<DataTypeVariant>(input_type);
|
||||
const ColumnVariant * input_col_as_variant = checkAndGetColumn<ColumnVariant>(input_col);
|
||||
if (!input_type_as_variant || !input_col_as_variant)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"First argument for function {} must be Variant or array of Variants. Actual {}", getName(), input_arg.type->getName());
|
||||
|
||||
std::optional<size_t> variant_global_discr = getVariantGlobalDiscriminator(arguments[1].column, *input_type_as_variant, arguments.size());
|
||||
|
||||
if (!variant_global_discr.has_value())
|
||||
return arguments[2].column;
|
||||
|
||||
const auto & variant_type = input_type_as_variant->getVariant(*variant_global_discr);
|
||||
const auto & variant_column = input_col_as_variant->getVariantPtrByGlobalDiscriminator(*variant_global_discr);
|
||||
|
||||
/// If Variant has only NULLs or our variant doesn't have any real values,
|
||||
/// just create column with default values and create null mask with 1.
|
||||
if (input_col_as_variant->hasOnlyNulls() || variant_column->empty())
|
||||
{
|
||||
auto res = variant_type->createColumn();
|
||||
|
||||
if (variant_type->lowCardinality())
|
||||
assert_cast<ColumnLowCardinality &>(*res).nestedToNullable();
|
||||
|
||||
res->insertManyDefaults(input_col_as_variant->size());
|
||||
if (!variant_type->canBeInsideNullable())
|
||||
return wrapInArraysAndConstIfNeeded(std::move(res), array_offsets, input_arg_is_const, input_rows_count);
|
||||
|
||||
auto null_map = ColumnUInt8::create();
|
||||
auto & null_map_data = null_map->getData();
|
||||
null_map_data.resize_fill(input_col_as_variant->size(), 1);
|
||||
return wrapInArraysAndConstIfNeeded(ColumnNullable::create(std::move(res), std::move(null_map)), array_offsets, input_arg_is_const, input_rows_count);
|
||||
}
|
||||
|
||||
/// If we extract single non-empty column and have no NULLs, then just return this variant.
|
||||
if (auto non_empty_local_discr = input_col_as_variant->getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
|
||||
{
|
||||
/// If we were trying to extract some other variant,
|
||||
/// it would be empty and we would already processed this case above.
|
||||
chassert(input_col_as_variant->globalDiscriminatorByLocal(*non_empty_local_discr) == variant_global_discr);
|
||||
return wrapInArraysAndConstIfNeeded(makeNullableOrLowCardinalityNullableSafe(variant_column), array_offsets, input_arg_is_const, input_rows_count);
|
||||
}
|
||||
|
||||
/// In general case we should calculate null-mask for variant
|
||||
/// according to the discriminators column and expand
|
||||
/// variant column by this mask to get a full column (with default values on NULLs)
|
||||
const auto & local_discriminators = input_col_as_variant->getLocalDiscriminators();
|
||||
auto null_map = ColumnUInt8::create();
|
||||
auto & null_map_data = null_map->getData();
|
||||
null_map_data.reserve(local_discriminators.size());
|
||||
auto variant_local_discr = input_col_as_variant->localDiscriminatorByGlobal(*variant_global_discr);
|
||||
for (auto local_discr : local_discriminators)
|
||||
null_map_data.push_back(local_discr != variant_local_discr);
|
||||
|
||||
auto expanded_variant_column = IColumn::mutate(variant_column);
|
||||
if (variant_type->lowCardinality())
|
||||
expanded_variant_column = assert_cast<ColumnLowCardinality &>(*expanded_variant_column).cloneNullable();
|
||||
expanded_variant_column->expand(null_map_data, /*inverted = */ true);
|
||||
if (variant_type->canBeInsideNullable())
|
||||
return wrapInArraysAndConstIfNeeded(ColumnNullable::create(std::move(expanded_variant_column), std::move(null_map)), array_offsets, input_arg_is_const, input_rows_count);
|
||||
return wrapInArraysAndConstIfNeeded(std::move(expanded_variant_column), array_offsets, input_arg_is_const, input_rows_count);
|
||||
}
|
||||
private:
|
||||
std::optional<size_t> getVariantGlobalDiscriminator(const ColumnPtr & index_column, const DataTypeVariant & variant_type, size_t argument_size) const
|
||||
{
|
||||
const auto * name_col = checkAndGetColumnConst<ColumnString>(index_column.get());
|
||||
if (!name_col)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Second argument to {} with Variant argument must be a constant String",
|
||||
getName());
|
||||
|
||||
String variant_element_name = name_col->getValue<String>();
|
||||
auto variant_element_type = DataTypeFactory::instance().tryGet(variant_element_name);
|
||||
if (variant_element_type)
|
||||
{
|
||||
const auto & variants = variant_type.getVariants();
|
||||
for (size_t i = 0; i != variants.size(); ++i)
|
||||
{
|
||||
if (variants[i]->getName() == variant_element_type->getName())
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
if (argument_size == 2)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} doesn't contain variant with type {}", variant_type.getName(), variant_element_name);
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
ColumnPtr wrapInArraysAndConstIfNeeded(ColumnPtr res, const Columns & array_offsets, bool input_arg_is_const, size_t input_rows_count) const
|
||||
{
|
||||
for (auto it = array_offsets.rbegin(); it != array_offsets.rend(); ++it)
|
||||
res = ColumnArray::create(res, *it);
|
||||
|
||||
if (input_arg_is_const)
|
||||
res = ColumnConst::create(res, input_rows_count);
|
||||
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
/// Registers `variantElement` in the function factory together with its user-facing documentation.
REGISTER_FUNCTION(VariantElement)
{
    factory.registerFunction<FunctionVariantElement>(FunctionDocumentation{
        .description = R"(
Extracts a column with specified type from a `Variant` column.
)",
        /// The syntax line has to name the function being registered here; the third argument is optional.
        .syntax{"variantElement(variant, type_name[, default_value])"},
        .arguments{{
            {"variant", "Variant column"},
            {"type_name", "The name of the variant type to extract"},
            {"default_value", "The default value that will be used if variant doesn't have variant with specified type. Can be any type. Optional"}}},
        .examples{{{
            "Example",
            R"(
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantElement(v, 'Array(UInt64)') FROM test;)",
            R"(
┌─v─────────────┬─variantElement(v, 'String')─┬─variantElement(v, 'UInt64')─┬─variantElement(v, 'Array(UInt64)')─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │
│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
└───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘
)"}}},
        .categories{"Variant"},
    });
}
|
||||
|
||||
}
|
@ -619,13 +619,16 @@ void readQuotedStringInto(Vector & s, ReadBuffer & buf)
|
||||
readAnyQuotedStringInto<'\'', enable_sql_style_quoting>(s, buf);
|
||||
}
|
||||
|
||||
template <typename Vector>
|
||||
template <bool enable_sql_style_quoting, typename Vector>
|
||||
bool tryReadQuotedStringInto(Vector & s, ReadBuffer & buf)
|
||||
{
|
||||
return readAnyQuotedStringInto<'\'', false, Vector, bool>(s, buf);
|
||||
return readAnyQuotedStringInto<'\'', enable_sql_style_quoting, Vector, bool>(s, buf);
|
||||
}
|
||||
|
||||
template bool tryReadQuotedStringInto(String & s, ReadBuffer & buf);
|
||||
template bool tryReadQuotedStringInto<true, String>(String & s, ReadBuffer & buf);
|
||||
template bool tryReadQuotedStringInto<false, String>(String & s, ReadBuffer & buf);
|
||||
template bool tryReadQuotedStringInto<true, PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
|
||||
template bool tryReadQuotedStringInto<false, PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
|
||||
|
||||
template <bool enable_sql_style_quoting, typename Vector>
|
||||
void readDoubleQuotedStringInto(Vector & s, ReadBuffer & buf)
|
||||
@ -633,6 +636,16 @@ void readDoubleQuotedStringInto(Vector & s, ReadBuffer & buf)
|
||||
readAnyQuotedStringInto<'"', enable_sql_style_quoting>(s, buf);
|
||||
}
|
||||
|
||||
template <bool enable_sql_style_quoting, typename Vector>
|
||||
bool tryReadDoubleQuotedStringInto(Vector & s, ReadBuffer & buf)
|
||||
{
|
||||
return readAnyQuotedStringInto<'"', enable_sql_style_quoting, Vector, bool>(s, buf);
|
||||
}
|
||||
|
||||
template bool tryReadDoubleQuotedStringInto<true, String>(String & s, ReadBuffer & buf);
|
||||
template bool tryReadDoubleQuotedStringInto<false, String>(String & s, ReadBuffer & buf);
|
||||
|
||||
|
||||
template <bool enable_sql_style_quoting, typename Vector>
|
||||
void readBackQuotedStringInto(Vector & s, ReadBuffer & buf)
|
||||
{
|
||||
@ -652,6 +665,18 @@ void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf)
|
||||
readQuotedStringInto<true>(s, buf);
|
||||
}
|
||||
|
||||
/// Clears `s`, then forwards to tryReadQuotedStringInto with SQL-style quoting disabled
/// and returns its result.
bool tryReadQuotedString(String & s, ReadBuffer & buf)
{
    s.clear();
    const bool parsed = tryReadQuotedStringInto<false>(s, buf);
    return parsed;
}
|
||||
|
||||
/// Clears `s`, then forwards to tryReadQuotedStringInto with SQL-style quoting enabled
/// and returns its result.
bool tryReadQuotedStringWithSQLStyle(String & s, ReadBuffer & buf)
{
    s.clear();
    const bool parsed = tryReadQuotedStringInto<true>(s, buf);
    return parsed;
}
|
||||
|
||||
|
||||
template void readQuotedStringInto<true>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
|
||||
template void readQuotedStringInto<true>(String & s, ReadBuffer & buf);
|
||||
@ -672,6 +697,18 @@ void readDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf)
|
||||
readDoubleQuotedStringInto<true>(s, buf);
|
||||
}
|
||||
|
||||
/// Clears `s`, then forwards to tryReadDoubleQuotedStringInto with SQL-style quoting disabled
/// and returns its result.
bool tryReadDoubleQuotedString(String & s, ReadBuffer & buf)
{
    s.clear();
    const bool parsed = tryReadDoubleQuotedStringInto<false>(s, buf);
    return parsed;
}
|
||||
|
||||
/// Clears `s`, then forwards to tryReadDoubleQuotedStringInto with SQL-style quoting enabled
/// and returns its result.
bool tryReadDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf)
{
    s.clear();
    const bool parsed = tryReadDoubleQuotedStringInto<true>(s, buf);
    return parsed;
}
|
||||
|
||||
void readBackQuotedString(String & s, ReadBuffer & buf)
|
||||
{
|
||||
s.clear();
|
||||
@ -691,7 +728,7 @@ concept WithResize = requires (T value)
|
||||
{ value.size() } -> std::integral<>;
|
||||
};
|
||||
|
||||
template <typename Vector, bool include_quotes>
|
||||
template <typename Vector, bool include_quotes, bool allow_throw>
|
||||
void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings)
|
||||
{
|
||||
/// Empty string
|
||||
@ -754,12 +791,20 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV &
|
||||
{
|
||||
PeekableReadBuffer * peekable_buf = dynamic_cast<PeekableReadBuffer *>(&buf);
|
||||
if (!peekable_buf)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Reading CSV string with custom delimiter is allowed only when using PeekableReadBuffer");
|
||||
{
|
||||
if constexpr (allow_throw)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Reading CSV string with custom delimiter is allowed only when using PeekableReadBuffer");
|
||||
return;
|
||||
}
|
||||
|
||||
while (true)
|
||||
{
|
||||
if (peekable_buf->eof())
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF while reading CSV string, expected custom delimiter \"{}\"", custom_delimiter);
|
||||
{
|
||||
if constexpr (allow_throw)
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF while reading CSV string, expected custom delimiter \"{}\"", custom_delimiter);
|
||||
return;
|
||||
}
|
||||
|
||||
char * next_pos = reinterpret_cast<char *>(memchr(peekable_buf->position(), custom_delimiter[0], peekable_buf->available()));
|
||||
if (!next_pos)
|
||||
@ -948,6 +993,9 @@ String readCSVFieldWithTwoPossibleDelimiters(PeekableReadBuffer & buf, const For
|
||||
|
||||
template void readCSVStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
|
||||
template void readCSVStringInto<NullOutput>(NullOutput & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
|
||||
template void readCSVStringInto<String, false, false>(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
|
||||
template void readCSVStringInto<String, true, false>(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
|
||||
template void readCSVStringInto<PaddedPODArray<UInt8>, false, false>(PaddedPODArray<UInt8> & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
|
||||
|
||||
|
||||
template <typename Vector, typename ReturnType>
|
||||
@ -1069,15 +1117,18 @@ ReturnType readJSONObjectPossiblyInvalid(Vector & s, ReadBuffer & buf)
|
||||
}
|
||||
|
||||
template void readJSONObjectPossiblyInvalid<String>(String & s, ReadBuffer & buf);
|
||||
template bool readJSONObjectPossiblyInvalid<String, bool>(String & s, ReadBuffer & buf);
|
||||
template void readJSONObjectPossiblyInvalid<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
|
||||
template bool readJSONObjectPossiblyInvalid<PaddedPODArray<UInt8>, bool>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
|
||||
|
||||
template <typename Vector>
|
||||
void readJSONArrayInto(Vector & s, ReadBuffer & buf)
|
||||
template <typename Vector, typename ReturnType>
|
||||
ReturnType readJSONArrayInto(Vector & s, ReadBuffer & buf)
|
||||
{
|
||||
readJSONObjectOrArrayPossiblyInvalid<Vector, void, '[', ']'>(s, buf);
|
||||
return readJSONObjectOrArrayPossiblyInvalid<Vector, ReturnType, '[', ']'>(s, buf);
|
||||
}
|
||||
|
||||
template void readJSONArrayInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
|
||||
template void readJSONArrayInto<PaddedPODArray<UInt8>, void>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
|
||||
template bool readJSONArrayInto<PaddedPODArray<UInt8>, bool>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
|
||||
|
||||
template <typename ReturnType>
|
||||
ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf)
|
||||
@ -1217,6 +1268,13 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
|
||||
return false;
|
||||
}
|
||||
|
||||
if constexpr (!throw_exception)
|
||||
{
|
||||
if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[2]) || !isNumericASCII(s[3])
|
||||
|| !isNumericASCII(s[5]) || !isNumericASCII(s[6]) || !isNumericASCII(s[8]) || !isNumericASCII(s[9]))
|
||||
return false;
|
||||
}
|
||||
|
||||
UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0');
|
||||
UInt8 month = (s[5] - '0') * 10 + (s[6] - '0');
|
||||
UInt8 day = (s[8] - '0') * 10 + (s[9] - '0');
|
||||
@ -1240,6 +1298,13 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
|
||||
return false;
|
||||
}
|
||||
|
||||
if constexpr (!throw_exception)
|
||||
{
|
||||
if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[3]) || !isNumericASCII(s[4])
|
||||
|| !isNumericASCII(s[6]) || !isNumericASCII(s[7]))
|
||||
return false;
|
||||
}
|
||||
|
||||
hour = (s[0] - '0') * 10 + (s[1] - '0');
|
||||
minute = (s[3] - '0') * 10 + (s[4] - '0');
|
||||
second = (s[6] - '0') * 10 + (s[7] - '0');
|
||||
@ -1259,7 +1324,14 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
|
||||
{
|
||||
/// Not very efficient.
|
||||
for (const char * digit_pos = s; digit_pos < s_pos; ++digit_pos)
|
||||
{
|
||||
if constexpr (!throw_exception)
|
||||
{
|
||||
if (!isNumericASCII(*digit_pos))
|
||||
return false;
|
||||
}
|
||||
datetime = datetime * 10 + *digit_pos - '0';
|
||||
}
|
||||
}
|
||||
datetime *= negative_multiplier;
|
||||
|
||||
@ -1282,14 +1354,24 @@ template bool readDateTimeTextFallback<bool, false>(time_t &, ReadBuffer &, cons
|
||||
template bool readDateTimeTextFallback<bool, true>(time_t &, ReadBuffer &, const DateLUTImpl &);
|
||||
|
||||
|
||||
void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
|
||||
template <typename ReturnType>
|
||||
ReturnType skipJSONFieldImpl(ReadBuffer & buf, StringRef name_of_field)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
if (buf.eof())
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF for key '{}'", name_of_field.toString());
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF for key '{}'", name_of_field.toString());
|
||||
return ReturnType(false);
|
||||
}
|
||||
else if (*buf.position() == '"') /// skip double-quoted string
|
||||
{
|
||||
NullOutput sink;
|
||||
readJSONStringInto(sink, buf);
|
||||
if constexpr (throw_exception)
|
||||
readJSONStringInto(sink, buf);
|
||||
else if (!tryReadJSONStringInto(sink, buf))
|
||||
return ReturnType(false);
|
||||
}
|
||||
else if (isNumericASCII(*buf.position()) || *buf.position() == '-' || *buf.position() == '+' || *buf.position() == '.') /// skip number
|
||||
{
|
||||
@ -1298,19 +1380,32 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
|
||||
|
||||
double v;
|
||||
if (!tryReadFloatText(v, buf))
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Expected a number field for key '{}'", name_of_field.toString());
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Expected a number field for key '{}'", name_of_field.toString());
|
||||
return ReturnType(false);
|
||||
}
|
||||
}
|
||||
else if (*buf.position() == 'n') /// skip null
|
||||
{
|
||||
assertString("null", buf);
|
||||
if constexpr (throw_exception)
|
||||
assertString("null", buf);
|
||||
else if (!checkString("null", buf))
|
||||
return ReturnType(false);
|
||||
}
|
||||
else if (*buf.position() == 't') /// skip true
|
||||
{
|
||||
assertString("true", buf);
|
||||
if constexpr (throw_exception)
|
||||
assertString("true", buf);
|
||||
else if (!checkString("true", buf))
|
||||
return ReturnType(false);
|
||||
}
|
||||
else if (*buf.position() == 'f') /// skip false
|
||||
{
|
||||
assertString("false", buf);
|
||||
if constexpr (throw_exception)
|
||||
assertString("false", buf);
|
||||
else if (!checkString("false", buf))
|
||||
return ReturnType(false);
|
||||
}
|
||||
else if (*buf.position() == '[')
|
||||
{
|
||||
@ -1320,12 +1415,16 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
|
||||
if (!buf.eof() && *buf.position() == ']') /// skip empty array
|
||||
{
|
||||
++buf.position();
|
||||
return;
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
while (true)
|
||||
{
|
||||
skipJSONField(buf, name_of_field);
|
||||
if constexpr (throw_exception)
|
||||
skipJSONFieldImpl<ReturnType>(buf, name_of_field);
|
||||
else if (!skipJSONFieldImpl<ReturnType>(buf, name_of_field))
|
||||
return ReturnType(false);
|
||||
|
||||
skipWhitespaceIfAny(buf);
|
||||
|
||||
if (!buf.eof() && *buf.position() == ',')
|
||||
@ -1339,7 +1438,11 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
|
||||
break;
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString());
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString());
|
||||
return ReturnType(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (*buf.position() == '{') /// skip whole object
|
||||
@ -1353,19 +1456,34 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
|
||||
if (*buf.position() == '"')
|
||||
{
|
||||
NullOutput sink;
|
||||
readJSONStringInto(sink, buf);
|
||||
if constexpr (throw_exception)
|
||||
readJSONStringInto(sink, buf);
|
||||
else if (!tryReadJSONStringInto(sink, buf))
|
||||
return ReturnType(false);
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString());
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString());
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
// ':'
|
||||
skipWhitespaceIfAny(buf);
|
||||
if (buf.eof() || !(*buf.position() == ':'))
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString());
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString());
|
||||
return ReturnType(false);
|
||||
}
|
||||
++buf.position();
|
||||
skipWhitespaceIfAny(buf);
|
||||
|
||||
skipJSONField(buf, name_of_field);
|
||||
if constexpr (throw_exception)
|
||||
skipJSONFieldImpl<ReturnType>(buf, name_of_field);
|
||||
else if (!skipJSONFieldImpl<ReturnType>(buf, name_of_field))
|
||||
return ReturnType(false);
|
||||
|
||||
skipWhitespaceIfAny(buf);
|
||||
|
||||
// optional ','
|
||||
@ -1377,18 +1495,37 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
|
||||
}
|
||||
|
||||
if (buf.eof())
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF for key '{}'", name_of_field.toString());
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF for key '{}'", name_of_field.toString());
|
||||
return ReturnType(false);
|
||||
}
|
||||
++buf.position();
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::INCORRECT_DATA,
|
||||
"Cannot read JSON field here: '{}'. Unexpected symbol '{}'{}",
|
||||
String(buf.position(), std::min(buf.available(), size_t(10))),
|
||||
std::string(1, *buf.position()),
|
||||
name_of_field.empty() ? "" : " for key " + name_of_field.toString());
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(
|
||||
ErrorCodes::INCORRECT_DATA,
|
||||
"Cannot read JSON field here: '{}'. Unexpected symbol '{}'{}",
|
||||
String(buf.position(), std::min(buf.available(), size_t(10))),
|
||||
std::string(1, *buf.position()),
|
||||
name_of_field.empty() ? "" : " for key " + name_of_field.toString());
|
||||
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
/// Skips one JSON value at the current position of `buf`.
/// Runs skipJSONFieldImpl in its void (throwing) mode; `name_of_field` is forwarded
/// so that the error messages can mention the key being skipped.
void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
{
    skipJSONFieldImpl<void>(buf, name_of_field);
}
|
||||
|
||||
/// Counterpart of skipJSONField that runs skipJSONFieldImpl in its bool mode:
/// malformed input is reported by returning false, a successfully skipped value by returning true.
bool trySkipJSONField(ReadBuffer & buf, StringRef name_of_field)
{
    const bool skipped = skipJSONFieldImpl<bool>(buf, name_of_field);
    return skipped;
}
|
||||
|
||||
|
||||
@ -1601,23 +1738,31 @@ void skipToNextRowOrEof(PeekableReadBuffer & buf, const String & row_after_delim
|
||||
}
|
||||
|
||||
// Use PeekableReadBuffer to copy field to string after parsing.
|
||||
template <typename Vector, typename ParseFunc>
|
||||
static void readParsedValueInto(Vector & s, ReadBuffer & buf, ParseFunc parse_func)
|
||||
template <typename ReturnType, typename Vector, typename ParseFunc>
|
||||
static ReturnType readParsedValueInto(Vector & s, ReadBuffer & buf, ParseFunc parse_func)
|
||||
{
|
||||
PeekableReadBuffer peekable_buf(buf);
|
||||
peekable_buf.setCheckpoint();
|
||||
parse_func(peekable_buf);
|
||||
if constexpr (std::is_same_v<ReturnType, void>)
|
||||
parse_func(peekable_buf);
|
||||
else if (!parse_func(peekable_buf))
|
||||
return ReturnType(false);
|
||||
peekable_buf.makeContinuousMemoryFromCheckpointToPos();
|
||||
auto * end = peekable_buf.position();
|
||||
peekable_buf.rollbackToCheckpoint();
|
||||
s.append(peekable_buf.position(), end);
|
||||
peekable_buf.position() = end;
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
template <typename Vector>
|
||||
static void readQuotedStringFieldInto(Vector & s, ReadBuffer & buf)
|
||||
template <typename ReturnType = void, typename Vector>
|
||||
static ReturnType readQuotedStringFieldInto(Vector & s, ReadBuffer & buf)
|
||||
{
|
||||
assertChar('\'', buf);
|
||||
if constexpr (std::is_same_v<ReturnType, void>)
|
||||
assertChar('\'', buf);
|
||||
else if (!checkChar('\'', buf))
|
||||
return ReturnType(false);
|
||||
|
||||
s.push_back('\'');
|
||||
while (!buf.eof())
|
||||
{
|
||||
@ -1645,16 +1790,23 @@ static void readQuotedStringFieldInto(Vector & s, ReadBuffer & buf)
|
||||
}
|
||||
|
||||
if (buf.eof())
|
||||
return;
|
||||
return ReturnType(false);
|
||||
|
||||
++buf.position();
|
||||
s.push_back('\'');
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
template <char opening_bracket, char closing_bracket, typename Vector>
|
||||
static void readQuotedFieldInBracketsInto(Vector & s, ReadBuffer & buf)
|
||||
template <typename ReturnType = void, char opening_bracket, char closing_bracket, typename Vector>
|
||||
static ReturnType readQuotedFieldInBracketsInto(Vector & s, ReadBuffer & buf)
|
||||
{
|
||||
assertChar(opening_bracket, buf);
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
if constexpr (throw_exception)
|
||||
assertChar(opening_bracket, buf);
|
||||
else if (!checkChar(opening_bracket, buf))
|
||||
return ReturnType(false);
|
||||
|
||||
s.push_back(opening_bracket);
|
||||
|
||||
size_t balance = 1;
|
||||
@ -1670,7 +1822,10 @@ static void readQuotedFieldInBracketsInto(Vector & s, ReadBuffer & buf)
|
||||
|
||||
if (*buf.position() == '\'')
|
||||
{
|
||||
readQuotedStringFieldInto(s, buf);
|
||||
if constexpr (throw_exception)
|
||||
readQuotedStringFieldInto<void>(s, buf);
|
||||
else if (!readQuotedStringFieldInto<bool>(s, buf))
|
||||
return ReturnType(false);
|
||||
}
|
||||
else if (*buf.position() == opening_bracket)
|
||||
{
|
||||
@ -1685,13 +1840,20 @@ static void readQuotedFieldInBracketsInto(Vector & s, ReadBuffer & buf)
|
||||
++buf.position();
|
||||
}
|
||||
}
|
||||
|
||||
if (balance)
|
||||
return ReturnType(false);
|
||||
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
template <typename Vector>
|
||||
void readQuotedFieldInto(Vector & s, ReadBuffer & buf)
|
||||
template <typename ReturnType, typename Vector>
|
||||
ReturnType readQuotedFieldInto(Vector & s, ReadBuffer & buf)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
if (buf.eof())
|
||||
return;
|
||||
return ReturnType(false);
|
||||
|
||||
/// Possible values in 'Quoted' field:
|
||||
/// - Strings: '...'
|
||||
@ -1703,35 +1865,47 @@ void readQuotedFieldInto(Vector & s, ReadBuffer & buf)
|
||||
/// - Number: integer, float, decimal.
|
||||
|
||||
if (*buf.position() == '\'')
|
||||
readQuotedStringFieldInto(s, buf);
|
||||
return readQuotedStringFieldInto<ReturnType>(s, buf);
|
||||
else if (*buf.position() == '[')
|
||||
readQuotedFieldInBracketsInto<'[', ']'>(s, buf);
|
||||
return readQuotedFieldInBracketsInto<ReturnType, '[', ']'>(s, buf);
|
||||
else if (*buf.position() == '(')
|
||||
readQuotedFieldInBracketsInto<'(', ')'>(s, buf);
|
||||
return readQuotedFieldInBracketsInto<ReturnType, '(', ')'>(s, buf);
|
||||
else if (*buf.position() == '{')
|
||||
readQuotedFieldInBracketsInto<'{', '}'>(s, buf);
|
||||
return readQuotedFieldInBracketsInto<ReturnType, '{', '}'>(s, buf);
|
||||
else if (checkCharCaseInsensitive('n', buf))
|
||||
{
|
||||
/// NULL or NaN
|
||||
if (checkCharCaseInsensitive('u', buf))
|
||||
{
|
||||
assertStringCaseInsensitive("ll", buf);
|
||||
if constexpr (throw_exception)
|
||||
assertStringCaseInsensitive("ll", buf);
|
||||
else if (!checkStringCaseInsensitive("ll", buf))
|
||||
return ReturnType(false);
|
||||
s.append("NULL");
|
||||
}
|
||||
else
|
||||
{
|
||||
assertStringCaseInsensitive("an", buf);
|
||||
if constexpr (throw_exception)
|
||||
assertStringCaseInsensitive("an", buf);
|
||||
else if (!checkStringCaseInsensitive("an", buf))
|
||||
return ReturnType(false);
|
||||
s.append("NaN");
|
||||
}
|
||||
}
|
||||
else if (checkCharCaseInsensitive('t', buf))
|
||||
{
|
||||
assertStringCaseInsensitive("rue", buf);
|
||||
if constexpr (throw_exception)
|
||||
assertStringCaseInsensitive("rue", buf);
|
||||
else if (!checkStringCaseInsensitive("rue", buf))
|
||||
return ReturnType(false);
|
||||
s.append("true");
|
||||
}
|
||||
else if (checkCharCaseInsensitive('f', buf))
|
||||
{
|
||||
assertStringCaseInsensitive("alse", buf);
|
||||
if constexpr (throw_exception)
|
||||
assertStringCaseInsensitive("alse", buf);
|
||||
else if (!checkStringCaseInsensitive("alse", buf))
|
||||
return ReturnType(false);
|
||||
s.append("false");
|
||||
}
|
||||
else
|
||||
@ -1740,13 +1914,19 @@ void readQuotedFieldInto(Vector & s, ReadBuffer & buf)
|
||||
auto parse_func = [](ReadBuffer & in)
|
||||
{
|
||||
Float64 tmp;
|
||||
readFloatText(tmp, in);
|
||||
if constexpr (throw_exception)
|
||||
readFloatText(tmp, in);
|
||||
else
|
||||
return tryReadFloatText(tmp, in);
|
||||
};
|
||||
readParsedValueInto(s, buf, parse_func);
|
||||
|
||||
return readParsedValueInto<ReturnType>(s, buf, parse_func);
|
||||
}
|
||||
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
template void readQuotedFieldInto<NullOutput>(NullOutput & s, ReadBuffer & buf);
|
||||
template void readQuotedFieldInto<void, NullOutput>(NullOutput & s, ReadBuffer & buf);
|
||||
|
||||
void readQuotedField(String & s, ReadBuffer & buf)
|
||||
{
|
||||
@ -1754,11 +1934,24 @@ void readQuotedField(String & s, ReadBuffer & buf)
|
||||
readQuotedFieldInto(s, buf);
|
||||
}
|
||||
|
||||
/// Bool-returning variant of readQuotedField.
/// Clears `s`, then reads one quoted field from `buf` into it via readQuotedFieldInto<bool>
/// and returns that call's result. `s` is only cleared up front, so it may hold
/// partially read data when false is returned.
bool tryReadQuotedField(String & s, ReadBuffer & buf)
{
    s.clear();

    const bool parsed = readQuotedFieldInto<bool>(s, buf);
    return parsed;
}
|
||||
|
||||
/// Reads the raw text of one JSON value from `buf` into `s` (which is cleared first).
/// The value is delimited by skipping it with skipJSONField, so malformed input throws.
void readJSONField(String & s, ReadBuffer & buf)
{
    s.clear();

    /// An empty field name is passed to the skipper.
    auto parse_func = [](ReadBuffer & in) { skipJSONField(in, ""); };

    /// ReturnType must be spelled out: it is the first template parameter and cannot be deduced.
    readParsedValueInto<void>(s, buf, parse_func);
}
|
||||
|
||||
/// Bool-returning variant of readJSONField.
/// Clears `s`, then copies the raw text of one JSON value from `buf` into it.
/// Returns false when trySkipJSONField rejects the value, true otherwise.
bool tryReadJSONField(String & s, ReadBuffer & buf)
{
    s.clear();

    /// The value is delimited by skipping it; an empty field name is passed to the skipper.
    auto skip_value = [](ReadBuffer & in) -> bool
    {
        return trySkipJSONField(in, "");
    };

    return readParsedValueInto<bool>(s, buf, skip_value);
}
|
||||
|
||||
void readTSVField(String & s, ReadBuffer & buf)
|
||||
|
@ -258,26 +258,43 @@ inline void readBoolText(bool & x, ReadBuffer & buf)
|
||||
x = tmp != '0';
|
||||
}
|
||||
|
||||
inline void readBoolTextWord(bool & x, ReadBuffer & buf, bool support_upper_case = false)
|
||||
template <typename ReturnType = void>
|
||||
inline ReturnType readBoolTextWord(bool & x, ReadBuffer & buf, bool support_upper_case = false)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
if (buf.eof()) [[unlikely]]
|
||||
throwReadAfterEOF();
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throwReadAfterEOF();
|
||||
else
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
switch (*buf.position())
|
||||
{
|
||||
case 't':
|
||||
assertString("true", buf);
|
||||
if constexpr (throw_exception)
|
||||
assertString("true", buf);
|
||||
else if (!checkString("true", buf))
|
||||
return ReturnType(false);
|
||||
x = true;
|
||||
break;
|
||||
case 'f':
|
||||
assertString("false", buf);
|
||||
if constexpr (throw_exception)
|
||||
assertString("false", buf);
|
||||
else if (!checkString("false", buf))
|
||||
return ReturnType(false);
|
||||
x = false;
|
||||
break;
|
||||
case 'T':
|
||||
{
|
||||
if (support_upper_case)
|
||||
{
|
||||
assertString("TRUE", buf);
|
||||
if constexpr (throw_exception)
|
||||
assertString("TRUE", buf);
|
||||
else if (!checkString("TRUE", buf))
|
||||
return ReturnType(false);
|
||||
x = true;
|
||||
break;
|
||||
}
|
||||
@ -288,7 +305,10 @@ inline void readBoolTextWord(bool & x, ReadBuffer & buf, bool support_upper_case
|
||||
{
|
||||
if (support_upper_case)
|
||||
{
|
||||
assertString("FALSE", buf);
|
||||
if constexpr (throw_exception)
|
||||
assertString("FALSE", buf);
|
||||
else if (!checkString("FALSE", buf))
|
||||
return ReturnType(false);
|
||||
x = false;
|
||||
break;
|
||||
}
|
||||
@ -296,8 +316,15 @@ inline void readBoolTextWord(bool & x, ReadBuffer & buf, bool support_upper_case
|
||||
[[fallthrough]];
|
||||
}
|
||||
default:
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Unexpected Bool value");
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Unexpected Bool value");
|
||||
else
|
||||
return ReturnType(false);
|
||||
}
|
||||
}
|
||||
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
enum class ReadIntTextCheckOverflow
|
||||
@ -469,7 +496,10 @@ void readIntText(T & x, ReadBuffer & buf)
|
||||
template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::CHECK_OVERFLOW, typename T>
|
||||
bool tryReadIntText(T & x, ReadBuffer & buf)
|
||||
{
|
||||
return readIntTextImpl<T, bool, check_overflow>(x, buf);
|
||||
if constexpr (is_decimal<T>)
|
||||
return tryReadIntText<check_overflow>(x.value, buf);
|
||||
else
|
||||
return readIntTextImpl<T, bool, check_overflow>(x, buf);
|
||||
}
|
||||
|
||||
|
||||
@ -478,16 +508,18 @@ bool tryReadIntText(T & x, ReadBuffer & buf)
|
||||
* - for numbers starting with zero, parsed only zero;
|
||||
* - symbol '+' before number is not supported;
|
||||
*/
|
||||
template <typename T, bool throw_on_error = true>
|
||||
void readIntTextUnsafe(T & x, ReadBuffer & buf)
|
||||
template <typename T, typename ReturnType = void>
|
||||
ReturnType readIntTextUnsafe(T & x, ReadBuffer & buf)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
bool negative = false;
|
||||
make_unsigned_t<T> res = 0;
|
||||
|
||||
auto on_error = []
|
||||
{
|
||||
if (throw_on_error)
|
||||
if constexpr (throw_exception)
|
||||
throwReadAfterEOF();
|
||||
return ReturnType(false);
|
||||
};
|
||||
|
||||
if (buf.eof()) [[unlikely]]
|
||||
@ -505,7 +537,7 @@ void readIntTextUnsafe(T & x, ReadBuffer & buf)
|
||||
{
|
||||
++buf.position();
|
||||
x = 0;
|
||||
return;
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
while (!buf.eof())
|
||||
@ -524,12 +556,13 @@ void readIntTextUnsafe(T & x, ReadBuffer & buf)
|
||||
|
||||
/// See note about undefined behaviour above.
|
||||
x = is_signed_v<T> && negative ? -res : res;
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void tryReadIntTextUnsafe(T & x, ReadBuffer & buf)
|
||||
bool tryReadIntTextUnsafe(T & x, ReadBuffer & buf)
|
||||
{
|
||||
return readIntTextUnsafe<T, false>(x, buf);
|
||||
return readIntTextUnsafe<T, bool>(x, buf);
|
||||
}
|
||||
|
||||
|
||||
@ -551,9 +584,15 @@ void readEscapedString(String & s, ReadBuffer & buf);
|
||||
void readQuotedString(String & s, ReadBuffer & buf);
|
||||
void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);
|
||||
|
||||
bool tryReadQuotedString(String & s, ReadBuffer & buf);
|
||||
bool tryReadQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);
|
||||
|
||||
void readDoubleQuotedString(String & s, ReadBuffer & buf);
|
||||
void readDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);
|
||||
|
||||
bool tryReadDoubleQuotedString(String & s, ReadBuffer & buf);
|
||||
bool tryReadDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);
|
||||
|
||||
void readJSONString(String & s, ReadBuffer & buf);
|
||||
|
||||
void readBackQuotedString(String & s, ReadBuffer & buf);
|
||||
@ -616,7 +655,7 @@ void readBackQuotedStringInto(Vector & s, ReadBuffer & buf);
|
||||
template <typename Vector>
|
||||
void readStringUntilEOFInto(Vector & s, ReadBuffer & buf);
|
||||
|
||||
template <typename Vector, bool include_quotes = false>
|
||||
template <typename Vector, bool include_quotes = false, bool allow_throw = true>
|
||||
void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
|
||||
|
||||
/// ReturnType is either bool or void. If bool, the function will return false instead of throwing an exception.
|
||||
@ -629,7 +668,7 @@ bool tryReadJSONStringInto(Vector & s, ReadBuffer & buf)
|
||||
return readJSONStringInto<Vector, bool>(s, buf);
|
||||
}
|
||||
|
||||
template <typename Vector>
|
||||
template <bool enable_sql_style_quoting, typename Vector>
|
||||
bool tryReadQuotedStringInto(Vector & s, ReadBuffer & buf);
|
||||
|
||||
/// Reads chunk of data between {} in that way,
|
||||
@ -638,8 +677,8 @@ bool tryReadQuotedStringInto(Vector & s, ReadBuffer & buf);
|
||||
template <typename Vector, typename ReturnType = void>
|
||||
ReturnType readJSONObjectPossiblyInvalid(Vector & s, ReadBuffer & buf);
|
||||
|
||||
template <typename Vector>
|
||||
void readJSONArrayInto(Vector & s, ReadBuffer & buf);
|
||||
template <typename Vector, typename ReturnType = void>
|
||||
ReturnType readJSONArrayInto(Vector & s, ReadBuffer & buf);
|
||||
|
||||
template <typename Vector>
|
||||
void readStringUntilWhitespaceInto(Vector & s, ReadBuffer & buf);
|
||||
@ -963,6 +1002,13 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons
|
||||
{
|
||||
if (s[4] < '0' || s[4] > '9')
|
||||
{
|
||||
if constexpr (!throw_exception)
|
||||
{
|
||||
if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[2]) || !isNumericASCII(s[3])
|
||||
|| !isNumericASCII(s[5]) || !isNumericASCII(s[6]) || !isNumericASCII(s[8]) || !isNumericASCII(s[9]))
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0');
|
||||
UInt8 month = (s[5] - '0') * 10 + (s[6] - '0');
|
||||
UInt8 day = (s[8] - '0') * 10 + (s[9] - '0');
|
||||
@ -975,6 +1021,13 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons
|
||||
bool dt_long = (s[10] == ' ' || s[10] == 'T');
|
||||
if (dt_long)
|
||||
{
|
||||
if constexpr (!throw_exception)
|
||||
{
|
||||
if (!isNumericASCII(s[11]) || !isNumericASCII(s[12]) || !isNumericASCII(s[14]) || !isNumericASCII(s[15])
|
||||
|| !isNumericASCII(s[17]) || !isNumericASCII(s[18]))
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
hour = (s[11] - '0') * 10 + (s[12] - '0');
|
||||
minute = (s[14] - '0') * 10 + (s[15] - '0');
|
||||
second = (s[17] - '0') * 10 + (s[18] - '0');
|
||||
@ -1312,6 +1365,11 @@ inline bool tryReadText(is_integer auto & x, ReadBuffer & buf)
|
||||
return tryReadIntText(x, buf);
|
||||
}
|
||||
|
||||
/// Bool-returning text parser for floating point values.
/// Forwards to tryReadFloatText and returns its result.
inline bool tryReadText(is_floating_point auto & x, ReadBuffer & buf)
{
    const bool parsed = tryReadFloatText(x, buf);
    return parsed;
}
|
||||
|
||||
/// Bool-returning text parsers for UUID and IP address values.
/// Each overload forwards to the matching tryRead*Text helper and returns its result.
inline bool tryReadText(UUID & x, ReadBuffer & buf) { return tryReadUUIDText(x, buf); }
|
||||
inline bool tryReadText(IPv4 & x, ReadBuffer & buf) { return tryReadIPv4Text(x, buf); }
|
||||
inline bool tryReadText(IPv6 & x, ReadBuffer & buf) { return tryReadIPv6Text(x, buf); }
|
||||
@ -1321,9 +1379,20 @@ inline void readText(is_floating_point auto & x, ReadBuffer & buf) { readFloatTe
|
||||
/// Strings are read in escaped form.
inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); }
|
||||
|
||||
/// Date overloads: readText forwards to readDateText, tryReadText to tryReadDateText (returning its bool result).
/// For DayNum the time zone defaults to DateLUT::instance().
inline void readText(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { readDateText(x, buf, time_zone); }
|
||||
inline bool tryReadText(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { return tryReadDateText(x, buf, time_zone); }
|
||||
|
||||
inline void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); }
|
||||
inline bool tryReadText(LocalDate & x, ReadBuffer & buf) { return tryReadDateText(x, buf); }
|
||||
/// LocalDateTime is read via readDateTimeText.
inline void readText(LocalDateTime & x, ReadBuffer & buf) { readDateTimeText(x, buf); }
|
||||
/// Bool-returning text parser for LocalDateTime.
/// Parses a timestamp with tryReadDateTimeText; on success converts it to LocalDateTime
/// using DateLUT::instance() and returns true. On failure `x` is left untouched and false is returned.
inline bool tryReadText(LocalDateTime & x, ReadBuffer & buf)
{
    time_t parsed_timestamp;

    if (tryReadDateTimeText(parsed_timestamp, buf))
    {
        x = LocalDateTime(parsed_timestamp, DateLUT::instance());
        return true;
    }

    return false;
}
|
||||
|
||||
/// Text parsers for UUID and IP address values; each overload forwards to the matching read*Text helper.
inline void readText(UUID & x, ReadBuffer & buf) { readUUIDText(x, buf); }
|
||||
inline void readText(IPv4 & x, ReadBuffer & buf) { readIPv4Text(x, buf); }
|
||||
inline void readText(IPv6 & x, ReadBuffer & buf) { readIPv6Text(x, buf); }
|
||||
@ -1401,39 +1470,71 @@ inline void readDoubleQuoted(LocalDateTime & x, ReadBuffer & buf)
|
||||
}
|
||||
|
||||
/// CSV for numbers: quotes are optional, no special escaping rules.
|
||||
template <typename T>
|
||||
inline void readCSVSimple(T & x, ReadBuffer & buf)
|
||||
template <typename T, typename ReturnType = void>
|
||||
inline ReturnType readCSVSimple(T & x, ReadBuffer & buf)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
if (buf.eof()) [[unlikely]]
|
||||
throwReadAfterEOF();
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throwReadAfterEOF();
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
char maybe_quote = *buf.position();
|
||||
|
||||
if (maybe_quote == '\'' || maybe_quote == '\"')
|
||||
++buf.position();
|
||||
|
||||
readText(x, buf);
|
||||
if constexpr (throw_exception)
|
||||
readText(x, buf);
|
||||
else if (!tryReadText(x, buf))
|
||||
return ReturnType(false);
|
||||
|
||||
if (maybe_quote == '\'' || maybe_quote == '\"')
|
||||
assertChar(maybe_quote, buf);
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
assertChar(maybe_quote, buf);
|
||||
else if (!checkChar(maybe_quote, buf))
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
// standalone overload for dates: to avoid instantiating DateLUTs while parsing other types
|
||||
template <typename T>
|
||||
inline void readCSVSimple(T & x, ReadBuffer & buf, const DateLUTImpl & time_zone)
|
||||
template <typename T, typename ReturnType = void>
|
||||
inline ReturnType readCSVSimple(T & x, ReadBuffer & buf, const DateLUTImpl & time_zone)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
if (buf.eof()) [[unlikely]]
|
||||
throwReadAfterEOF();
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throwReadAfterEOF();
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
char maybe_quote = *buf.position();
|
||||
|
||||
if (maybe_quote == '\'' || maybe_quote == '\"')
|
||||
++buf.position();
|
||||
|
||||
readText(x, buf, time_zone);
|
||||
if constexpr (throw_exception)
|
||||
readText(x, buf, time_zone);
|
||||
else if (!tryReadText(x, buf, time_zone))
|
||||
return ReturnType(false);
|
||||
|
||||
if (maybe_quote == '\'' || maybe_quote == '\"')
|
||||
assertChar(maybe_quote, buf);
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
assertChar(maybe_quote, buf);
|
||||
else if (!checkChar(maybe_quote, buf))
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
return ReturnType(true);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -1443,18 +1544,52 @@ inline void readCSV(T & x, ReadBuffer & buf)
|
||||
readCSVSimple(x, buf);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_arithmetic_v<T>
|
||||
inline bool tryReadCSV(T & x, ReadBuffer & buf)
|
||||
{
|
||||
return readCSVSimple<T, bool>(x, buf);
|
||||
}
|
||||
|
||||
inline void readCSV(String & x, ReadBuffer & buf, const FormatSettings::CSV & settings) { readCSVString(x, buf, settings); }
|
||||
/// Bool-returning CSV reader for strings.
/// Clears `x` and reads a CSV string into it with readCSVStringInto instantiated with
/// include_quotes = false and allow_throw = false. Always returns true.
inline bool tryReadCSV(String & x, ReadBuffer & buf, const FormatSettings::CSV & settings)
{
    /// Named copies of the template arguments, so the call site is self-describing.
    constexpr bool include_quotes = false;
    constexpr bool allow_throw = false;

    x.clear();
    readCSVStringInto<String, include_quotes, allow_throw>(x, buf, settings);
    return true;
}
|
||||
|
||||
/// CSV readers for date, UUID, IP address and wide integer types.
/// Every readCSV overload forwards to readCSVSimple in its default (void) mode;
/// every tryReadCSV overload instantiates readCSVSimple with ReturnType = bool and returns its result.
inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
|
||||
inline bool tryReadCSV(LocalDate & x, ReadBuffer & buf) { return readCSVSimple<LocalDate, bool>(x, buf); }
|
||||
|
||||
inline void readCSV(DayNum & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
|
||||
inline bool tryReadCSV(DayNum & x, ReadBuffer & buf) { return readCSVSimple<DayNum, bool>(x, buf); }
|
||||
/// DayNum overloads taking an explicit time zone use the three-argument readCSVSimple.
inline void readCSV(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone) { readCSVSimple(x, buf, time_zone); }
|
||||
inline bool tryReadCSV(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone) { return readCSVSimple<DayNum, bool>(x, buf, time_zone); }
|
||||
|
||||
inline void readCSV(LocalDateTime & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
|
||||
inline bool tryReadCSV(LocalDateTime & x, ReadBuffer & buf) { return readCSVSimple<LocalDateTime, bool>(x, buf); }
|
||||
|
||||
inline void readCSV(UUID & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
|
||||
inline bool tryReadCSV(UUID & x, ReadBuffer & buf) { return readCSVSimple<UUID, bool>(x, buf); }
|
||||
|
||||
inline void readCSV(IPv4 & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
|
||||
inline bool tryReadCSV(IPv4 & x, ReadBuffer & buf) { return readCSVSimple<IPv4, bool>(x, buf); }
|
||||
|
||||
inline void readCSV(IPv6 & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
|
||||
inline bool tryReadCSV(IPv6 & x, ReadBuffer & buf) { return readCSVSimple<IPv6, bool>(x, buf); }
|
||||
|
||||
inline void readCSV(UInt128 & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
|
||||
inline bool tryReadCSV(UInt128 & x, ReadBuffer & buf) { return readCSVSimple<UInt128, bool>(x, buf); }
|
||||
|
||||
inline void readCSV(Int128 & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
|
||||
inline bool tryReadCSV(Int128 & x, ReadBuffer & buf) { return readCSVSimple<Int128, bool>(x, buf); }
|
||||
|
||||
inline void readCSV(UInt256 & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
|
||||
inline bool tryReadCSV(UInt256 & x, ReadBuffer & buf) { return readCSVSimple<UInt256, bool>(x, buf); }
|
||||
|
||||
inline void readCSV(Int256 & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
|
||||
inline bool tryReadCSV(Int256 & x, ReadBuffer & buf) { return readCSVSimple<Int256, bool>(x, buf); }
|
||||
|
||||
template <typename T>
|
||||
void readBinary(std::vector<T> & x, ReadBuffer & buf)
|
||||
@ -1536,6 +1671,7 @@ inline void skipWhitespaceIfAny(ReadBuffer & buf, bool one_line = false)
|
||||
|
||||
/// Skips json value.
|
||||
void skipJSONField(ReadBuffer & buf, StringRef name_of_field);
|
||||
bool trySkipJSONField(ReadBuffer & buf, StringRef name_of_field);
|
||||
|
||||
|
||||
/** Read serialized exception.
|
||||
@ -1750,12 +1886,14 @@ struct PcgDeserializer
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Vector>
|
||||
void readQuotedFieldInto(Vector & s, ReadBuffer & buf);
|
||||
template <typename ReturnType = void, typename Vector>
|
||||
ReturnType readQuotedFieldInto(Vector & s, ReadBuffer & buf);
|
||||
|
||||
void readQuotedField(String & s, ReadBuffer & buf);
|
||||
bool tryReadQuotedField(String & s, ReadBuffer & buf);
|
||||
|
||||
void readJSONField(String & s, ReadBuffer & buf);
|
||||
bool tryReadJSONField(String & s, ReadBuffer & buf);
|
||||
|
||||
void readTSVField(String & s, ReadBuffer & buf);
|
||||
|
||||
|
@ -224,4 +224,24 @@ inline void readCSVDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint
|
||||
assertChar(maybe_quote, buf);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline bool tryReadCSVDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale)
|
||||
{
|
||||
if (buf.eof())
|
||||
return false;
|
||||
|
||||
char maybe_quote = *buf.position();
|
||||
|
||||
if (maybe_quote == '\'' || maybe_quote == '\"')
|
||||
++buf.position();
|
||||
|
||||
if (!tryReadDecimalText(buf, x, precision, scale))
|
||||
return false;
|
||||
|
||||
if ((maybe_quote == '\'' || maybe_quote == '\"') && !checkChar(maybe_quote, buf))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user