Merge pull request #58047 from Avogar/variant-data-type

Implement Variant data type
This commit is contained in:
Kruglov Pavel 2024-01-29 11:36:08 +01:00 committed by GitHub
commit 6858d2f4ca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
136 changed files with 12868 additions and 620 deletions

View File

@ -5176,6 +5176,95 @@ When set to `false` than all attempts are made with identical timeouts.
Default value: `true`.
## allow_experimental_variant_type {#allow_experimental_variant_type}
Allows creation of experimental [Variant](../../sql-reference/data-types/variant.md).
Default value: `false`.
## use_variant_as_common_type {#use_variant_as_common_type}
Allows to use `Variant` type as a result type for [if](../../sql-reference/functions/conditional-functions.md/#if)/[multiIf](../../sql-reference/functions/conditional-functions.md/#multiif)/[array](../../sql-reference/functions/array-functions.md)/[map](../../sql-reference/functions/tuple-map-functions.md) functions when there is no common type for argument types.
Example:
```sql
SET use_variant_as_common_type = 1;
SELECT toTypeName(if(number % 2, number, range(number))) as variant_type FROM numbers(1);
SELECT if(number % 2, number, range(number)) as variant FROM numbers(5);
```
```text
┌─variant_type───────────────────┐
│ Variant(Array(UInt64), UInt64) │
└────────────────────────────────┘
┌─variant───┐
│ [] │
│ 1 │
│ [0,1] │
│ 3 │
│ [0,1,2,3] │
└───────────┘
```
```sql
SET use_variant_as_common_type = 1;
SELECT toTypeName(multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL)) AS variant_type FROM numbers(1);
SELECT multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL) AS variant FROM numbers(4);
```
```text
┌─variant_type─────────────────────────┐
│ Variant(Array(UInt8), String, UInt8) │
└──────────────────────────────────────┘
┌─variant───────┐
│ 42 │
│ [1,2,3] │
│ Hello, World! │
│ ᴺᵁᴸᴸ │
└───────────────┘
```
```sql
SET use_variant_as_common_type = 1;
SELECT toTypeName(array(range(number), number, 'str_' || toString(number))) as array_of_variants_type from numbers(1);
SELECT array(range(number), number, 'str_' || toString(number)) as array_of_variants FROM numbers(3);
```
```text
┌─array_of_variants_type────────────────────────┐
│ Array(Variant(Array(UInt64), String, UInt64)) │
└───────────────────────────────────────────────┘
┌─array_of_variants─┐
│ [[],0,'str_0'] │
│ [[0],1,'str_1'] │
│ [[0,1],2,'str_2'] │
└───────────────────┘
```
```sql
SET use_variant_as_common_type = 1;
SELECT toTypeName(map('a', range(number), 'b', number, 'c', 'str_' || toString(number))) as map_of_variants_type from numbers(1);
SELECT map('a', range(number), 'b', number, 'c', 'str_' || toString(number)) as map_of_variants FROM numbers(3);
```
```text
┌─map_of_variants_type────────────────────────────────┐
│ Map(String, Variant(Array(UInt64), String, UInt64)) │
└─────────────────────────────────────────────────────┘
┌─map_of_variants───────────────┐
│ {'a':[],'b':0,'c':'str_0'} │
│ {'a':[0],'b':1,'c':'str_1'} │
│ {'a':[0,1],'b':2,'c':'str_2'} │
└───────────────────────────────┘
```
Default value: `false`.
## max_partition_size_to_drop
Restriction on dropping partitions in query time.

View File

@ -0,0 +1,245 @@
---
slug: /en/sql-reference/data-types/variant
sidebar_position: 55
sidebar_label: Variant
---
# Variant(T1, T2, T3, ...)
This type represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type
has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value).
The order of nested types doesn't matter: Variant(T1, T2) = Variant(T2, T1).
Nested types can be arbitrary types except Nullable(...), LowCardinality(Nullable(...)) and Variant(...) types.
:::note
The Variant data type is an experimental feature. To use it, set `allow_experimental_variant_type = 1`.
:::
## Creating Variant
Using `Variant` type in table column definition:
```sql
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT v FROM test;
```
```text
┌─v─────────────┐
│ ᴺᵁᴸᴸ │
│ 42 │
│ Hello, World! │
│ [1,2,3] │
└───────────────┘
```
Using CAST from ordinary columns:
```sql
SELECT toTypeName(variant) as type_name, 'Hello, World!'::Variant(UInt64, String, Array(UInt64)) as variant;
```
```text
┌─type_name──────────────────────────────┬─variant───────┐
│ Variant(Array(UInt64), String, UInt64) │ Hello, World! │
└────────────────────────────────────────┴───────────────┘
```
Using functions `if/multiIf` when arguments don't have common type (setting `use_variant_as_common_type` should be enabled for it):
```sql
SET use_variant_as_common_type = 1;
SELECT if(number % 2, number, range(number)) as variant FROM numbers(5);
```
```text
┌─variant───┐
│ [] │
│ 1 │
│ [0,1] │
│ 3 │
│ [0,1,2,3] │
└───────────┘
```
```sql
SET use_variant_as_common_type = 1;
SELECT multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL) AS variant FROM numbers(4);
```
```text
┌─variant───────┐
│ 42 │
│ [1,2,3] │
│ Hello, World! │
│ ᴺᵁᴸᴸ │
└───────────────┘
```
Using functions `array/map` if array elements/map values don't have common type (setting `use_variant_as_common_type` should be enabled for it):
```sql
SET use_variant_as_common_type = 1;
SELECT array(range(number), number, 'str_' || toString(number)) as array_of_variants FROM numbers(3);
```
```text
┌─array_of_variants─┐
│ [[],0,'str_0'] │
│ [[0],1,'str_1'] │
│ [[0,1],2,'str_2'] │
└───────────────────┘
```
```sql
SET use_variant_as_common_type = 1;
SELECT map('a', range(number), 'b', number, 'c', 'str_' || toString(number)) as map_of_variants FROM numbers(3);
```
```text
┌─map_of_variants───────────────┐
│ {'a':[],'b':0,'c':'str_0'} │
│ {'a':[0],'b':1,'c':'str_1'} │
│ {'a':[0,1],'b':2,'c':'str_2'} │
└───────────────────────────────┘
```
## Reading Variant nested types as subcolumns
Variant type supports reading a single nested type from a Variant column using the type name as a subcolumn.
So, if you have column `variant Variant(T1, T2, T3)` you can read a subcolumn of type `T2` using syntax `variant.T2`,
this subcolumn will have type `Nullable(T2)` if `T2` can be inside `Nullable` and `T2` otherwise. This subcolumn will
be the same size as original `Variant` column and will contain `NULL` values (or empty values if `T2` cannot be inside `Nullable`)
in all rows in which original `Variant` column doesn't have type `T2`.
Variant subcolumns can also be read using function `variantElement(variant_column, type_name)`.
Examples:
```sql
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT v, v.String, v.UInt64, v.`Array(UInt64)` FROM test;
```
```text
┌─v─────────────┬─v.String──────┬─v.UInt64─┬─v.Array(UInt64)─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │
│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
└───────────────┴───────────────┴──────────┴─────────────────┘
```
```sql
SELECT toTypeName(v.String), toTypeName(v.UInt64), toTypeName(v.`Array(UInt64)`) FROM test LIMIT 1;
```
```text
┌─toTypeName(v.String)─┬─toTypeName(v.UInt64)─┬─toTypeName(v.Array(UInt64))─┐
│ Nullable(String) │ Nullable(UInt64) │ Array(UInt64) │
└──────────────────────┴──────────────────────┴─────────────────────────────┘
```
```sql
SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantElement(v, 'Array(UInt64)') FROM test;
```
```text
┌─v─────────────┬─variantElement(v, 'String')─┬─variantElement(v, 'UInt64')─┬─variantElement(v, 'Array(UInt64)')─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │
│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
└───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘
```
## Conversion between Variant column and other columns
There are 3 possible conversions that can be performed with Variant column.
### Converting an ordinary column to a Variant column
It is possible to convert an ordinary column with type `T` to a `Variant` column containing this type:
```sql
SELECT toTypeName(variant) as type_name, 'Hello, World!'::Variant(UInt64, String, Array(UInt64)) as variant;
```
```text
┌─type_name──────────────────────────────┬─variant───────┐
│ Variant(Array(UInt64), String, UInt64) │ Hello, World! │
└────────────────────────────────────────┴───────────────┘
```
### Converting a Variant column to an ordinary column
It is possible to convert a `Variant` column to an ordinary column. In this case all nested variants will be converted to a destination type:
```sql
CREATE TABLE test (v Variant(UInt64, String)) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('42.42');
SELECT v::Nullable(Float64) FROM test;
```
```text
┌─CAST(v, 'Nullable(Float64)')─┐
│ ᴺᵁᴸᴸ │
│ 42 │
│ 42.42 │
└──────────────────────────────┘
```
### Converting a Variant to another Variant
It is possible to convert a `Variant` column to another `Variant` column, but only if the destination `Variant` column contains all nested types from the original `Variant`:
```sql
CREATE TABLE test (v Variant(UInt64, String)) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('String');
SELECT v::Variant(UInt64, String, Array(UInt64)) FROM test;
```
```text
┌─CAST(v, 'Variant(UInt64, String, Array(UInt64))')─┐
│ ᴺᵁᴸᴸ │
│ 42 │
│ String │
└───────────────────────────────────────────────────┘
```
## Reading Variant type from the data
All text formats (TSV, CSV, CustomSeparated, Values, JSONEachRow, etc) support reading `Variant` type. During data parsing ClickHouse tries to insert the value into the most appropriate variant type.
Example:
```sql
SELECT
v,
variantElement(v, 'String') AS str,
variantElement(v, 'UInt64') AS num,
variantElement(v, 'Float64') AS float,
variantElement(v, 'DateTime') AS date,
variantElement(v, 'Array(UInt64)') AS arr
FROM format(JSONEachRow, 'v Variant(String, UInt64, Float64, DateTime, Array(UInt64))', $$
{"v" : "Hello, World!"},
{"v" : 42},
{"v" : 42.42},
{"v" : "2020-01-01 00:00:00"},
{"v" : [1, 2, 3]}
$$)
```
```text
┌─v───────────────────┬─str───────────┬──num─┬─float─┬────────────────date─┬─arr─────┐
│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
│ 42 │ ᴺᵁᴸᴸ │ 42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
│ 42.42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 42.42 │ ᴺᵁᴸᴸ │ [] │
│ 2020-01-01 00:00:00 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2020-01-01 00:00:00 │ [] │
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
└─────────────────────┴───────────────┴──────┴───────┴─────────────────────┴─────────┘
```

View File

@ -2832,6 +2832,43 @@ Result:
└─────────────────────────────────────────────────────────────────────────┘
```
## variantElement
Extracts a column with specified type from a `Variant` column.
**Syntax**
```sql
variantElement(variant, type_name[, default_value])
```
**Arguments**
- `variant` — Variant column. [Variant](../../sql-reference/data-types/variant.md).
- `type_name` — The name of the variant type to extract. [String](../../sql-reference/data-types/string.md).
- `default_value` — The default value that will be used if the variant doesn't have a variant with the specified type. Can be any type. Optional.
**Returned value**
- Subcolumn of a `Variant` column with specified type.
**Example**
```sql
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantElement(v, 'Array(UInt64)') FROM test;
```
```text
┌─v─────────────┬─variantElement(v, 'String')─┬─variantElement(v, 'UInt64')─┬─variantElement(v, 'Array(UInt64)')─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │
│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │
│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
└───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘
```
## minSampleSizeConversion
Calculates minimum required sample size for an A/B test comparing conversions (proportions) in two samples.

View File

@ -181,6 +181,23 @@ public:
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "variantElement" && isVariant(column_type) && second_argument_constant_node)
{
/// Replace `variantElement(variant_argument, type_name)` with `variant_argument.type_name`.
const auto & variant_element_constant_value = second_argument_constant_node->getValue();
String subcolumn_name;
if (variant_element_constant_value.getType() != Field::Types::String)
return;
subcolumn_name = variant_element_constant_value.get<const String &>();
column.name += '.';
column.name += subcolumn_name;
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "mapContains" && column_type.isMap())
{
const auto & data_type_map = assert_cast<const DataTypeMap &>(*column.type);

View File

@ -159,4 +159,26 @@ void ColumnConst::compareColumn(
std::fill(compare_results.begin(), compare_results.end(), res);
}
/// Builds a constant column holding `value`.
/// The data column is an empty clone of `column` with `value` inserted into it,
/// and the resulting ColumnConst is created with size 1.
ColumnConst::Ptr createColumnConst(const ColumnPtr & column, Field value)
{
    auto single_value_column = column->cloneEmpty();
    single_value_column->insert(value);

    return ColumnConst::create(std::move(single_value_column), 1);
}
/// Builds a constant column holding the value found in `column` at row `const_value_index`.
/// The data column is an empty clone of `column` with that one row copied into it,
/// and the resulting ColumnConst is created with size 1.
ColumnConst::Ptr createColumnConst(const ColumnPtr & column, size_t const_value_index)
{
    auto single_value_column = column->cloneEmpty();
    single_value_column->insertFrom(*column, const_value_index);

    return ColumnConst::create(std::move(single_value_column), 1);
}
/// Builds a constant column holding the default value of `column`.
/// The data column is an empty clone of `column` with one default row inserted,
/// and the resulting ColumnConst is created with size 1.
ColumnConst::Ptr createColumnConstWithDefaultValue(const ColumnPtr & column)
{
    auto default_value_column = column->cloneEmpty();
    default_value_column->insertDefault();

    return ColumnConst::create(std::move(default_value_column), 1);
}
}

View File

@ -292,4 +292,9 @@ public:
bool isCollationSupported() const override { return data->isCollationSupported(); }
};
/// Helpers that build a ColumnConst whose data column has the same structure as `column`.

/// Creates a constant column holding the given `value`.
ColumnConst::Ptr createColumnConst(const ColumnPtr & column, Field value);
/// Creates a constant column holding the value stored in `column` at row `const_value_index`.
ColumnConst::Ptr createColumnConst(const ColumnPtr & column, size_t const_value_index);
/// Creates a constant column holding the default value of `column`.
ColumnConst::Ptr createColumnConstWithDefaultValue(const ColumnPtr &column);
}

View File

@ -141,6 +141,11 @@ void ColumnMap::updateHashFast(SipHash & hash) const
nested->updateHashFast(hash);
}
/// Appends row `n` of `src` to this column.
/// `src` must be a ColumnMap: the insertion is delegated to the nested columns.
void ColumnMap::insertFrom(const IColumn & src, size_t n)
{
    const auto & source_map = assert_cast<const ColumnMap &>(src);
    nested->insertFrom(source_map.getNestedColumn(), n);
}
void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
nested->insertRangeFrom(

View File

@ -64,6 +64,7 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
void insertFrom(const IColumn & src_, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;

View File

@ -833,24 +833,22 @@ void ColumnNullable::checkConsistency() const
"Logical error: Sizes of nested column and null map of Nullable column are not equal");
}
ColumnPtr ColumnNullable::createWithOffsets(const IColumn::Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const
ColumnPtr ColumnNullable::createWithOffsets(const IColumn::Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const
{
ColumnPtr new_values;
ColumnPtr new_null_map;
if (default_field.getType() == Field::Types::Null)
const ColumnNullable & nullable_column_with_default_value = assert_cast<const ColumnNullable &>(column_with_default_value.getDataColumn());
if (nullable_column_with_default_value.isNullAt(0))
{
auto default_column = nested_column->cloneEmpty();
default_column->insertDefault();
/// Value in main column, when null map is 1 is implementation defined. So, take any value.
new_values = nested_column->createWithOffsets(offsets, (*default_column)[0], total_rows, shift);
new_null_map = null_map->createWithOffsets(offsets, Field(1u), total_rows, shift);
new_values = nested_column->createWithOffsets(offsets, *createColumnConstWithDefaultValue(nested_column), total_rows, shift);
new_null_map = null_map->createWithOffsets(offsets, *createColumnConst(null_map, Field(1u)), total_rows, shift);
}
else
{
new_values = nested_column->createWithOffsets(offsets, default_field, total_rows, shift);
new_null_map = null_map->createWithOffsets(offsets, Field(0u), total_rows, shift);
new_values = nested_column->createWithOffsets(offsets, *ColumnConst::create(nullable_column_with_default_value.getNestedColumnPtr(), 1), total_rows, shift);
new_null_map = null_map->createWithOffsets(offsets, *createColumnConst(null_map, Field(0u)), total_rows, shift);
}
return ColumnNullable::create(new_values, new_null_map);
@ -896,10 +894,7 @@ ColumnPtr makeNullable(const ColumnPtr & column)
ColumnPtr makeNullableOrLowCardinalityNullable(const ColumnPtr & column)
{
if (isColumnNullable(*column))
return column;
if (isColumnLowCardinalityNullable(*column))
if (isColumnNullableOrLowCardinalityNullable(*column))
return column;
if (isColumnConst(*column))
@ -925,4 +920,21 @@ ColumnPtr makeNullableSafe(const ColumnPtr & column)
return column;
}
/// Returns a nullable version of `column` when one can be built:
///  - a column that is already Nullable or LowCardinality(Nullable) is returned as is;
///  - a constant column gets its data column converted recursively, keeping its size;
///  - a LowCardinality column is converted with cloneNullable();
///  - any other column is wrapped with makeNullable() if canBeInsideNullable() allows it.
/// Otherwise the column is returned unchanged.
ColumnPtr makeNullableOrLowCardinalityNullableSafe(const ColumnPtr & column)
{
    if (isColumnNullableOrLowCardinalityNullable(*column))
        return column;

    if (isColumnConst(*column))
    {
        const auto & const_column = assert_cast<const ColumnConst &>(*column);
        return ColumnConst::create(makeNullableOrLowCardinalityNullableSafe(const_column.getDataColumnPtr()), column->size());
    }

    if (column->lowCardinality())
    {
        const auto & low_cardinality_column = assert_cast<const ColumnLowCardinality &>(*column);
        return low_cardinality_column.cloneNullable();
    }

    if (!column->canBeInsideNullable())
        return column;

    return makeNullable(column);
}
}

View File

@ -168,7 +168,7 @@ public:
getIndicesOfNonDefaultRowsImpl<ColumnNullable>(indices, from, limit);
}
ColumnPtr createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const override;
ColumnPtr createWithOffsets(const Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const override;
bool isNullable() const override { return true; }
bool isFixedAndContiguous() const override { return false; }
@ -232,5 +232,6 @@ private:
ColumnPtr makeNullable(const ColumnPtr & column);
ColumnPtr makeNullableSafe(const ColumnPtr & column);
ColumnPtr makeNullableOrLowCardinalityNullable(const ColumnPtr & column);
ColumnPtr makeNullableOrLowCardinalityNullableSafe(const ColumnPtr & column);
}

View File

@ -2,6 +2,7 @@
#include <Columns/ColumnObject.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Common/iota.h>
#include <DataTypes/ObjectUtils.h>
#include <DataTypes/getLeastSupertype.h>
@ -475,7 +476,7 @@ void ColumnObject::Subcolumn::finalize()
{
auto values = part->index(*offsets, offsets->size());
values = castColumn({values, from_type, ""}, to_type);
part = values->createWithOffsets(offsets_data, to_type->getDefault(), part_size, /*shift=*/ 0);
part = values->createWithOffsets(offsets_data, *createColumnConstWithDefaultValue(result_column->getPtr()), part_size, /*shift=*/ 0);
}
}

View File

@ -1,6 +1,7 @@
#include <Columns/ColumnCompressed.h>
#include <Columns/ColumnSparse.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnsCommon.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <Common/HashTable/Hash.h>
@ -130,7 +131,7 @@ StringRef ColumnSparse::getDataAt(size_t n) const
ColumnPtr ColumnSparse::convertToFullColumnIfSparse() const
{
return values->createWithOffsets(getOffsetsData(), (*values)[0], _size, /*shift=*/ 1);
return values->createWithOffsets(getOffsetsData(), *createColumnConst(values, 0), _size, /*shift=*/ 1);
}
void ColumnSparse::insertSingleValue(const Inserter & inserter)

File diff suppressed because it is too large Load Diff

307
src/Columns/ColumnVariant.h Normal file
View File

@ -0,0 +1,307 @@
#pragma once
#include <Columns/IColumn.h>
#include <Columns/ColumnVector.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
/**
* Column for storing Variant(...) type values.
* Variant type represents a union of other data types.
* For example, type Variant(T1, T2, ..., TN) means that each row of this type
* has a value of either type T1 or T2 or ... or TN or none of them (NULL value)
*
* ColumnVariant stores:
* - The discriminators column, which determines which variant is stored in each row.
* - The offsets column, which determines the offset in the corresponding variant column in each row.
* - The list of variant columns with only real values (so the sizes of variant columns can be different).
* Discriminator is an index of a variant in the variants list, it also has special value called NULL_DISCRIMINATOR
* that indicates that the value in the row is NULL.
*
* We want to be able to extend Variant column for free without rewriting the data, but as we don't care about the
* order of variants during Variant creation (we want Variant(T1, T2) to be the same as Variant(T2, T1)), we support
* some global order of nested types inside Variant during type creation, so after extension the order of variant types
* (and so their discriminators) can change. For example: Variant(T1, T3) -> Variant(T1, T2, T3).
* To avoid full rewrite of discriminators column on Variant extension, we differentiate local order of variants
* inside a column and global order of variants created during type creation. So, ColumnVariant stores only local
* discriminators and additionally stores the mapping between global and local discriminators.
* So, when we need to extend Variant column with new variant, we can just append it to a list of variant columns
* with new local discriminator and update mapping from global to local orders.
*
* Note that two instances of ColumnVariant can have different local orders, so we should always use global
* discriminators during inter-column interactions.
*
* Let's take an example with type Variant(UInt32, String, Array(UInt32)):
* During type creation we will sort types by their names and get the global order: Array(UInt32), String, UInt32.
* So, type Array(UInt32) will have global discriminator 0, String - 1 and UInt32 - 2.
* Let's say we have a column with local order (String, UInt32, Array(UInt32)) and values:
* 'Hello', 42, NULL, 'World', 43, [1, 2, 3], NULL, 44
*
* Let's see how these values will be stored in ColumnVariant:
*
* local_to_global_discriminators: {0 : 1, 1 : 2, 2 : 0}
* global_to_local_discriminators: {0 : 2, 1 : 0, 2 : 1}
* local_discriminators offsets String UInt32 Array(UInt32)
* 0 0 'Hello' 42 [1, 2, 3]
* 1 0 'World' 43
* NULL_DISCRIMINATOR 0 44
* 0 1
* 1 1
* 2 0
* NULL_DISCRIMINATOR 0
* 1 2
*
*/
class ColumnVariant final : public COWHelper<IColumn, ColumnVariant>
{
public:
using Discriminator = UInt8;
using Discriminators = PaddedPODArray<Discriminator>;
using ColumnDiscriminators = ColumnVector<Discriminator>;
using ColumnOffsets = ColumnVector<Offset>;
static constexpr UInt8 NULL_DISCRIMINATOR = std::numeric_limits<Discriminator>::max(); /// 255
static constexpr size_t MAX_NESTED_COLUMNS = std::numeric_limits<Discriminator>::max(); /// 255
private:
friend class COWHelper<IColumn, ColumnVariant>;
using NestedColumns = std::vector<WrappedPtr>;
/// Create an empty column with provided variants.
/// Variants are in global order.
explicit ColumnVariant(MutableColumns && variants_);
/// Variants are in local order according to provided mapping.
explicit ColumnVariant(MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
/// Create column from discriminators column and list of variant columns.
/// Offsets column should be constructed according to the discriminators.
/// Variants are in global order.
ColumnVariant(MutableColumnPtr local_discriminators_, MutableColumns && variants_);
/// Variants are in local order according to provided mapping.
ColumnVariant(MutableColumnPtr local_discriminators_, MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
/// Create column from discriminators column, offsets column and list of variant columns.
/// Variants are in global order.
ColumnVariant(MutableColumnPtr local_discriminators_, MutableColumnPtr offsets_, MutableColumns && variants_);
/// Variants are in local order according to provided mapping.
ColumnVariant(MutableColumnPtr local_discriminators_, MutableColumnPtr offsets_, MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
ColumnVariant(const ColumnVariant &) = default;
public:
/** Create immutable column using immutable arguments. These arguments may be shared with other variants.
* Use IColumn::mutate in order to make mutable column and mutate shared nested variants.
*/
using Base = COWHelper<IColumn, ColumnVariant>;
static Ptr create(const Columns & variants_) { return create(variants_, {}); }
static Ptr create(const Columns & variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
static Ptr create(const ColumnPtr & local_discriminators_, const Columns & variants_) { return create(local_discriminators_, variants_, {}); }
static Ptr create(const ColumnPtr & local_discriminators_, const Columns & variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
static Ptr create(const ColumnPtr & local_discriminators_, const DB::ColumnPtr & offsets_, const Columns & variants_) { return create(local_discriminators_, offsets_, variants_, {}); }
static Ptr create(const ColumnPtr & local_discriminators_, const DB::ColumnPtr & offsets_, const Columns & variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
static MutablePtr create(MutableColumns && variants_)
{
return Base::create(std::move(variants_));
}
static MutablePtr create(MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_)
{
return Base::create(std::move(variants_), local_to_global_discriminators_);
}
static MutablePtr create(MutableColumnPtr local_discriminators_, MutableColumns && variants_)
{
return Base::create(std::move(local_discriminators_), std::move(variants_));
}
static MutablePtr create(MutableColumnPtr local_discriminators_, MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_)
{
return Base::create(std::move(local_discriminators_), std::move(variants_), local_to_global_discriminators_);
}
static MutablePtr create(MutableColumnPtr local_discriminators_, MutableColumnPtr offsets_, MutableColumns && variants_)
{
return Base::create(std::move(local_discriminators_), std::move(offsets_), std::move(variants_));
}
static MutablePtr create(MutableColumnPtr local_discriminators_, MutableColumnPtr offsets_, MutableColumns && variants_, const std::vector<Discriminator> & local_to_global_discriminators_)
{
return Base::create(std::move(local_discriminators_), std::move(offsets_), std::move(variants_), local_to_global_discriminators_);
}
std::string getName() const override;
const char * getFamilyName() const override { return "Variant"; }
TypeIndex getDataType() const override { return TypeIndex::Variant; }
MutableColumnPtr cloneEmpty() const override;
MutableColumnPtr cloneResized(size_t size) const override;
size_t ALWAYS_INLINE offsetAt(size_t i) const { return getOffsets()[i]; }
Discriminator ALWAYS_INLINE localDiscriminatorAt(size_t i) const { return getLocalDiscriminators()[i]; }
Discriminator ALWAYS_INLINE globalDiscriminatorAt(size_t i) const { return globalDiscriminatorByLocal(getLocalDiscriminators()[i]); }
/// Translates a local discriminator of this column into the global one.
Discriminator ALWAYS_INLINE globalDiscriminatorByLocal(Discriminator local_discr) const
{
    /// NULL_DISCRIMINATOR is the same in local and global orders, so it is returned unchanged.
    if (local_discr == NULL_DISCRIMINATOR)
        return NULL_DISCRIMINATOR;

    return local_to_global_discriminators[local_discr];
}
/// Translates a global discriminator into the local one used by this column.
Discriminator ALWAYS_INLINE localDiscriminatorByGlobal(Discriminator global_discr) const
{
    /// NULL_DISCRIMINATOR is the same in local and global orders, so it is returned unchanged.
    if (global_discr == NULL_DISCRIMINATOR)
        return NULL_DISCRIMINATOR;

    return global_to_local_discriminators[global_discr];
}
size_t size() const override
{
return offsets->size();
}
Field operator[](size_t n) const override;
void get(size_t n, Field & res) const override;
bool isDefaultAt(size_t n) const override;
bool isNullAt(size_t n) const override;
StringRef getDataAt(size_t n) const override;
void insertData(const char * pos, size_t length) override;
void insert(const Field & x) override;
void insertIntoVariant(const Field & x, Discriminator global_discr);
void insertFrom(const IColumn & src_, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
void insertDefault() override;
void insertManyDefaults(size_t length) override;
void popBack(size_t n) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
template <typename Type>
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer_stream) override;
/// Variant type is not comparable.
int compareAt(size_t, size_t, const IColumn &, int) const override
{
return 0;
}
void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method compareColumn is not supported for ColumnVariant");
}
bool hasEqualValues() const override;
void getExtremes(Field & min, Field & max) const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override;
void reserve(size_t n) override;
void ensureOwnership() override;
size_t byteSize() const override;
size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
ColumnPtr compress() const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
void finalize() override;
bool isFinalized() const override;
/// Accessors for the nested variant columns.
/// "Local" accessors index `variants` directly with the given discriminator.
/// "Global" accessors first translate the discriminator through
/// global_to_local_discriminators.at(), which throws std::out_of_range when the
/// global discriminator is outside that vector.
const IColumn & getVariantByLocalDiscriminator(size_t discr) const { return *variants[discr]; }
const IColumn & getVariantByGlobalDiscriminator(size_t discr) const { return *variants[global_to_local_discriminators.at(discr)]; }
IColumn & getVariantByLocalDiscriminator(size_t discr) { return *variants[discr]; }
IColumn & getVariantByGlobalDiscriminator(size_t discr) { return *variants[global_to_local_discriminators.at(discr)]; }
/// Same lookups as above, but returning the owning pointer instead of a reference to the column.
const ColumnPtr & getVariantPtrByLocalDiscriminator(size_t discr) const { return variants[discr]; }
const ColumnPtr & getVariantPtrByGlobalDiscriminator(size_t discr) const { return variants[global_to_local_discriminators.at(discr)]; }
ColumnPtr & getVariantPtrByLocalDiscriminator(size_t discr) { return variants[discr]; }
ColumnPtr & getVariantPtrByGlobalDiscriminator(size_t discr) { return variants[global_to_local_discriminators.at(discr)]; }
/// Column of local discriminators: as a column, as its owning pointer,
/// and as the raw data container (after an assert_cast to ColumnDiscriminators).
const IColumn & getLocalDiscriminatorsColumn() const { return *local_discriminators; }
IColumn & getLocalDiscriminatorsColumn() { return *local_discriminators; }
const ColumnPtr & getLocalDiscriminatorsPtr() const { return local_discriminators; }
ColumnPtr & getLocalDiscriminatorsPtr() { return local_discriminators; }
const Discriminators & ALWAYS_INLINE getLocalDiscriminators() const { return assert_cast<const ColumnDiscriminators &>(*local_discriminators).getData(); }
Discriminators & ALWAYS_INLINE getLocalDiscriminators() { return assert_cast<ColumnDiscriminators &>(*local_discriminators).getData(); }
/// Column of offsets: as a column, as its owning pointer,
/// and as the raw data container (after an assert_cast to ColumnOffsets).
const IColumn & getOffsetsColumn() const { return *offsets; }
IColumn & getOffsetsColumn() { return *offsets; }
const ColumnPtr & getOffsetsPtr() const { return offsets; }
ColumnPtr & getOffsetsPtr() { return offsets; }
const Offsets & ALWAYS_INLINE getOffsets() const { return assert_cast<const ColumnOffsets &>(*offsets).getData(); }
Offsets & ALWAYS_INLINE getOffsets() { return assert_cast<ColumnOffsets &>(*offsets).getData(); }
/// Number of nested variant columns (empty ones included).
size_t getNumVariants() const { return variants.size(); }
bool hasOnlyNulls() const
{
/// If all variants are empty, we have only NULL values.
return std::all_of(variants.begin(), variants.end(), [](const WrappedPtr & v){ return v->empty(); });
}
/// Check if local and global order is the same, i.e. whether
/// local_to_global_discriminators maps every position to itself.
bool hasGlobalVariantsOrder() const
{
    size_t local_discr = 0;
    for (const auto global_discr : local_to_global_discriminators)
    {
        if (global_discr != local_discr)
            return false;
        ++local_discr;
    }
    return true;
}
/// Check if we have only 1 non-empty variant and no NULL values,
/// and if so, return the discriminator of this non-empty column.
/// Returns an empty optional otherwise (declared here, defined out of line).
std::optional<Discriminator> getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls() const;
/// Apply null map to a Variant column.
/// Replace corresponding discriminators with NULL_DISCRIMINATOR
/// and filter out rows in variants if needed.
void applyNullMap(const ColumnVector<UInt8>::Container & null_map);
/// NOTE(review): presumably the same operation with the meaning of the map inverted
/// (see the `inverted` template parameter of applyNullMapImpl) - confirm in the implementation.
void applyNegatedNullMap(const ColumnVector<UInt8>::Container & null_map);
private:
/// NOTE(review): judging by the name, fills the global<->local mapping with the identity - confirm in the implementation.
void initIdentityGlobalToLocalDiscriminatorsMapping();
/// Shared implementation behind applyNullMap/applyNegatedNullMap, parameterised by `inverted`.
template <bool inverted>
void applyNullMapImpl(const ColumnVector<UInt8>::Container & null_map);
/// Column of local discriminators; read as ColumnDiscriminators by getLocalDiscriminators().
WrappedPtr local_discriminators;
/// Column of offsets; read as ColumnOffsets by getOffsets().
WrappedPtr offsets;
/// Nested variant columns, indexed by local discriminator.
NestedColumns variants;
/// Indexed by global discriminator, yields the local one (an index into `variants`).
std::vector<Discriminator> global_to_local_discriminators;
/// Indexed by local discriminator, yields the global one.
std::vector<Discriminator> local_to_global_discriminators;
};
}

View File

@ -2,6 +2,7 @@
#include <Columns/ColumnCompressed.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnConst.h>
#include <Columns/MaskOperations.h>
#include <Columns/RadixSortHelper.h>
#include <IO/WriteHelpers.h>
@ -940,7 +941,7 @@ ColumnPtr ColumnVector<T>::compress() const
}
template <typename T>
ColumnPtr ColumnVector<T>::createWithOffsets(const IColumn::Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const
ColumnPtr ColumnVector<T>::createWithOffsets(const IColumn::Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const
{
if (offsets.size() + shift != size())
throw Exception(ErrorCodes::LOGICAL_ERROR,
@ -949,7 +950,7 @@ ColumnPtr ColumnVector<T>::createWithOffsets(const IColumn::Offsets & offsets, c
auto res = this->create();
auto & res_data = res->getData();
T default_value = static_cast<T>(default_field.safeGet<T>());
T default_value = assert_cast<const ColumnVector<T> &>(column_with_default_value.getDataColumn()).getElement(0);
res_data.resize_fill(total_rows, default_value);
for (size_t i = 0; i < offsets.size(); ++i)
res_data[offsets[i]] = data[i + shift];

View File

@ -300,7 +300,7 @@ public:
return this->template getIndicesOfNonDefaultRowsImpl<Self>(indices, from, limit);
}
ColumnPtr createWithOffsets(const IColumn::Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const override;
ColumnPtr createWithOffsets(const IColumn::Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const override;
ColumnPtr compress() const override;

View File

@ -2,6 +2,7 @@
#include <IO/Operators.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnConst.h>
#include <Core/Field.h>
#include <DataTypes/Serializations/SerializationInfo.h>
@ -34,7 +35,7 @@ void IColumn::insertFrom(const IColumn & src, size_t n)
insert(src[n]);
}
ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const
ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const
{
if (offsets.size() + shift != size())
throw Exception(ErrorCodes::LOGICAL_ERROR,
@ -50,14 +51,14 @@ ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const Field & defa
current_offset = offsets[i];
if (offsets_diff > 1)
res->insertMany(default_field, offsets_diff - 1);
res->insertManyFrom(column_with_default_value.getDataColumn(), 0, offsets_diff - 1);
res->insertFrom(*this, i + shift);
}
ssize_t offsets_diff = static_cast<ssize_t>(total_rows) - current_offset;
if (offsets_diff > 1)
res->insertMany(default_field, offsets_diff - 1);
res->insertManyFrom(column_with_default_value.getDataColumn(), 0, offsets_diff - 1);
return res;
}
@ -83,6 +84,11 @@ bool isColumnNullable(const IColumn & column)
return checkColumn<ColumnNullable>(column);
}
/// True when `column` passes isColumnNullable() or, failing that, isColumnLowCardinalityNullable().
bool isColumnNullableOrLowCardinalityNullable(const IColumn & column)
{
    if (isColumnNullable(column))
        return true;
    return isColumnLowCardinalityNullable(column);
}
bool isColumnConst(const IColumn & column)
{
return checkColumn<ColumnConst>(column);

View File

@ -34,6 +34,7 @@ class Arena;
class ColumnGathererStream;
class Field;
class WeakHash32;
class ColumnConst;
/*
* Represents a set of equal ranges in previous column to perform sorting in current column.
@ -459,10 +460,10 @@ public:
/// Returns column with @total_size elements.
/// In result column values from current column are at positions from @offsets.
/// Other values are filled by @default_value.
/// Other values are filled by value from @column_with_default_value.
/// @shift means how much rows to skip from the beginning of current column.
/// Used to create full column from sparse.
[[nodiscard]] virtual Ptr createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const;
[[nodiscard]] virtual Ptr createWithOffsets(const Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const;
/// Compress column in memory to some representation that allows to decompress it back.
/// Return itself if compression is not applicable for this column type.
@ -659,4 +660,7 @@ bool isColumnConst(const IColumn & column);
/// True if column is a ColumnNullable instance. It's just syntactic sugar for a type check.
bool isColumnNullable(const IColumn & column);
/// True if column is ColumnNullable or ColumnLowCardinality with nullable nested column.
bool isColumnNullableOrLowCardinalityNullable(const IColumn & column);
}

View File

@ -17,7 +17,7 @@ namespace ErrorCodes
}
template <typename T>
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverted)
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverted, T default_value)
{
if (mask.size() < data.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Mask size should be no less than data size.");
@ -38,7 +38,7 @@ void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & ma
--from;
}
else
data[index] = T();
data[index] = default_value;
--index;
}
@ -49,7 +49,7 @@ void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & ma
/// Explicit instantiations - not to place the implementation of the function above in the header file.
#define INSTANTIATE(TYPE) \
template void expandDataByMask<TYPE>(PaddedPODArray<TYPE> &, const PaddedPODArray<UInt8> &, bool);
template void expandDataByMask<TYPE>(PaddedPODArray<TYPE> &, const PaddedPODArray<UInt8> &, bool, TYPE);
INSTANTIATE(UInt8)
INSTANTIATE(UInt16)

View File

@ -13,7 +13,7 @@ namespace DB
/// If inverted is true, we will work with inverted mask. This function is used in implementations of
/// expand() method in IColumn interface.
template <typename T>
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverted);
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverted, T default_value = T());
struct MaskInfo
{

View File

@ -0,0 +1,692 @@
#include <Columns/ColumnVariant.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <gtest/gtest.h>
using namespace DB;
/// A Variant assembled from empty nested columns has no rows, no discriminators and no offsets.
TEST(ColumnVariant, CreateFromEmptyColumns)
{
    MutableColumns empty_variants;
    empty_variants.emplace_back(ColumnUInt32::create());
    empty_variants.emplace_back(ColumnString::create());

    auto variant = ColumnVariant::create(std::move(empty_variants));
    ASSERT_TRUE(variant->empty() && variant->getLocalDiscriminators().empty() && variant->getOffsets().empty());
}
/// Same as CreateFromEmptyColumns, but an explicit local-to-global order is supplied.
TEST(ColumnVariant, CreateFromEmptyColumnsWithLocalOrder)
{
    MutableColumns empty_variants;
    empty_variants.emplace_back(ColumnUInt32::create());
    empty_variants.emplace_back(ColumnString::create());

    std::vector<ColumnVariant::Discriminator> swapped_order = {1, 0};
    auto variant = ColumnVariant::create(std::move(empty_variants), swapped_order);
    ASSERT_TRUE(variant->empty() && variant->getLocalDiscriminators().empty() && variant->getOffsets().empty());

    /// NOTE(review): the order passed in is {1, 0}, yet the identity mapping is expected in both
    /// directions - presumably an all-empty column is rearranged into the global order on
    /// construction; confirm against the ColumnVariant constructor.
    for (ColumnVariant::Discriminator discr = 0; discr != 2; ++discr)
    {
        ASSERT_EQ(variant->localDiscriminatorByGlobal(discr), discr);
    }
    for (ColumnVariant::Discriminator discr = 0; discr != 2; ++discr)
    {
        ASSERT_EQ(variant->globalDiscriminatorByLocal(discr), discr);
    }
}
/// Fixture #1: three nested columns - UInt64 {42}, String {"Hello", "World"} and an empty UInt32.
MutableColumns createColumns1()
{
    auto numbers = ColumnUInt64::create();
    numbers->insertValue(42);

    auto strings = ColumnString::create();
    for (const char * word : {"Hello", "World"})
        strings->insertData(word, 5);

    MutableColumns result;
    result.push_back(std::move(numbers));
    result.push_back(std::move(strings));
    result.push_back(ColumnUInt32::create());
    return result;
}
/// Fixture #1: discriminators for five rows - 0, 1, NULL, 1, NULL.
MutableColumnPtr createDiscriminators1()
{
    const std::vector<ColumnVariant::Discriminator> pattern
        = {0, 1, ColumnVariant::NULL_DISCRIMINATOR, 1, ColumnVariant::NULL_DISCRIMINATOR};

    auto result = ColumnVariant::ColumnDiscriminators::create();
    for (const auto discr : pattern)
        result->insertValue(discr);
    return result;
}
/// Rearranges `columns` (given in global order) so that position i holds the column
/// whose global discriminator is local_to_global_order[i].
void reorderColumns(const std::vector<ColumnVariant::Discriminator> & local_to_global_order, MutableColumns & columns)
{
    MutableColumns reordered;
    for (size_t local = 0; local != local_to_global_order.size(); ++local)
        reordered.push_back(std::move(columns[local_to_global_order[local]]));
    columns.swap(reordered);
}
/// Rewrites every non-NULL discriminator in place from its global value to the local value
/// implied by `local_to_global_order`. `discriminators` must point to a ColumnDiscriminators.
template <typename Ptr>
void reorderDiscriminators(const std::vector<ColumnVariant::Discriminator> & local_to_global_order, Ptr & discriminators)
{
    /// Invert the mapping: global discriminator -> local discriminator.
    std::vector<ColumnVariant::Discriminator> global_to_local(local_to_global_order.size());
    size_t local = 0;
    for (const auto global : local_to_global_order)
    {
        global_to_local[global] = local;
        ++local;
    }

    auto * typed_column = assert_cast<ColumnVariant::ColumnDiscriminators *>(discriminators.get());
    for (auto & discr : typed_column->getData())
    {
        if (discr == ColumnVariant::NULL_DISCRIMINATOR)
            continue;
        discr = global_to_local[discr];
    }
}
/// Fixture #1: offsets for five rows - 0, 0, 0, 1, 0.
MutableColumnPtr createOffsets1()
{
    const std::vector<UInt64> values = {0, 0, 0, 1, 0};

    auto result = ColumnVariant::ColumnOffsets::create();
    for (const auto value : values)
        result->insertValue(value);
    return result;
}
std::vector<ColumnVariant::Discriminator> createLocalToGlobalOrder1()
{
std::vector<ColumnVariant::Discriminator> local_to_global_discriminators;
local_to_global_discriminators.push_back(1);
local_to_global_discriminators.push_back(2);
local_to_global_discriminators.push_back(0);
return local_to_global_discriminators;
}
/// Verifies that `column` holds fixture #1: rows {42, "Hello", NULL, "World", NULL}.
/// Offsets are only checked for the non-NULL rows (0, 1 and 3).
void checkColumnVariant1(ColumnVariant * column)
{
    const auto & offsets = column->getOffsets();
    ASSERT_EQ(column->size(), 5);
    ASSERT_EQ(offsets[0], 0);
    ASSERT_EQ(offsets[1], 0);
    ASSERT_EQ(offsets[3], 1);
    ASSERT_TRUE(column->isDefaultAt(2) && column->isDefaultAt(4));
    /// Row 0 lives in the ColumnUInt64 variant of createColumns1(), so read it back as UInt64
    /// (as every other test in this file does) rather than as UInt32.
    ASSERT_EQ((*column)[0].get<UInt64>(), 42);
    ASSERT_EQ((*column)[1].get<String>(), "Hello");
    ASSERT_TRUE((*column)[2].isNull());
    ASSERT_EQ((*column)[3].get<String>(), "World");
    ASSERT_TRUE((*column)[4].isNull());
}
/// Verifies the discriminator mappings of fixture #1 built with createLocalToGlobalOrder1():
/// first the global<->local translation tables, then the per-row local and global discriminators.
void checkColumnVariant1Order(ColumnVariant * column)
{
    using Discriminator = ColumnVariant::Discriminator;
    constexpr Discriminator null_discr = ColumnVariant::NULL_DISCRIMINATOR;

    const std::vector<Discriminator> local_by_global = {2, 0, 1};
    for (Discriminator global = 0; global != 3; ++global)
    {
        ASSERT_EQ(column->localDiscriminatorByGlobal(global), local_by_global[global]);
    }

    const std::vector<Discriminator> global_by_local = {1, 2, 0};
    for (Discriminator local = 0; local != 3; ++local)
    {
        ASSERT_EQ(column->globalDiscriminatorByLocal(local), global_by_local[local]);
    }

    const std::vector<Discriminator> local_per_row = {2, 0, null_discr, 0, null_discr};
    for (size_t row = 0; row != local_per_row.size(); ++row)
    {
        ASSERT_EQ(column->localDiscriminatorAt(row), local_per_row[row]);
    }

    const std::vector<Discriminator> global_per_row = {0, 1, null_discr, 1, null_discr};
    for (size_t row = 0; row != global_per_row.size(); ++row)
    {
        ASSERT_EQ(column->globalDiscriminatorAt(row), global_per_row[row]);
    }
}
/// Builds fixture #1 from discriminators and variants only (no offsets, global order).
TEST(ColumnVariant, CreateFromDiscriminatorsAndColumns)
{
    auto variant = ColumnVariant::create(createDiscriminators1(), createColumns1());
    checkColumnVariant1(variant.get());
}
/// Builds fixture #1 from discriminators and variants that were rearranged into a local order.
TEST(ColumnVariant, CreateFromDiscriminatorsAndColumnsWithLocalOrder)
{
    auto order = createLocalToGlobalOrder1();

    auto variants = createColumns1();
    reorderColumns(order, variants);

    auto discriminators = createDiscriminators1();
    reorderDiscriminators(order, discriminators);

    auto variant = ColumnVariant::create(std::move(discriminators), std::move(variants), createLocalToGlobalOrder1());
    checkColumnVariant1(variant.get());
    checkColumnVariant1Order(variant.get());
}
/// Builds fixture #1 from discriminators, explicit offsets and variants (global order).
TEST(ColumnVariant, CreateFromDiscriminatorsOffsetsAndColumns)
{
    auto variant = ColumnVariant::create(createDiscriminators1(), createOffsets1(), createColumns1());
    checkColumnVariant1(variant.get());
}
/// Builds fixture #1 from discriminators, explicit offsets and variants rearranged into a local order.
TEST(ColumnVariant, CreateFromDiscriminatorsOffsetsAndColumnsWithLocalOrder)
{
    auto order = createLocalToGlobalOrder1();

    auto variants = createColumns1();
    reorderColumns(order, variants);

    auto discriminators = createDiscriminators1();
    reorderDiscriminators(order, discriminators);

    auto variant = ColumnVariant::create(
        std::move(discriminators), createOffsets1(), std::move(variants), createLocalToGlobalOrder1());
    checkColumnVariant1(variant.get());
    checkColumnVariant1Order(variant.get());
}
/// Builds a Variant of `size` rows in which every row belongs to the UInt64 variant
/// (values 0 .. size-1); the String and UInt32 variants stay empty and there are no NULLs.
/// With `change_order` the variants and discriminators are rearranged by createLocalToGlobalOrder1().
ColumnVariant::MutablePtr createVariantWithOneFullColumNoNulls(size_t size, bool change_order)
{
    auto full_column = ColumnUInt64::create();
    auto discriminators_column = ColumnVariant::ColumnDiscriminators::create();
    for (size_t row = 0; row != size; ++row)
    {
        full_column->insertValue(row);
        discriminators_column->insertValue(0);
    }

    MutableColumns columns;
    columns.push_back(std::move(full_column));
    columns.push_back(ColumnString::create());
    columns.push_back(ColumnUInt32::create());

    if (!change_order)
        return ColumnVariant::create(std::move(discriminators_column), std::move(columns));

    auto local_to_global_order = createLocalToGlobalOrder1();
    reorderColumns(local_to_global_order, columns);
    reorderDiscriminators(local_to_global_order, discriminators_column);
    return ColumnVariant::create(std::move(discriminators_column), std::move(columns), createLocalToGlobalOrder1());
}
/// With a single full variant and no NULLs, row i must have offset i and value i.
TEST(ColumnVariant, CreateFromDiscriminatorsAndOneFullColumnNoNulls)
{
    auto variant = createVariantWithOneFullColumNoNulls(3, false);
    ASSERT_EQ(variant->size(), 3);

    const auto & offsets = variant->getOffsets();
    for (size_t row = 0; row != 3; ++row)
    {
        ASSERT_EQ(offsets[row], row);
    }
    for (size_t row = 0; row != 3; ++row)
    {
        ASSERT_EQ((*variant)[row].get<UInt64>(), row);
    }
}
/// Same as CreateFromDiscriminatorsAndOneFullColumnNoNulls, but with a local variants order:
/// the full UInt64 variant has global discriminator 0 and local discriminator 2.
TEST(ColumnVariant, CreateFromDiscriminatorsAndOneFullColumnNoNullsWithLocalOrder)
{
    auto column = createVariantWithOneFullColumNoNulls(3, true);
    const auto & offsets = column->getOffsets();
    ASSERT_EQ(column->size(), 3);
    for (size_t row = 0; row != 3; ++row)
    {
        ASSERT_EQ(offsets[row], row);
    }
    for (size_t row = 0; row != 3; ++row)
    {
        ASSERT_EQ((*column)[row].get<UInt64>(), row);
    }
    for (size_t row = 0; row != 3; ++row)
    {
        ASSERT_EQ(column->localDiscriminatorAt(row), 2);
    }
    /// Check the global discriminator of every row; previously row 0 was checked
    /// three times and rows 1 and 2 were never verified.
    for (size_t row = 0; row != 3; ++row)
    {
        ASSERT_EQ(column->globalDiscriminatorAt(row), 0);
    }
}
/// Resizing a non-empty Variant to zero rows yields an empty column.
TEST(ColumnVariant, CloneResizedToEmpty)
{
    auto source = ColumnVariant::create(createDiscriminators1(), createOffsets1(), createColumns1());
    auto shrunk = source->cloneResized(0);
    ASSERT_TRUE(shrunk->empty());
}
/// Growing fixture #1 from 5 to 7 rows: the two appended rows are expected to be NULL
/// with offset 0, and the variant columns keep their original sizes.
TEST(ColumnVariant, CloneResizedToLarge)
{
    auto source = ColumnVariant::create(createDiscriminators1(), createOffsets1(), createColumns1());
    auto enlarged = source->cloneResized(7);
    const auto & enlarged_variant = assert_cast<const ColumnVariant &>(*enlarged);
    ASSERT_EQ(enlarged_variant.size(), 7);

    /// Only row 3 (the second string) has a non-zero offset.
    const auto & offsets = enlarged_variant.getOffsets();
    for (size_t row = 0; row != 7; ++row)
    {
        ASSERT_EQ(offsets[row], row == 3 ? 1 : 0);
    }

    const auto & discriminators = enlarged_variant.getLocalDiscriminators();
    const std::vector<size_t> null_rows = {2, 4, 5, 6};
    for (const size_t row : null_rows)
    {
        ASSERT_EQ(discriminators[row], ColumnVariant::NULL_DISCRIMINATOR);
    }

    ASSERT_EQ(enlarged_variant.getVariantByLocalDiscriminator(0).size(), 1);
    ASSERT_EQ(enlarged_variant.getVariantByLocalDiscriminator(1).size(), 2);
}
/// Shrinking a single-full-variant column from 5 to 3 rows keeps the first 3 values.
TEST(ColumnVariant, CloneResizedWithOneFullColumnNoNulls)
{
    auto source = createVariantWithOneFullColumNoNulls(5, false);
    auto shrunk = source->cloneResized(3);
    const auto & shrunk_variant = assert_cast<const ColumnVariant &>(*shrunk);
    ASSERT_EQ(shrunk_variant.size(), 3);

    const auto & offsets = shrunk_variant.getOffsets();
    for (size_t row = 0; row != 3; ++row)
    {
        ASSERT_EQ(offsets[row], row);
    }

    const auto & discriminators = shrunk_variant.getLocalDiscriminators();
    for (size_t row = 0; row != 3; ++row)
    {
        ASSERT_EQ(discriminators[row], 0);
    }

    const std::vector<size_t> expected_sizes = {3, 0, 0};
    for (size_t discr = 0; discr != expected_sizes.size(); ++discr)
    {
        ASSERT_EQ(shrunk_variant.getVariantByLocalDiscriminator(discr).size(), expected_sizes[discr]);
    }
}
/// Fixture #2: three nested columns - UInt64 {42, 43, 44}, String {"Hello", "World"} and an empty UInt8.
MutableColumns createColumns2()
{
    const std::vector<UInt64> values = {42, 43, 44};
    auto numbers = ColumnUInt64::create();
    for (const auto value : values)
        numbers->insertValue(value);

    auto strings = ColumnString::create();
    for (const char * word : {"Hello", "World"})
        strings->insertData(word, 5);

    MutableColumns result;
    result.push_back(std::move(numbers));
    result.push_back(std::move(strings));
    result.push_back(ColumnUInt8::create());
    return result;
}
TEST(ColumnVariant, CloneResizedGeneral1)
{
    /// Discriminators (D) per row and the contents of the variant columns:
    ///   D:  0, 1, NULL, 0, 1, NULL, 0
    ///   c1: 42, 43, 44
    ///   c2: Hello, World
    ///   c3: (empty)
    const std::vector<ColumnVariant::Discriminator> rows
        = {0, 1, ColumnVariant::NULL_DISCRIMINATOR, 0, 1, ColumnVariant::NULL_DISCRIMINATOR, 0};
    auto discriminators_column = ColumnVariant::ColumnDiscriminators::create();
    for (const auto discr : rows)
        discriminators_column->insertValue(discr);

    auto source = ColumnVariant::create(std::move(discriminators_column), createColumns2());
    auto shrunk = source->cloneResized(4);
    const auto & shrunk_variant = assert_cast<const ColumnVariant &>(*shrunk);

    /// The first four rows are 42, Hello, NULL, 43.
    ASSERT_EQ(shrunk_variant.size(), 4);
    ASSERT_EQ(shrunk_variant.getVariantByLocalDiscriminator(0).size(), 2);
    ASSERT_EQ(shrunk_variant.getVariantByLocalDiscriminator(1).size(), 1);
    ASSERT_EQ(shrunk_variant.getVariantByLocalDiscriminator(2).size(), 0);

    const auto & discriminators = shrunk_variant.getLocalDiscriminators();
    ASSERT_EQ(discriminators[0], 0);
    ASSERT_EQ(discriminators[1], 1);
    ASSERT_EQ(discriminators[2], ColumnVariant::NULL_DISCRIMINATOR);
    ASSERT_EQ(discriminators[3], 0);

    const auto & offsets = shrunk_variant.getOffsets();
    ASSERT_EQ(offsets[0], 0);
    ASSERT_EQ(offsets[1], 0);
    ASSERT_EQ(offsets[3], 1);

    ASSERT_EQ(shrunk_variant[0].get<UInt64>(), 42);
    ASSERT_EQ(shrunk_variant[1].get<String>(), "Hello");
    ASSERT_EQ(shrunk_variant[3].get<UInt64>(), 43);
}
TEST(ColumnVariant, CloneResizedGeneral2)
{
    /// Discriminators (D) per row and the contents of the variant columns:
    ///   D:  0, NULL, NULL, 0, 1, 1, 0
    ///   c1: 42, 43, 44
    ///   c2: Hello, World
    ///   c3: (empty)
    const std::vector<ColumnVariant::Discriminator> rows
        = {0, ColumnVariant::NULL_DISCRIMINATOR, ColumnVariant::NULL_DISCRIMINATOR, 0, 1, 1, 0};
    auto discriminators_column = ColumnVariant::ColumnDiscriminators::create();
    for (const auto discr : rows)
        discriminators_column->insertValue(discr);

    auto source = ColumnVariant::create(std::move(discriminators_column), createColumns2());
    auto shrunk = source->cloneResized(3);
    const auto & shrunk_variant = assert_cast<const ColumnVariant &>(*shrunk);

    /// The first three rows are 42, NULL, NULL.
    ASSERT_EQ(shrunk_variant.size(), 3);
    ASSERT_EQ(shrunk_variant.getVariantByLocalDiscriminator(0).size(), 1);
    ASSERT_EQ(shrunk_variant.getVariantByLocalDiscriminator(1).size(), 0);
    ASSERT_EQ(shrunk_variant.getVariantByLocalDiscriminator(2).size(), 0);

    const auto & discriminators = shrunk_variant.getLocalDiscriminators();
    ASSERT_EQ(discriminators[0], 0);
    ASSERT_EQ(discriminators[1], ColumnVariant::NULL_DISCRIMINATOR);
    ASSERT_EQ(discriminators[2], ColumnVariant::NULL_DISCRIMINATOR);

    const auto & offsets = shrunk_variant.getOffsets();
    ASSERT_EQ(offsets[0], 0);

    ASSERT_EQ(shrunk_variant[0].get<UInt64>(), 42);
}
TEST(ColumnVariant, CloneResizedGeneral3)
{
    /// Discriminators (D) per row and the contents of the variant columns:
    ///   D:  0, 1, 1, 0, NULL, NULL, 0
    ///   c1: 42, 43, 44
    ///   c2: Hello, World
    ///   c3: (empty)
    const std::vector<ColumnVariant::Discriminator> rows
        = {0, 1, 1, 0, ColumnVariant::NULL_DISCRIMINATOR, ColumnVariant::NULL_DISCRIMINATOR, 0};
    auto discriminators_column = ColumnVariant::ColumnDiscriminators::create();
    for (const auto discr : rows)
        discriminators_column->insertValue(discr);

    auto source = ColumnVariant::create(std::move(discriminators_column), createColumns2());
    auto shrunk = source->cloneResized(5);
    const auto & shrunk_variant = assert_cast<const ColumnVariant &>(*shrunk);

    /// The first five rows are 42, Hello, World, 43, NULL.
    ASSERT_EQ(shrunk_variant.size(), 5);
    ASSERT_EQ(shrunk_variant.getVariantByLocalDiscriminator(0).size(), 2);
    ASSERT_EQ(shrunk_variant.getVariantByLocalDiscriminator(1).size(), 2);
    ASSERT_EQ(shrunk_variant.getVariantByLocalDiscriminator(2).size(), 0);

    const auto & discriminators = shrunk_variant.getLocalDiscriminators();
    ASSERT_EQ(discriminators[0], 0);
    ASSERT_EQ(discriminators[1], 1);
    ASSERT_EQ(discriminators[2], 1);
    ASSERT_EQ(discriminators[3], 0);

    const auto & offsets = shrunk_variant.getOffsets();
    ASSERT_EQ(offsets[0], 0);
    ASSERT_EQ(offsets[1], 0);
    ASSERT_EQ(offsets[2], 1);
    ASSERT_EQ(offsets[3], 1);

    ASSERT_EQ(shrunk_variant[0].get<UInt64>(), 42);
    ASSERT_EQ(shrunk_variant[1].get<String>(), "Hello");
    ASSERT_EQ(shrunk_variant[2].get<String>(), "World");
    ASSERT_EQ(shrunk_variant[3].get<UInt64>(), 43);
}
/// Fixture #2: discriminators for seven rows - 0, 1, NULL, 0, 1, NULL, 0.
MutableColumnPtr createDiscriminators2()
{
    const std::vector<ColumnVariant::Discriminator> pattern
        = {0, 1, ColumnVariant::NULL_DISCRIMINATOR, 0, 1, ColumnVariant::NULL_DISCRIMINATOR, 0};

    auto result = ColumnVariant::ColumnDiscriminators::create();
    for (const auto discr : pattern)
        result->insertValue(discr);
    return result;
}
std::vector<ColumnVariant::Discriminator> createLocalToGlobalOrder2()
{
std::vector<ColumnVariant::Discriminator> local_to_global_discriminators;
local_to_global_discriminators.push_back(2);
local_to_global_discriminators.push_back(0);
local_to_global_discriminators.push_back(1);
return local_to_global_discriminators;
}
/// Builds fixture #1 as a Variant column; with `reorder` the variants and discriminators
/// are first rearranged according to createLocalToGlobalOrder1().
ColumnVariant::MutablePtr createVariantColumn1(bool reorder)
{
    auto variants = createColumns1();
    auto discriminators = createDiscriminators1();
    if (reorder)
    {
        auto order = createLocalToGlobalOrder1();
        reorderColumns(order, variants);
        reorderDiscriminators(order, discriminators);
        return ColumnVariant::create(std::move(discriminators), std::move(variants), order);
    }
    return ColumnVariant::create(std::move(discriminators), std::move(variants));
}
/// Builds fixture #2 as a Variant column; with `reorder` the variants and discriminators
/// are first rearranged according to createLocalToGlobalOrder2().
ColumnVariant::MutablePtr createVariantColumn2(bool reorder)
{
    auto variants = createColumns2();
    auto discriminators = createDiscriminators2();
    if (reorder)
    {
        auto order = createLocalToGlobalOrder2();
        reorderColumns(order, variants);
        reorderDiscriminators(order, discriminators);
        return ColumnVariant::create(std::move(discriminators), std::move(variants), order);
    }
    return ColumnVariant::create(std::move(discriminators), std::move(variants));
}
/// Appends row 3 of fixture #2 (the UInt64 value 43) to fixture #1, in both variant orders.
TEST(ColumnVariant, InsertFrom)
{
    for (bool change_order : {false, true})
    {
        auto destination = createVariantColumn1(change_order);
        auto source = createVariantColumn2(change_order);

        destination->insertFrom(*source, 3);

        ASSERT_EQ(destination->globalDiscriminatorAt(5), 0);
        ASSERT_EQ((*destination)[5].get<UInt64>(), 43);
    }
}
/// Appends rows [2, 4) of a single-full-variant column (values 2 and 3) to fixture #2.
TEST(ColumnVariant, InsertRangeFromOneColumnNoNulls)
{
    for (bool change_order : {false, true})
    {
        auto destination = createVariantColumn2(change_order);
        auto source = createVariantWithOneFullColumNoNulls(5, change_order);

        destination->insertRangeFrom(*source, 2, 2);

        ASSERT_EQ(destination->globalDiscriminatorAt(7), 0);
        ASSERT_EQ(destination->globalDiscriminatorAt(8), 0);
        ASSERT_EQ((*destination)[7].get<UInt64>(), 2);
        ASSERT_EQ((*destination)[8].get<UInt64>(), 3);
    }
}
/// Appends rows [1, 5) of fixture #2 (Hello, NULL, 43, World) to fixture #1.
TEST(ColumnVariant, InsertRangeFromGeneral)
{
    for (bool change_order : {false, true})
    {
        auto destination = createVariantColumn1(change_order);
        auto source = createVariantColumn2(change_order);

        destination->insertRangeFrom(*source, 1, 4);

        ASSERT_EQ(destination->globalDiscriminatorAt(5), 1);
        ASSERT_EQ(destination->globalDiscriminatorAt(6), ColumnVariant::NULL_DISCRIMINATOR);
        ASSERT_EQ(destination->globalDiscriminatorAt(7), 0);
        ASSERT_EQ(destination->globalDiscriminatorAt(8), 1);
        ASSERT_EQ((*destination)[5].get<String>(), "Hello");
        ASSERT_EQ((*destination)[7].get<UInt64>(), 43);
        ASSERT_EQ((*destination)[8].get<String>(), "World");
    }
}
/// Appends row 3 of fixture #2 (the UInt64 value 43) twice to fixture #1.
TEST(ColumnVariant, InsertManyFrom)
{
    for (bool change_order : {false, true})
    {
        auto destination = createVariantColumn1(change_order);
        auto source = createVariantColumn2(change_order);

        destination->insertManyFrom(*source, 3, 2);

        ASSERT_EQ(destination->globalDiscriminatorAt(5), 0);
        ASSERT_EQ(destination->globalDiscriminatorAt(6), 0);
        ASSERT_EQ((*destination)[5].get<UInt64>(), 43);
        ASSERT_EQ((*destination)[6].get<UInt64>(), 43);
    }
}
/// Dropping the last 3 of 5 rows leaves the values 0 and 1 in the single full variant.
TEST(ColumnVariant, PopBackOneColumnNoNulls)
{
    auto variant = createVariantWithOneFullColumNoNulls(5, false);
    variant->popBack(3);

    ASSERT_EQ(variant->size(), 2);
    ASSERT_EQ(variant->getVariantByLocalDiscriminator(0).size(), 2);
    for (size_t row = 0; row != 2; ++row)
    {
        ASSERT_EQ((*variant)[row].get<UInt64>(), row);
    }
}
/// Dropping the last 4 of 7 rows of fixture #2 leaves 42, Hello, NULL.
TEST(ColumnVariant, PopBackGeneral)
{
    auto variant = ColumnVariant::create(createDiscriminators2(), createColumns2());
    variant->popBack(4);

    ASSERT_EQ(variant->size(), 3);
    for (size_t discr = 0; discr != 2; ++discr)
    {
        ASSERT_EQ(variant->getVariantByLocalDiscriminator(discr).size(), 1);
    }
    ASSERT_EQ((*variant)[0].get<UInt64>(), 42);
    ASSERT_EQ((*variant)[1].get<String>(), "Hello");
    ASSERT_TRUE((*variant)[2].isNull());
}
/// Keeps rows 0 and 2 of a three-row single-full-variant column.
TEST(ColumnVariant, FilterOneColumnNoNulls)
{
    auto variant = createVariantWithOneFullColumNoNulls(3, false);

    IColumn::Filter filter;
    for (int keep : {1, 0, 1})
        filter.push_back(keep);

    auto filtered = variant->filter(filter, -1);
    ASSERT_EQ(filtered->size(), 2);
    ASSERT_EQ((*filtered)[0].get<UInt64>(), 0);
    ASSERT_EQ((*filtered)[1].get<UInt64>(), 2);
}
/// Keeps rows 1, 2 and 5 of fixture #2: Hello, NULL, NULL.
TEST(ColumnVariant, FilterGeneral)
{
    auto variant = ColumnVariant::create(createDiscriminators2(), createColumns2());

    IColumn::Filter filter;
    for (int keep : {0, 1, 1, 0, 0, 1, 0})
        filter.push_back(keep);

    auto filtered = variant->filter(filter, -1);
    ASSERT_EQ(filtered->size(), 3);
    ASSERT_EQ((*filtered)[0].get<String>(), "Hello");
    ASSERT_TRUE((*filtered)[1].isNull());
    ASSERT_TRUE((*filtered)[2].isNull());
}
/// permute() and index() with positions {1, 3, 2, 0} and limit 3 must both yield 1, 3, 2.
TEST(ColumnVariant, PermuteAndIndexOneColumnNoNulls)
{
    auto variant = createVariantWithOneFullColumNoNulls(4, false);

    IColumn::Permutation permutation;
    for (int position : {1, 3, 2, 0})
        permutation.push_back(position);

    auto permuted = variant->permute(permutation, 3);
    ASSERT_EQ(permuted->size(), 3);
    ASSERT_EQ((*permuted)[0].get<UInt64>(), 1);
    ASSERT_EQ((*permuted)[1].get<UInt64>(), 3);
    ASSERT_EQ((*permuted)[2].get<UInt64>(), 2);

    auto positions = ColumnUInt64::create();
    for (int position : {1, 3, 2, 0})
        positions->getData().push_back(position);

    auto indexed = variant->index(*positions, 3);
    ASSERT_EQ(indexed->size(), 3);
    ASSERT_EQ((*indexed)[0].get<UInt64>(), 1);
    ASSERT_EQ((*indexed)[1].get<UInt64>(), 3);
    ASSERT_EQ((*indexed)[2].get<UInt64>(), 2);
}
/// Rows 3, 4, 1, 5 of fixture #2 are 43, World, Hello, NULL.
TEST(ColumnVariant, PermuteGeneral)
{
    auto variant = ColumnVariant::create(createDiscriminators2(), createColumns2());

    IColumn::Permutation permutation;
    for (int position : {3, 4, 1, 5})
        permutation.push_back(position);

    auto permuted = variant->permute(permutation, 4);
    ASSERT_EQ(permuted->size(), 4);
    ASSERT_EQ((*permuted)[0].get<UInt64>(), 43);
    ASSERT_EQ((*permuted)[1].get<String>(), "World");
    ASSERT_EQ((*permuted)[2].get<String>(), "Hello");
    ASSERT_TRUE((*permuted)[3].isNull());
}
/// Cumulative offsets {0, 3, 6}: row 0 is dropped, rows 1 and 2 are repeated three times each.
TEST(ColumnVariant, ReplicateOneColumnNoNull)
{
    auto variant = createVariantWithOneFullColumNoNulls(3, false);

    IColumn::Offsets offsets;
    for (int end : {0, 3, 6})
        offsets.push_back(end);

    auto replicated = variant->replicate(offsets);
    ASSERT_EQ(replicated->size(), 6);
    for (size_t row = 0; row != 6; ++row)
    {
        ASSERT_EQ((*replicated)[row].get<UInt64>(), row < 3 ? 1 : 2);
    }
}
/// Replicates a Variant column built from createDiscriminators1()/createColumns1()
/// with cumulative offsets {1, 3, 5, 5, 7} and checks all seven resulting rows.
TEST(ColumnVariant, ReplicateGeneral)
{
    auto column = ColumnVariant::create(createDiscriminators1(), createColumns1());

    IColumn::Offsets offsets;
    for (const UInt64 offset : std::initializer_list<UInt64>{1, 3, 5, 5, 7})
        offsets.push_back(offset);

    auto replicated_column = column->replicate(offsets);
    const auto & result = *replicated_column;
    ASSERT_EQ(result.size(), 7);
    ASSERT_EQ(result[0].get<UInt64>(), 42);
    ASSERT_EQ(result[1].get<String>(), "Hello");
    ASSERT_EQ(result[2].get<String>(), "Hello");

    /// The remaining rows are all NULL.
    for (size_t row = 3; row < 7; ++row)
        ASSERT_TRUE(result[row].isNull());
}
/// Scatters a five-row column into three columns using the selector {0, 1, 2, 0, 1}
/// and checks the size and contents of every resulting column.
TEST(ColumnVariant, ScatterOneColumnNoNulls)
{
    auto column = createVariantWithOneFullColumNoNulls(5, false);

    /// Row i goes to output column i % 3.
    IColumn::Selector selector;
    for (size_t row = 0; row < 5; ++row)
        selector.push_back(row % 3);

    auto columns = column->scatter(3, selector);

    const auto & first = *columns[0];
    ASSERT_EQ(first.size(), 2);
    ASSERT_EQ(first[0].get<UInt64>(), 0);
    ASSERT_EQ(first[1].get<UInt64>(), 3);

    const auto & second = *columns[1];
    ASSERT_EQ(second.size(), 2);
    ASSERT_EQ(second[0].get<UInt64>(), 1);
    ASSERT_EQ(second[1].get<UInt64>(), 4);

    const auto & third = *columns[2];
    ASSERT_EQ(third.size(), 1);
    ASSERT_EQ(third[0].get<UInt64>(), 2);
}
/// Scatters a Variant column built from createDiscriminators2()/createColumns2()
/// into three columns and checks each of them, including the one holding only NULLs.
TEST(ColumnVariant, ScatterGeneral)
{
    auto column = ColumnVariant::create(createDiscriminators2(), createColumns2());

    IColumn::Selector selector;
    for (const size_t target : std::initializer_list<size_t>{0, 0, 2, 0, 1, 2, 1})
        selector.push_back(target);

    auto columns = column->scatter(3, selector);

    const auto & first = *columns[0];
    ASSERT_EQ(first.size(), 3);
    ASSERT_EQ(first[0].get<UInt64>(), 42);
    ASSERT_EQ(first[1].get<String>(), "Hello");
    ASSERT_EQ(first[2].get<UInt64>(), 43);

    const auto & second = *columns[1];
    ASSERT_EQ(second.size(), 2);
    ASSERT_EQ(second[0].get<String>(), "World");
    ASSERT_EQ(second[1].get<UInt64>(), 44);

    const auto & third = *columns[2];
    ASSERT_EQ(third.size(), 2);
    for (size_t row = 0; row < 2; ++row)
        ASSERT_TRUE(third[row].isNull());
}

View File

@ -230,7 +230,7 @@ class IColumn;
\
M(Bool, force_index_by_date, false, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \
M(Bool, force_primary_key, false, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
M(Bool, use_skip_indexes, true, "Use data skipping indexes during query execution.", 0) \
M(Bool, use_skip_indexes, true, "Use data skipping indexes during query execution.", 0) \
M(Bool, use_skip_indexes_if_final, false, "If query has FINAL, then skipping data based on indexes may produce incorrect result, hence disabled by default.", 0) \
M(String, ignore_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be excluded during query execution.", 0) \
\
@ -828,6 +828,7 @@ class IColumn;
M(Bool, function_json_value_return_type_allow_complex, false, "Allow function JSON_VALUE to return complex type, such as: struct, array, map.", 0) \
M(Bool, use_with_fill_by_sorting_prefix, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently", 0) \
M(Bool, optimize_uniq_to_count, true, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.", 0) \
M(Bool, use_variant_as_common_type, false, "Use Variant as a result type for if/multiIf in case when there is no common type for arguments", 0) \
\
/** Experimental functions */ \
M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \
@ -835,6 +836,7 @@ class IColumn;
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \
M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \
M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \

View File

@ -87,6 +87,8 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"24.1", {{"print_pretty_type_names", false, true, "Better user experience."},
{"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"},
{"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"},
{"allow_experimental_variant_type", false, false, "Add new experimental Variant type"},
{"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"},
{"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"},
{"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"},
{"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"},

View File

@ -49,6 +49,7 @@ enum class TypeIndex
IPv4,
IPv6,
JSONPaths,
Variant,
};
/**

View File

@ -290,6 +290,7 @@ DataTypeFactory::DataTypeFactory()
registerDataTypeDomainGeo(*this);
registerDataTypeMap(*this);
registerDataTypeObject(*this);
registerDataTypeVariant(*this);
}
DataTypeFactory & DataTypeFactory::instance()

View File

@ -100,5 +100,6 @@ void registerDataTypeDomainBool(DataTypeFactory & factory);
void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory);
void registerDataTypeDomainGeo(DataTypeFactory & factory);
void registerDataTypeObject(DataTypeFactory & factory);
void registerDataTypeVariant(DataTypeFactory & factory);
}

View File

@ -114,5 +114,33 @@ DataTypePtr makeNullableOrLowCardinalityNullable(const DataTypePtr & type)
return std::make_shared<DataTypeNullable>(type);
}
/// Returns a nullable version of the type:
///  - a type that is already Nullable or LowCardinality(Nullable) is returned unchanged;
///  - for LowCardinality the dictionary type is wrapped with makeNullable;
///  - everything else is delegated to makeNullableSafe.
DataTypePtr makeNullableOrLowCardinalityNullableSafe(const DataTypePtr & type)
{
    if (isNullableOrLowCardinalityNullable(type))
        return type;

    if (!type->lowCardinality())
        return makeNullableSafe(type);

    const auto & low_cardinality_type = assert_cast<const DataTypeLowCardinality &>(*type);
    return std::make_shared<DataTypeLowCardinality>(makeNullable(low_cardinality_type.getDictionaryType()));
}
/// Strips nullability from the type:
/// Nullable(T) -> T, LowCardinality(Nullable(T)) -> LowCardinality(T), any other type is returned as is.
DataTypePtr removeNullableOrLowCardinalityNullable(const DataTypePtr & type)
{
    if (type->isNullable())
    {
        const auto & nullable_type = static_cast<const DataTypeNullable &>(*type);
        return nullable_type.getNestedType();
    }

    if (!type->isLowCardinalityNullable())
        return type;

    const auto & low_cardinality_type = static_cast<const DataTypeLowCardinality &>(*type);
    auto not_nullable_dictionary_type = removeNullable(low_cardinality_type.getDictionaryType());
    return std::make_shared<DataTypeLowCardinality>(not_nullable_dictionary_type);
}
}

View File

@ -54,5 +54,8 @@ DataTypePtr makeNullable(const DataTypePtr & type);
DataTypePtr makeNullableSafe(const DataTypePtr & type);
DataTypePtr removeNullable(const DataTypePtr & type);
DataTypePtr makeNullableOrLowCardinalityNullable(const DataTypePtr & type);
DataTypePtr makeNullableOrLowCardinalityNullableSafe(const DataTypePtr & type);
/// Nullable(T) -> T, LowCardinality(Nullable(T)) -> LowCardinality(T)
DataTypePtr removeNullableOrLowCardinalityNullable(const DataTypePtr & type);
}

View File

@ -11,6 +11,7 @@
#include <DataTypes/Serializations/SerializationTuple.h>
#include <DataTypes/Serializations/SerializationNamed.h>
#include <DataTypes/Serializations/SerializationInfoTuple.h>
#include <DataTypes/Serializations/SerializationVariantElement.h>
#include <DataTypes/NestedUtils.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTNameTypePair.h>
@ -189,11 +190,15 @@ MutableColumnPtr DataTypeTuple::createColumn() const
MutableColumnPtr DataTypeTuple::createColumn(const ISerialization & serialization) const
{
/// If we read Tuple as Variant subcolumn, it may be wrapped to SerializationVariantElement.
/// Here we don't need it, so we drop this wrapper.
const auto * current_serialization = &serialization;
while (const auto * serialization_variant_element = typeid_cast<const SerializationVariantElement *>(current_serialization))
current_serialization = serialization_variant_element->getNested().get();
/// If we read subcolumn of nested Tuple, it may be wrapped to SerializationNamed
/// several times to allow to reconstruct the substream path name.
/// Here we don't need substream path name, so we drop first several wrapper serializations.
const auto * current_serialization = &serialization;
while (const auto * serialization_named = typeid_cast<const SerializationNamed *>(current_serialization))
current_serialization = serialization_named->getNested().get();

View File

@ -0,0 +1,220 @@
#include <Columns/ColumnVariant.h>
#include <Columns/ColumnConst.h>
#include <Core/Field.h>
#include <DataTypes/DataTypeVariant.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/Serializations/SerializationVariant.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/FieldToDataType.h>
#include <Common/assert_cast.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <Parsers/IAST.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int EMPTY_DATA_PASSED;
}
/// Builds a Variant from the given nested types.
/// Identical types are squashed, Nothing is skipped and the remaining types are ordered by their full names.
/// Throws BAD_ARGUMENTS for Nullable, LowCardinality(Nullable) or Variant nested types and when there are
/// more than ColumnVariant::MAX_NESTED_COLUMNS variants; throws EMPTY_DATA_PASSED when no variants are left.
DataTypeVariant::DataTypeVariant(const DataTypes & variants_)
{
    /// std::map holds one entry per type name and iterates in name order.
    std::map<String, DataTypePtr> sorted_unique_types;
    for (const auto & nested_type : variants_)
    {
        /// Nullable(...), LowCardinality(Nullable(...)) and Variant(...) types are not allowed inside Variant type.
        if (isNullableOrLowCardinalityNullable(nested_type))
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Nullable/LowCardinality(Nullable) types are not allowed inside Variant type");
        if (nested_type->getTypeId() == TypeIndex::Variant)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Nested Variant types are not allowed");

        /// Nothing is never stored as a variant.
        if (isNothing(nested_type))
            continue;

        sorted_unique_types.insert_or_assign(nested_type->getName(), nested_type);
    }

    variants.reserve(sorted_unique_types.size());
    for (const auto & name_and_type : sorted_unique_types)
        variants.push_back(name_and_type.second);

    if (variants.empty())
        throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Variant cannot be empty");

    if (variants.size() > ColumnVariant::MAX_NESTED_COLUMNS)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Variant type with more than {} nested types is not allowed", ColumnVariant::MAX_NESTED_COLUMNS);
}
std::string DataTypeVariant::doGetName() const
{
size_t size = variants.size();
WriteBufferFromOwnString s;
s << "Variant(";
for (size_t i = 0; i < size; ++i)
{
if (i != 0)
s << ", ";
s << variants[i]->getName();
}
s << ")";
return s.str();
}
std::string DataTypeVariant::doGetPrettyName(size_t indent) const
{
size_t size = variants.size();
WriteBufferFromOwnString s;
s << "Variant(";
for (size_t i = 0; i != size; ++i)
{
if (i != 0)
s << ", ";
s << variants[i]->getPrettyName(indent);
}
s << ')';
return s.str();
}
/// Creates a ColumnVariant from one freshly created column per nested type, in the order of `variants`.
MutableColumnPtr DataTypeVariant::createColumn() const
{
    MutableColumns variant_columns;
    variant_columns.reserve(variants.size());

    for (const auto & variant : variants)
        variant_columns.push_back(variant->createColumn());

    return ColumnVariant::create(std::move(variant_columns));
}
/// The default value of a Variant is NULL.
Field DataTypeVariant::getDefault() const
{
return Null();
}
bool DataTypeVariant::equals(const IDataType & rhs) const
{
if (typeid(rhs) != typeid(*this))
return false;
const DataTypeVariant & rhs_variant = static_cast<const DataTypeVariant &>(rhs);
size_t size = variants.size();
if (size != rhs_variant.variants.size())
return false;
for (size_t i = 0; i < size; ++i)
if (!variants[i]->equals(*rhs_variant.variants[i]))
return false;
return true;
}
bool DataTypeVariant::textCanContainOnlyValidUTF8() const
{
return std::all_of(variants.begin(), variants.end(), [](auto && elem) { return elem->textCanContainOnlyValidUTF8(); });
}
bool DataTypeVariant::haveMaximumSizeOfValue() const
{
return std::all_of(variants.begin(), variants.end(), [](auto && elem) { return elem->haveMaximumSizeOfValue(); });
}
bool DataTypeVariant::hasDynamicSubcolumns() const
{
return std::any_of(variants.begin(), variants.end(), [](auto && elem) { return elem->hasDynamicSubcolumns(); });
}
std::optional<ColumnVariant::Discriminator> DataTypeVariant::tryGetVariantDiscriminator(const DataTypePtr & type) const
{
String type_name = type->getName();
for (size_t i = 0; i != variants.size(); ++i)
{
/// We don't use equals here, because it doesn't respect custom type names.
if (variants[i]->getName() == type_name)
return i;
}
return std::nullopt;
}
size_t DataTypeVariant::getMaximumSizeOfValueInMemory() const
{
size_t max_size = 0;
for (const auto & elem : variants)
{
size_t elem_max_size = elem->getMaximumSizeOfValueInMemory();
if (elem_max_size > max_size)
max_size = elem_max_size;
}
return max_size;
}
/// Builds SerializationVariant from the default serializations and names of the nested types,
/// the text deserialization order returned by SerializationVariant::getVariantsDeserializeTextOrder
/// and the name of this type.
SerializationPtr DataTypeVariant::doGetDefaultSerialization() const
{
    const size_t variants_count = variants.size();

    SerializationVariant::VariantSerializations nested_serializations;
    nested_serializations.reserve(variants_count);
    Names nested_names;
    nested_names.reserve(variants_count);

    for (size_t i = 0; i < variants_count; ++i)
    {
        nested_serializations.push_back(variants[i]->getDefaultSerialization());
        nested_names.push_back(variants[i]->getName());
    }

    auto deserialize_text_order = SerializationVariant::getVariantsDeserializeTextOrder(variants);
    return std::make_shared<SerializationVariant>(
        std::move(nested_serializations), std::move(nested_names), std::move(deserialize_text_order), getName());
}
/// Creator for the data type factory: resolves every child of `arguments` to a data type
/// and builds a DataTypeVariant from them. Throws EMPTY_DATA_PASSED when there are no arguments.
static DataTypePtr create(const ASTPtr & arguments)
{
    const bool no_arguments = !arguments || arguments->children.empty();
    if (no_arguments)
        throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Variant cannot be empty");

    const auto & type_asts = arguments->children;
    const auto & factory = DataTypeFactory::instance();

    DataTypes variant_types;
    variant_types.reserve(type_asts.size());
    for (const ASTPtr & type_ast : type_asts)
        variant_types.emplace_back(factory.get(type_ast));

    return std::make_shared<DataTypeVariant>(variant_types);
}
bool isVariantExtension(const DataTypePtr & from_type, const DataTypePtr & to_type)
{
const auto * from_variant = typeid_cast<const DataTypeVariant *>(from_type.get());
const auto * to_variant = typeid_cast<const DataTypeVariant *>(to_type.get());
if (!from_variant || !to_variant)
return false;
const auto & to_variants = to_variant->getVariants();
std::unordered_set<String> to_variant_types;
to_variant_types.reserve(to_variants.size());
for (const auto & variant : to_variants)
to_variant_types.insert(variant->getName());
for (const auto & variant : from_variant->getVariants())
{
if (!to_variant_types.contains(variant->getName()))
return false;
}
return true;
}
/// Registers the "Variant" data type in the factory with `create` as its creator function.
void registerDataTypeVariant(DataTypeFactory & factory)
{
factory.registerDataType("Variant", create);
}
}

View File

@ -0,0 +1,68 @@
#pragma once
#include <DataTypes/IDataType.h>
#include <Columns/ColumnVariant.h>
#include <optional>
namespace DB
{
/** Variant data type.
* This type represents a union of other data types.
* For example, type Variant(T1, T2, ..., TN) means that each row of this type
* has a value of either type T1 or T2 or ... or TN or none of them (NULL value).
* Nullable(...), LowCardinality(Nullable(...)) and Variant(...) types are not allowed
* inside Variant type.
* The order of nested types doesn't matter: Variant(T1, T2) = Variant(T2, T1).
* To have global order of nested types we sort variants by type names on Variant creation.
* The index of a variant in a sorted list is called global variant discriminator.
*/
class DataTypeVariant final : public IDataType
{
private:
/// Nested types of the Variant.
DataTypes variants;
public:
static constexpr bool is_parametric = true;
/// Builds the type from the list of nested types.
explicit DataTypeVariant(const DataTypes & variants_);
TypeIndex getTypeId() const override { return TypeIndex::Variant; }
const char * getFamilyName() const override { return "Variant"; }
/// Variant cannot be wrapped into Nullable and supports neither sparse serialization nor sparse columns.
bool canBeInsideNullable() const override { return false; }
bool supportsSparseSerialization() const override { return false; }
bool canBeInsideSparseColumns() const override { return false; }
MutableColumnPtr createColumn() const override;
Field getDefault() const override;
bool equals(const IDataType & rhs) const override;
bool isParametric() const override { return true; }
bool haveSubtypes() const override { return true; }
bool textCanContainOnlyValidUTF8() const override;
bool haveMaximumSizeOfValue() const override;
bool hasDynamicSubcolumns() const override;
size_t getMaximumSizeOfValueInMemory() const override;
/// Access to the nested type at position i (no bounds check) and to the whole list of nested types.
const DataTypePtr & getVariant(size_t i) const { return variants[i]; }
const DataTypes & getVariants() const { return variants; }
/// Check if Variant has provided type in the list of variants and return its discriminator.
std::optional<ColumnVariant::Discriminator> tryGetVariantDiscriminator(const DataTypePtr & type) const;
private:
std::string doGetName() const override;
std::string doGetPrettyName(size_t indent) const override;
SerializationPtr doGetDefaultSerialization() const override;
};
/// Check if conversion from from_type to to_type is Variant extension
/// (both types are Variants and to_type contains all variants from from_type).
bool isVariantExtension(const DataTypePtr & from_type, const DataTypePtr & to_type);
}

View File

@ -74,6 +74,25 @@ T EnumValues<T>::getValue(StringRef field_name, bool try_treat_as_id) const
return it->getMapped();
}
/// Non-throwing lookup of an enum value by name.
/// On success stores the value in `x` and returns true.
/// If the name is unknown and `try_treat_as_id` is set, the text is parsed as a value instead;
/// that succeeds only if the whole text is consumed and the parsed value is registered.
/// Note that `x` may be modified by the parsing attempt even when false is returned.
template <typename T>
bool EnumValues<T>::tryGetValue(T & x, StringRef field_name, bool try_treat_as_id) const
{
    if (const auto it = name_to_value_map.find(field_name))
    {
        x = it->getMapped();
        return true;
    }

    /// It is used in CSV and TSV input formats. If we fail to find given string in
    /// enum names, we will try to treat it as enum id.
    if (!try_treat_as_id)
        return false;

    ReadBufferFromMemory tmp_buf(field_name.data, field_name.size);
    if (!tryReadText(x, tmp_buf))
        return false;
    if (!tmp_buf.eof())
        return false;
    return value_to_name_map.contains(x);
}
template <typename T>
Names EnumValues<T>::getAllRegisteredNames() const
{

View File

@ -7,7 +7,7 @@
namespace DB
{
namespace ErrorCodes
namespace ErrorCodesEnumValues
{
extern const int BAD_ARGUMENTS;
}
@ -42,6 +42,11 @@ public:
return it;
}
/// Returns true if the value is present in value_to_name_map.
bool hasValue(const T & value) const
{
return value_to_name_map.contains(value);
}
/// throws exception if value is not valid
const StringRef & getNameForValue(const T & value) const
{
@ -60,6 +65,7 @@ public:
}
T getValue(StringRef field_name, bool try_treat_as_id = false) const;
bool tryGetValue(T & x, StringRef field_name, bool try_treat_as_id = false) const;
template <typename TValues>
bool containsAll(const TValues & rhs_values) const

View File

@ -109,11 +109,26 @@ Ptr IDataType::getForSubcolumn(
bool throw_if_null) const
{
Ptr res;
forEachSubcolumn([&](const auto &, const auto & name, const auto & subdata)
ISerialization::StreamCallback callback_with_data = [&](const auto & subpath)
{
if (name == subcolumn_name)
res = subdata.*member;
}, data);
for (size_t i = 0; i < subpath.size(); ++i)
{
size_t prefix_len = i + 1;
if (!subpath[i].visited && ISerialization::hasSubcolumnForPath(subpath, prefix_len))
{
auto name = ISerialization::getSubcolumnNameForStream(subpath, prefix_len);
/// Create data from path only if it's requested subcolumn.
if (name == subcolumn_name)
res = ISerialization::createFromPath(subpath, prefix_len).*member;
}
subpath[i].visited = true;
}
};
ISerialization::EnumerateStreamsSettings settings;
settings.position_independent_encoding = false;
data.serialization->enumerateStreams(settings, callback_with_data, data);
if (!res && throw_if_null)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());

View File

@ -150,7 +150,7 @@ public:
/** Create ColumnConst for corresponding type, with specified size and value.
*/
ColumnPtr createColumnConst(size_t size, const Field & field) const;
virtual ColumnPtr createColumnConst(size_t size, const Field & field) const;
ColumnPtr createColumnConstWithDefaultValue(size_t size) const;
/** Get default value of data type.
@ -412,6 +412,8 @@ struct WhichDataType
constexpr bool isSimple() const { return isInt() || isUInt() || isFloat() || isString(); }
constexpr bool isLowCardinality() const { return idx == TypeIndex::LowCardinality; }
constexpr bool isVariant() const { return idx == TypeIndex::Variant; }
};
/// IDataType helpers (alternative for IDataType virtual methods with single point of truth)
@ -464,6 +466,7 @@ template <typename T> inline bool isTuple(const T & data_type) { return WhichDat
template <typename T> inline bool isMap(const T & data_type) {return WhichDataType(data_type).isMap(); }
template <typename T> inline bool isInterval(const T & data_type) {return WhichDataType(data_type).isInterval(); }
template <typename T> inline bool isObject(const T & data_type) { return WhichDataType(data_type).isObject(); }
template <typename T> inline bool isVariant(const T & data_type) { return WhichDataType(data_type).isVariant(); }
template <typename T> inline bool isNothing(const T & data_type) { return WhichDataType(data_type).isNothing(); }

View File

@ -54,6 +54,7 @@ const std::set<SubstreamType> ISerialization::Substream::named_types
TupleElement,
NamedOffsets,
NamedNullMap,
NamedVariantDiscriminators,
};
String ISerialization::Substream::toString() const
@ -61,6 +62,9 @@ String ISerialization::Substream::toString() const
if (named_types.contains(type))
return fmt::format("{}({})", type, name_of_substream);
if (type == VariantElement)
return fmt::format("VariantElement({})", variant_element_name);
return String(magic_enum::enum_name(type));
}
@ -186,6 +190,12 @@ String getNameForSubstreamPath(
else
stream_name += substream_name;
}
else if (it->type == Substream::VariantDiscriminators)
stream_name += ".variant_discr";
else if (it->type == Substream::VariantOffsets)
stream_name += ".variant_offsets";
else if (it->type == Substream::VariantElement)
stream_name += "." + it->variant_element_name;
}
return stream_name;
@ -274,6 +284,53 @@ bool ISerialization::isSpecialCompressionAllowed(const SubstreamPath & path)
return true;
}
namespace
{

/// Runs `deserialize` on the column and converts any exception into a `false` result.
/// If the column has grown by the time the exception is caught, the extra rows are removed
/// so that the column returns to the size it had before the call.
template <typename F>
bool tryDeserializeText(const F deserialize, DB::IColumn & column)
{
    const size_t initial_size = column.size();
    try
    {
        deserialize(column);
    }
    catch (...)
    {
        const size_t current_size = column.size();
        if (current_size > initial_size)
            column.popBack(current_size - initial_size);
        return false;
    }
    return true;
}

}
/// Default non-throwing variant: runs deserializeTextCSV and reports an exception as `false`.
bool ISerialization::tryDeserializeTextCSV(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
{
    auto read_csv = [&](DB::IColumn & my_column) { deserializeTextCSV(my_column, istr, settings); };
    return tryDeserializeText(read_csv, column);
}
/// Default non-throwing variant: runs deserializeTextEscaped and reports an exception as `false`.
bool ISerialization::tryDeserializeTextEscaped(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
{
    auto read_escaped = [&](DB::IColumn & my_column) { deserializeTextEscaped(my_column, istr, settings); };
    return tryDeserializeText(read_escaped, column);
}
/// Default non-throwing variant: runs deserializeTextJSON and reports an exception as `false`.
bool ISerialization::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
{
    auto read_json = [&](DB::IColumn & my_column) { deserializeTextJSON(my_column, istr, settings); };
    return tryDeserializeText(read_json, column);
}
/// Default non-throwing variant: runs deserializeTextQuoted and reports an exception as `false`.
bool ISerialization::tryDeserializeTextQuoted(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
{
    auto read_quoted = [&](DB::IColumn & my_column) { deserializeTextQuoted(my_column, istr, settings); };
    return tryDeserializeText(read_quoted, column);
}
/// Default non-throwing variant: runs deserializeWholeText and reports an exception as `false`.
bool ISerialization::tryDeserializeWholeText(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
{
    auto read_whole_text = [&](DB::IColumn & my_column) { deserializeWholeText(my_column, istr, settings); };
    return tryDeserializeText(read_whole_text, column);
}
void ISerialization::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String field;
@ -283,6 +340,15 @@ void ISerialization::deserializeTextRaw(IColumn & column, ReadBuffer & istr, con
deserializeWholeText(column, buf, settings);
}
/// Non-throwing counterpart of deserializeTextRaw: reads one raw field from the stream
/// and tries to parse it as a whole-text value.
bool ISerialization::tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    /// Read until \t or \n.
    String raw_field;
    readString(raw_field, istr);

    ReadBufferFromString field_buf(raw_field);
    return tryDeserializeWholeText(column, field_buf, settings);
}
void ISerialization::serializeTextMarkdown(
const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const
{
@ -310,7 +376,8 @@ bool ISerialization::hasSubcolumnForPath(const SubstreamPath & path, size_t pref
size_t last_elem = prefix_len - 1;
return path[last_elem].type == Substream::NullMap
|| path[last_elem].type == Substream::TupleElement
|| path[last_elem].type == Substream::ArraySizes;
|| path[last_elem].type == Substream::ArraySizes
|| path[last_elem].type == Substream::VariantElement;
}
ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len)
@ -339,6 +406,8 @@ void ISerialization::throwUnexpectedDataAfterParsedValue(IColumn & column, ReadB
{
WriteBufferFromOwnString ostr;
serializeText(column, column.size() - 1, ostr, settings);
/// Restore correct column size.
column.popBack(1);
throw Exception(
ErrorCodes::UNEXPECTED_DATA_AFTER_PARSED_VALUE,
"Unexpected data '{}' after parsed {} value '{}'",

View File

@ -154,6 +154,12 @@ public:
ObjectStructure,
ObjectData,
VariantDiscriminators,
NamedVariantDiscriminators,
VariantOffsets,
VariantElements,
VariantElement,
Regular,
};
@ -162,6 +168,9 @@ public:
Type type;
/// The name of a variant element type.
String variant_element_name;
/// Name of substream for type from 'named_types'.
String name_of_substream;
@ -321,17 +330,20 @@ public:
virtual void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
virtual void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
virtual bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const;
/** Text serialization as a literal that may be inserted into a query.
*/
virtual void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
virtual void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
virtual bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const;
/** Text serialization for the CSV format.
*/
virtual void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
virtual void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
virtual bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const;
/** Text serialization for displaying on a terminal or saving into a text file, and the like.
* Without escaping or quoting.
@ -341,11 +353,13 @@ public:
/** Text deserialization in case when buffer contains only one value, without any escaping and delimiters.
*/
virtual void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
virtual bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const;
/** Text serialization intended for using in JSON format.
*/
virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
virtual void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
virtual bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const;
virtual void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t /*indent*/) const
{
serializeTextJSON(column, row_num, ostr, settings);
@ -365,6 +379,7 @@ public:
* additional code in data types serialization and ReadHelpers.
*/
virtual void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
virtual bool tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
virtual void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const;
virtual void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const;

View File

@ -419,9 +419,11 @@ static void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffe
}
template <typename Reader>
static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && read_nested, bool allow_unenclosed)
template <typename ReturnType = void, typename Reader>
static ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && read_nested, bool allow_unenclosed)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
ColumnArray & column_array = assert_cast<ColumnArray &>(column);
ColumnArray::Offsets & offsets = column_array.getOffsets();
@ -433,7 +435,18 @@ static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && r
if (checkChar('[', istr))
has_braces = true;
else if (!allow_unenclosed)
throw Exception(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT, "Array does not start with '[' character");
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT, "Array does not start with '[' character");
return ReturnType(false);
}
auto on_error_no_throw = [&]()
{
if (size)
nested_column.popBack(size);
return ReturnType(false);
};
try
{
@ -443,11 +456,17 @@ static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && r
if (!first)
{
if (*istr.position() == ',')
{
++istr.position();
}
else
throw Exception(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT,
"Cannot read array from text, expected comma or end of array, found '{}'",
*istr.position());
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT,
"Cannot read array from text, expected comma or end of array, found '{}'",
*istr.position());
return on_error_no_throw();
}
}
first = false;
@ -457,25 +476,42 @@ static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && r
if (*istr.position() == ']')
break;
read_nested(nested_column);
if constexpr (throw_exception)
read_nested(nested_column);
else if (!read_nested(nested_column))
return on_error_no_throw();
++size;
skipWhitespaceIfAny(istr);
}
if (has_braces)
assertChar(']', istr);
{
if constexpr (throw_exception)
assertChar(']', istr);
else if (!checkChar(']', istr))
return on_error_no_throw();
}
else /// If array is not enclosed in braces, we read until EOF.
assertEOF(istr);
{
if constexpr (throw_exception)
assertEOF(istr);
else if (!istr.eof())
return on_error_no_throw();
}
}
catch (...)
{
if (size)
nested_column.popBack(size);
throw;
if constexpr (throw_exception)
throw;
return ReturnType(false);
}
offsets.push_back(offsets.back() + size);
return ReturnType(true);
}
@ -494,8 +530,8 @@ void SerializationArray::deserializeText(IColumn & column, ReadBuffer & istr, co
deserializeTextImpl(column, istr,
[&](IColumn & nested_column)
{
if (settings.null_as_default)
SerializationNullable::deserializeTextQuotedImpl(nested_column, istr, settings, nested);
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextQuoted(nested_column, istr, settings, nested);
else
nested->deserializeTextQuoted(nested_column, istr, settings);
}, false);
@ -504,6 +540,29 @@ void SerializationArray::deserializeText(IColumn & column, ReadBuffer & istr, co
throwUnexpectedDataAfterParsedValue(column, istr, settings, "Array");
}
/// Non-throwing variant of deserializeText: a parse failure is reported through the return value.
/// When `whole` is set, data left in the buffer after the array counts as a failure and the
/// row that was just appended is removed from the column again.
bool SerializationArray::tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
{
    auto element_reader = [&](IColumn & nested_column)
    {
        /// The NULL-as-default path is taken only for element columns that are not
        /// Nullable / LowCardinality(Nullable) themselves.
        const bool use_null_as_default = settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column);
        return use_null_as_default
            ? SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextQuoted(nested_column, istr, settings, nested)
            : nested->tryDeserializeTextQuoted(nested_column, istr, settings);
    };

    if (!deserializeTextImpl<bool>(column, istr, std::move(element_reader), false))
        return false;

    const bool has_trailing_data = whole && !istr.eof();
    if (has_trailing_data)
        column.popBack(1);

    return !has_trailing_data;
}
void SerializationArray::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
@ -559,13 +618,25 @@ void SerializationArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr
deserializeTextImpl(column, istr,
[&](IColumn & nested_column)
{
if (settings.null_as_default)
SerializationNullable::deserializeTextJSONImpl(nested_column, istr, settings, nested);
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested);
else
nested->deserializeTextJSON(nested_column, istr, settings);
}, false);
}
/// Non-throwing variant of deserializeTextJSON: returns whatever deserializeTextImpl<bool> reports.
bool SerializationArray::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    auto element_reader = [&](IColumn & nested_column)
    {
        /// The NULL-as-default path is taken only for element columns that are not
        /// Nullable / LowCardinality(Nullable) themselves.
        const bool use_null_as_default = settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column);
        return use_null_as_default
            ? SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested)
            : nested->tryDeserializeTextJSON(nested_column, istr, settings);
    };

    const bool parsed = deserializeTextImpl<bool>(column, istr, std::move(element_reader), false);
    return parsed;
}
void SerializationArray::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
@ -608,8 +679,8 @@ void SerializationArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
deserializeTextImpl(column, rb,
[&](IColumn & nested_column)
{
if (settings.null_as_default)
SerializationNullable::deserializeTextCSVImpl(nested_column, rb, settings, nested);
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextCSV(nested_column, rb, settings, nested);
else
nested->deserializeTextCSV(nested_column, rb, settings);
}, true);
@ -619,12 +690,43 @@ void SerializationArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
deserializeTextImpl(column, rb,
[&](IColumn & nested_column)
{
if (settings.null_as_default)
SerializationNullable::deserializeTextQuotedImpl(nested_column, rb, settings, nested);
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextQuoted(nested_column, rb, settings, nested);
else
nested->deserializeTextQuoted(nested_column, rb, settings);
}, true);
}
}
/// Non-throwing variant of deserializeTextCSV.
/// The CSV field is first read into a string; the array is then parsed from that string.
/// Elements are read in CSV form when settings.csv.arrays_as_nested_csv is set, and in quoted form otherwise.
bool SerializationArray::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String field;
    if (!tryReadCSV(field, istr, settings.csv))
        return false;

    ReadBufferFromString field_buf(field);

    if (settings.csv.arrays_as_nested_csv)
    {
        auto csv_element_reader = [&](IColumn & nested_column)
        {
            const bool use_null_as_default = settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column);
            return use_null_as_default
                ? SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextCSV(nested_column, field_buf, settings, nested)
                : nested->tryDeserializeTextCSV(nested_column, field_buf, settings);
        };

        return deserializeTextImpl<bool>(column, field_buf, csv_element_reader, true);
    }

    auto quoted_element_reader = [&](IColumn & nested_column)
    {
        const bool use_null_as_default = settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column);
        return use_null_as_default
            ? SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextQuoted(nested_column, field_buf, settings, nested)
            : nested->tryDeserializeTextQuoted(nested_column, field_buf, settings);
    };

    return deserializeTextImpl<bool>(column, field_buf, quoted_element_reader, true);
}
}

View File

@ -20,15 +20,18 @@ public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
/** Streaming serialization of arrays is arranged in a special way:
* - elements placed in a row are written/read without array sizes;

View File

@ -150,30 +150,42 @@ bool tryDeserializeAllVariants(ColumnUInt8 * column, ReadBuffer & istr)
return true;
}
void deserializeImpl(
template <typename ReturnType = void>
ReturnType deserializeImpl(
IColumn & column, ReadBuffer & istr, const FormatSettings & settings, std::function<bool(ReadBuffer &)> check_end_of_value)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
ColumnUInt8 * col = checkAndGetDeserializeColumnType(column);
auto restore_column_if_needed = [&, prev_size = col->size()]()
{
if (col->size() > prev_size)
col->popBack(1);
};
PeekableReadBuffer buf(istr);
buf.setCheckpoint();
if (checkString(settings.bool_true_representation, buf) && check_end_of_value(buf))
{
col->insert(true);
return;
return ReturnType(true);
}
buf.rollbackToCheckpoint();
if (checkString(settings.bool_false_representation, buf) && check_end_of_value(buf))
{
col->insert(false);
buf.dropCheckpoint();
if (buf.hasUnreadData())
throw Exception(
ErrorCodes::CANNOT_PARSE_BOOL,
"Cannot continue parsing after parsed bool value because it will result in the loss of some data. It may happen if "
"bool_true_representation or bool_false_representation contains some delimiters of input format");
return;
{
if constexpr (throw_exception)
throw Exception(
ErrorCodes::CANNOT_PARSE_BOOL,
"Cannot continue parsing after parsed bool value because it will result in the loss of some data. It may happen if "
"bool_true_representation or bool_false_representation contains some delimiters of input format");
return ReturnType(false);
}
col->insert(false);
return ReturnType(true);
}
buf.rollbackToCheckpoint();
@ -181,22 +193,31 @@ void deserializeImpl(
{
buf.dropCheckpoint();
if (buf.hasUnreadData())
throw Exception(
ErrorCodes::CANNOT_PARSE_BOOL,
"Cannot continue parsing after parsed bool value because it will result in the loss of some data. It may happen if "
"bool_true_representation or bool_false_representation contains some delimiters of input format");
return;
{
if constexpr (throw_exception)
throw Exception(
ErrorCodes::CANNOT_PARSE_BOOL,
"Cannot continue parsing after parsed bool value because it will result in the loss of some data. It may happen if "
"bool_true_representation or bool_false_representation contains some delimiters of input format");
restore_column_if_needed();
return ReturnType(false);
}
return ReturnType(true);
}
buf.makeContinuousMemoryFromCheckpointToPos();
buf.rollbackToCheckpoint();
throw Exception(
ErrorCodes::CANNOT_PARSE_BOOL,
"Cannot parse boolean value here: '{}', should be '{}' or '{}' controlled by setting bool_true_representation and "
"bool_false_representation or one of "
"True/False/T/F/Y/N/Yes/No/On/Off/Enable/Disable/Enabled/Disabled/1/0",
String(buf.position(), std::min(10lu, buf.available())),
settings.bool_true_representation, settings.bool_false_representation);
restore_column_if_needed();
if constexpr (throw_exception)
throw Exception(
ErrorCodes::CANNOT_PARSE_BOOL,
"Cannot parse boolean value here: '{}', should be '{}' or '{}' controlled by setting bool_true_representation and "
"bool_false_representation or one of "
"True/False/T/F/Y/N/Yes/No/On/Off/Enable/Disable/Enabled/Disabled/1/0",
String(buf.position(), std::min(10lu, buf.available())),
settings.bool_true_representation, settings.bool_false_representation);
return ReturnType(false);
}
}
@ -225,6 +246,14 @@ void SerializationBool::deserializeTextEscaped(IColumn & column, ReadBuffer & is
deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'; });
}
/// Non-throwing counterpart of deserializeTextEscaped.
/// Returns false for an empty buffer or when deserializeImpl<bool> reports a failure.
bool SerializationBool::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    /// A value ends at the end of input, at a tab, or at a line feed.
    auto is_end_of_value = [](ReadBuffer & buf)
    {
        if (buf.eof())
            return true;

        const char next = *buf.position();
        return next == '\t' || next == '\n';
    };

    return !istr.eof() && deserializeImpl<bool>(column, istr, settings, is_end_of_value);
}
void SerializationBool::serializeTextJSON(const IColumn &column, size_t row_num, WriteBuffer &ostr, const FormatSettings &settings) const
{
serializeSimple(column, row_num, ostr, settings);
@ -250,6 +279,33 @@ void SerializationBool::deserializeTextJSON(IColumn &column, ReadBuffer &istr, c
col->insert(value);
}
/// Non-throwing counterpart of deserializeTextJSON.
/// Accepts a value starting with 't' / 'f' (read with readBoolTextWord) or with '1' / '0'
/// (read with readBoolText); any other first character, or an empty buffer, yields false.
/// The value is appended to the column only after it has been read successfully.
bool SerializationBool::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const
{
    if (istr.eof())
        return false;

    ColumnUInt8 * bool_column = checkAndGetDeserializeColumnType(column);
    bool parsed_value = false;

    switch (*istr.position())
    {
        case 't':
        case 'f':
            if (!readBoolTextWord<bool>(parsed_value, istr))
                return false;
            break;

        case '1':
        case '0':
            /// Doesn't throw.
            readBoolText(parsed_value, istr);
            break;

        default:
            return false;
    }

    bool_column->insert(parsed_value);
    return true;
}
void SerializationBool::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeCustom(column, row_num, ostr, settings);
@ -263,6 +319,14 @@ void SerializationBool::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
deserializeImpl(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\n' || *buf.position() == '\r'; });
}
/// Non-throwing counterpart of deserializeTextCSV.
/// Returns false for an empty buffer or when deserializeImpl<bool> reports a failure.
bool SerializationBool::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    /// A value ends at the end of input, at the configured CSV delimiter, or at a line break character.
    auto is_end_of_value = [&](ReadBuffer & buf)
    {
        if (buf.eof())
            return true;

        const char next = *buf.position();
        return next == settings.csv.delimiter || next == '\n' || next == '\r';
    };

    return !istr.eof() && deserializeImpl<bool>(column, istr, settings, is_end_of_value);
}
void SerializationBool::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeCustom(column, row_num, ostr, settings);
@ -276,15 +340,30 @@ void SerializationBool::deserializeTextRaw(IColumn & column, ReadBuffer & istr,
deserializeImpl(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'; });
}
/// Non-throwing counterpart of deserializeTextRaw.
/// Returns false for an empty buffer or when deserializeImpl<bool> reports a failure.
bool SerializationBool::tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    /// A value ends at the end of input, at a tab, or at a line feed.
    auto is_end_of_value = [&](ReadBuffer & buf)
    {
        if (buf.eof())
            return true;

        const char next = *buf.position();
        return next == '\t' || next == '\n';
    };

    return !istr.eof() && deserializeImpl<bool>(column, istr, settings, is_end_of_value);
}
void SerializationBool::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeSimple(column, row_num, ostr, settings);
}
void SerializationBool::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
template <typename ReturnType>
ReturnType deserializeTextQuotedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
if (istr.eof())
throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Expected boolean value but get EOF.");
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Expected boolean value but get EOF.");
return ReturnType(false);
}
auto * col = checkAndGetDeserializeColumnType(column);
@ -292,11 +371,17 @@ void SerializationBool::deserializeTextQuoted(IColumn & column, ReadBuffer & ist
switch (symbol)
{
case 't':
assertStringCaseInsensitive("true", istr);
if constexpr (throw_exception)
assertStringCaseInsensitive("true", istr);
else if (!checkStringCaseInsensitive("true", istr))
return ReturnType(false);
col->insert(true);
break;
case 'f':
assertStringCaseInsensitive("false", istr);
if constexpr (throw_exception)
assertStringCaseInsensitive("false", istr);
else if (!checkStringCaseInsensitive("false", istr))
return ReturnType(false);
col->insert(false);
break;
case '1':
@ -307,16 +392,40 @@ void SerializationBool::deserializeTextQuoted(IColumn & column, ReadBuffer & ist
break;
case '\'':
++istr.position();
deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return !buf.eof() && *buf.position() == '\''; });
assertChar('\'', istr);
if constexpr (throw_exception)
{
deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return !buf.eof() && *buf.position() == '\''; });
assertChar('\'', istr);
}
else
{
if (!deserializeImpl<bool>(column, istr, settings, [](ReadBuffer & buf) { return !buf.eof() && *buf.position() == '\''; }) || !checkChar('\'', istr))
return ReturnType(false);
}
break;
default:
throw Exception(
ErrorCodes::CANNOT_PARSE_BOOL,
"Cannot parse boolean value here: '{}', should be true/false, 1/0 or on of "
"True/False/T/F/Y/N/Yes/No/On/Off/Enable/Disable/Enabled/Disabled/1/0 in quotes",
String(istr.position(), std::min(10ul, istr.available())));
{
if constexpr (throw_exception)
throw Exception(
ErrorCodes::CANNOT_PARSE_BOOL,
"Cannot parse boolean value here: '{}', should be true/false, 1/0 or on of "
"True/False/T/F/Y/N/Yes/No/On/Off/Enable/Disable/Enabled/Disabled/1/0 in quotes",
String(istr.position(), std::min(10ul, istr.available())));
return ReturnType(false);
}
}
return ReturnType(true);
}
/// Throwing entry point: forwards to the shared implementation instantiated with a `void` return type.
void SerializationBool::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    return deserializeTextQuotedImpl<void>(column, istr, settings);
}
/// Non-throwing entry point: forwards to the shared implementation instantiated with a `bool` return type
/// and passes its result through.
bool SerializationBool::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    const bool parsed = deserializeTextQuotedImpl<bool>(column, istr, settings);
    return parsed;
}
void SerializationBool::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
@ -327,6 +436,14 @@ void SerializationBool::deserializeWholeText(IColumn & column, ReadBuffer & istr
deserializeImpl(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof(); });
}
/// Non-throwing counterpart of deserializeWholeText.
/// Returns false for an empty buffer or when deserializeImpl<bool> reports a failure.
bool SerializationBool::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    /// The value must extend to the end of the input.
    auto is_end_of_value = [&](ReadBuffer & buf) { return buf.eof(); };

    return !istr.eof() && deserializeImpl<bool>(column, istr, settings, is_end_of_value);
}
void SerializationBool::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeSimple(column, row_num, ostr, settings);

View File

@ -15,21 +15,27 @@ public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
};

View File

@ -24,6 +24,12 @@ void deserializeFromString(const SerializationCustomSimpleText & domain, IColumn
domain.deserializeText(column, istr, settings, true);
}
bool tryDeserializeFromString(const SerializationCustomSimpleText & domain, IColumn & column, const String & s, const FormatSettings & settings)
{
ReadBufferFromString istr(s);
return domain.tryDeserializeText(column, istr, settings, true);
}
}
namespace DB
@ -34,6 +40,19 @@ SerializationCustomSimpleText::SerializationCustomSimpleText(const Serialization
{
}
/// Default non-throwing wrapper around deserializeText.
///
/// Any exception raised by deserializeText is swallowed and reported as `false`.
/// On failure the column is shrunk back to the size it had on entry, so that a value
/// which may have been appended before the error was raised does not stay behind.
/// This matches the other try* deserializers, which leave the column unchanged when they return false.
bool SerializationCustomSimpleText::tryDeserializeText(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, bool whole) const
{
    const size_t initial_size = column.size();

    try
    {
        deserializeText(column, istr, settings, whole);
        return true;
    }
    catch (...)
    {
        /// Deliberate catch-all: the contract of the try* variant is to never throw on bad input.
        /// Roll back whatever deserializeText managed to insert before failing.
        if (column.size() > initial_size)
            column.popBack(column.size() - initial_size);
        return false;
    }
}
void SerializationCustomSimpleText::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String str;
@ -41,6 +60,13 @@ void SerializationCustomSimpleText::deserializeWholeText(IColumn & column, ReadB
deserializeFromString(*this, column, str, settings);
}
/// Non-throwing counterpart of deserializeWholeText: reads everything up to EOF into a string
/// and parses that string with tryDeserializeFromString.
bool SerializationCustomSimpleText::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String whole_text;
    readStringUntilEOF(whole_text, istr);

    const bool parsed = tryDeserializeFromString(*this, column, whole_text, settings);
    return parsed;
}
void SerializationCustomSimpleText::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeEscapedString(serializeToString(*this, column, row_num, settings), ostr);
@ -53,6 +79,13 @@ void SerializationCustomSimpleText::deserializeTextEscaped(IColumn & column, Rea
deserializeFromString(*this, column, str, settings);
}
/// Non-throwing counterpart of deserializeTextEscaped: reads an escaped string
/// and parses it with tryDeserializeFromString.
bool SerializationCustomSimpleText::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String unescaped_text;
    readEscapedString(unescaped_text, istr);

    const bool parsed = tryDeserializeFromString(*this, column, unescaped_text, settings);
    return parsed;
}
void SerializationCustomSimpleText::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeQuotedString(serializeToString(*this, column, row_num, settings), ostr);
@ -65,6 +98,14 @@ void SerializationCustomSimpleText::deserializeTextQuoted(IColumn & column, Read
deserializeFromString(*this, column, str, settings);
}
/// Non-throwing counterpart of deserializeTextQuoted.
/// Fails if the quoted string cannot be read or if its contents cannot be parsed.
bool SerializationCustomSimpleText::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String unquoted_text;
    return tryReadQuotedString(unquoted_text, istr)
        && tryDeserializeFromString(*this, column, unquoted_text, settings);
}
void SerializationCustomSimpleText::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeCSVString(serializeToString(*this, column, row_num, settings), ostr);
@ -77,6 +118,13 @@ void SerializationCustomSimpleText::deserializeTextCSV(IColumn & column, ReadBuf
deserializeFromString(*this, column, str, settings);
}
/// Non-throwing counterpart of deserializeTextCSV: reads one CSV field into a string
/// and parses it with tryDeserializeFromString.
bool SerializationCustomSimpleText::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String csv_field;
    readCSVStringInto<String, false, false>(csv_field, istr, settings.csv);

    const bool parsed = tryDeserializeFromString(*this, column, csv_field, settings);
    return parsed;
}
void SerializationCustomSimpleText::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeJSONString(serializeToString(*this, column, row_num, settings), ostr, settings);
@ -89,6 +137,14 @@ void SerializationCustomSimpleText::deserializeTextJSON(IColumn & column, ReadBu
deserializeFromString(*this, column, str, settings);
}
/// Non-throwing counterpart of deserializeTextJSON.
/// Fails if the JSON string cannot be read or if its contents cannot be parsed.
bool SerializationCustomSimpleText::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String json_text;
    return tryReadJSONStringInto(json_text, istr)
        && tryDeserializeFromString(*this, column, json_text, settings);
}
void SerializationCustomSimpleText::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeXMLStringForTextElement(serializeToString(*this, column, row_num, settings), ostr);

View File

@ -22,20 +22,24 @@ public:
/// whole = true means that buffer contains only one value, so we should read until EOF.
/// It's needed to check if there is garbage after parsed field.
virtual void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const = 0;
virtual bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const;
/** Text deserialization in case when buffer contains only one value, without any escaping and delimiters.
*/
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
/** Text serialization with escaping but without quoting.
*/
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
/** Text serialization as a literal that may be inserted into a query.
*/
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
/** Text serialization for the CSV format.
*/
@ -44,12 +48,14 @@ public:
* (the delimiter is not consumed).
*/
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
/** Text serialization intended for using in JSON format.
* The force_quoting_64bit_integers parameter forces UInt64 and Int64 values to be enclosed in quotes.
*/
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
/** Text serialization for putting into the XML format.
*/

View File

@ -22,6 +22,15 @@ void SerializationDate::deserializeWholeText(IColumn & column, ReadBuffer & istr
throwUnexpectedDataAfterParsedValue(column, istr, settings, "Date");
}
/// Non-throwing counterpart of deserializeWholeText.
/// Succeeds only if a date is read and nothing is left in the buffer afterwards;
/// the column is appended to only on success.
bool SerializationDate::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    DayNum day;
    const bool parsed = tryReadDateText(day, istr, time_zone) && istr.eof();
    if (!parsed)
        return false;

    auto & data = assert_cast<ColumnUInt16 &>(column).getData();
    data.push_back(day);
    return true;
}
void SerializationDate::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
DayNum x;
@ -29,6 +38,15 @@ void SerializationDate::deserializeTextEscaped(IColumn & column, ReadBuffer & is
assert_cast<ColumnUInt16 &>(column).getData().push_back(x);
}
/// Non-throwing counterpart of deserializeTextEscaped.
/// The column is appended to only if tryReadDateText succeeds.
bool SerializationDate::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    DayNum day;
    const bool parsed = tryReadDateText(day, istr, time_zone);
    if (!parsed)
        return false;

    auto & data = assert_cast<ColumnUInt16 &>(column).getData();
    data.push_back(day);
    return true;
}
void SerializationDate::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeText(column, row_num, ostr, settings);
@ -50,6 +68,16 @@ void SerializationDate::deserializeTextQuoted(IColumn & column, ReadBuffer & ist
assert_cast<ColumnUInt16 &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
}
/// Non-throwing counterpart of deserializeTextQuoted.
/// Expects a single quote, a date, and a closing single quote, in that order;
/// the column is appended to only when all three were read.
bool SerializationDate::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    DayNum day;
    const bool parsed = checkChar('\'', istr) && tryReadDateText(day, istr, time_zone) && checkChar('\'', istr);
    if (!parsed)
        return false;

    auto & data = assert_cast<ColumnUInt16 &>(column).getData();
    data.push_back(day);
    return true;
}
void SerializationDate::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
@ -66,6 +94,15 @@ void SerializationDate::deserializeTextJSON(IColumn & column, ReadBuffer & istr,
assert_cast<ColumnUInt16 &>(column).getData().push_back(x);
}
/// Non-throwing counterpart of deserializeTextJSON.
/// Expects a double quote, a date, and a closing double quote, in that order;
/// the column is appended to only when all three were read.
bool SerializationDate::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    DayNum day;
    const bool parsed = checkChar('"', istr) && tryReadDateText(day, istr, time_zone) && checkChar('"', istr);
    if (!parsed)
        return false;

    auto & data = assert_cast<ColumnUInt16 &>(column).getData();
    data.push_back(day);
    return true;
}
void SerializationDate::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
@ -80,6 +117,15 @@ void SerializationDate::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
assert_cast<ColumnUInt16 &>(column).getData().push_back(value);
}
/// Non-throwing counterpart of deserializeTextCSV.
/// The column is appended to only if tryReadCSV succeeds.
bool SerializationDate::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    DayNum day;
    const bool parsed = tryReadCSV(day, istr, time_zone);
    if (!parsed)
        return false;

    auto & data = assert_cast<ColumnUInt16 &>(column).getData();
    data.push_back(day);
    return true;
}
/// Initializes the `time_zone` member from the given DateLUTImpl.
SerializationDate::SerializationDate(const DateLUTImpl & time_zone_)
    : time_zone(time_zone_)
{
}

View File

@ -13,14 +13,19 @@ public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
protected:
const DateLUTImpl & time_zone;

View File

@ -21,6 +21,15 @@ void SerializationDate32::deserializeWholeText(IColumn & column, ReadBuffer & is
throwUnexpectedDataAfterParsedValue(column, istr, settings, "Date32");
}
/// Non-throwing counterpart of deserializeWholeText.
/// Succeeds only if a date is read and nothing is left in the buffer afterwards;
/// the column is appended to only on success.
bool SerializationDate32::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    ExtendedDayNum day;
    const bool parsed = tryReadDateText(day, istr, time_zone) && istr.eof();
    if (!parsed)
        return false;

    auto & data = assert_cast<ColumnInt32 &>(column).getData();
    data.push_back(day);
    return true;
}
void SerializationDate32::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
ExtendedDayNum x;
@ -28,6 +37,15 @@ void SerializationDate32::deserializeTextEscaped(IColumn & column, ReadBuffer &
assert_cast<ColumnInt32 &>(column).getData().push_back(x);
}
/// Non-throwing counterpart of deserializeTextEscaped.
/// The column is appended to only if tryReadDateText succeeds.
bool SerializationDate32::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    ExtendedDayNum day;
    const bool parsed = tryReadDateText(day, istr, time_zone);
    if (!parsed)
        return false;

    auto & data = assert_cast<ColumnInt32 &>(column).getData();
    data.push_back(day);
    return true;
}
void SerializationDate32::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeText(column, row_num, ostr, settings);
@ -49,6 +67,15 @@ void SerializationDate32::deserializeTextQuoted(IColumn & column, ReadBuffer & i
assert_cast<ColumnInt32 &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
}
/// Non-throwing counterpart of deserializeTextQuoted.
/// Expects a single quote, a date, and a closing single quote, in that order.
bool SerializationDate32::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    ExtendedDayNum day;
    const bool parsed = checkChar('\'', istr) && tryReadDateText(day, istr, time_zone) && checkChar('\'', istr);
    if (!parsed)
        return false;

    /// Append only after the whole value has been read, so a failed attempt does not touch the column.
    auto & data = assert_cast<ColumnInt32 &>(column).getData();
    data.push_back(day);
    return true;
}
void SerializationDate32::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
@ -65,6 +92,15 @@ void SerializationDate32::deserializeTextJSON(IColumn & column, ReadBuffer & ist
assert_cast<ColumnInt32 &>(column).getData().push_back(x);
}
/// Tries to read a date enclosed in double quotes and append it to `column`.
/// Returns false if the opening quote, the date, or the closing quote is missing.
bool SerializationDate32::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    ExtendedDayNum day;
    const bool parsed = checkChar('"', istr) && tryReadDateText(day, istr, time_zone) && checkChar('"', istr);
    if (!parsed)
        return false;

    assert_cast<ColumnInt32 &>(column).getData().push_back(day);
    return true;
}
void SerializationDate32::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
@ -79,6 +115,15 @@ void SerializationDate32::deserializeTextCSV(IColumn & column, ReadBuffer & istr
assert_cast<ColumnInt32 &>(column).getData().push_back(value.getExtenedDayNum());
}
/// Tries to read a CSV date value via tryReadCSV and append its extended day number to `column`.
/// Returns false, leaving `column` untouched, when tryReadCSV fails.
bool SerializationDate32::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    LocalDate parsed_date;
    if (tryReadCSV(parsed_date, istr))
    {
        assert_cast<ColumnInt32 &>(column).getData().push_back(parsed_date.getExtenedDayNum());
        return true;
    }
    return false;
}
/// Binds the `time_zone` member to `time_zone_`; that member is what the tryReadDateText calls in this file receive.
SerializationDate32::SerializationDate32(const DateLUTImpl & time_zone_) : time_zone(time_zone_)
{
}

View File

@ -12,14 +12,19 @@ public:
/// Text (de)serialization overrides, one group per text format (whole text, escaped, quoted, JSON, CSV).
/// Every `tryDeserialize*` method is the bool-returning counterpart of the `deserialize*` method declared right above it.
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
protected:
const DateLUTImpl & time_zone;

View File

@ -21,15 +21,56 @@ inline void readText(time_t & x, ReadBuffer & istr, const FormatSettings & setti
switch (settings.date_time_input_format)
{
case FormatSettings::DateTimeInputFormat::Basic:
readDateTimeText(x, istr, time_zone);
return;
readDateTimeTextImpl<>(x, istr, time_zone);
break;
case FormatSettings::DateTimeInputFormat::BestEffort:
parseDateTimeBestEffort(x, istr, time_zone, utc_time_zone);
return;
break;
case FormatSettings::DateTimeInputFormat::BestEffortUS:
parseDateTimeBestEffortUS(x, istr, time_zone, utc_time_zone);
return;
break;
}
if (x < 0)
x = 0;
}
/// Reads an integer from `istr` into `x`; a negative result is replaced with 0.
inline void readAsIntText(time_t & x, ReadBuffer & istr)
{
    readIntText(x, istr);
    x = (x < 0) ? 0 : x;
}
/// Tries to parse a date-time from `istr` into `x` using the parser selected by `settings.date_time_input_format`.
/// Returns the result of the selected parser. A negative `x` is replaced with 0 before returning.
inline bool tryReadText(time_t & x, ReadBuffer & istr, const FormatSettings & settings, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone)
{
    /// Initialized so the return value is well-defined even if `date_time_input_format`
    /// holds a value that matches none of the cases below.
    bool res = false;
    switch (settings.date_time_input_format)
    {
        case FormatSettings::DateTimeInputFormat::Basic:
            res = tryReadDateTimeText(x, istr, time_zone);
            break;
        case FormatSettings::DateTimeInputFormat::BestEffort:
            res = tryParseDateTimeBestEffort(x, istr, time_zone, utc_time_zone);
            break;
        case FormatSettings::DateTimeInputFormat::BestEffortUS:
            res = tryParseDateTimeBestEffortUS(x, istr, time_zone, utc_time_zone);
            break;
    }

    if (x < 0)
        x = 0;

    return res;
}
/// Tries to read an integer from `istr` into `x`.
/// On success a negative value is replaced with 0; on failure `x` is not clamped.
inline bool tryReadAsIntText(time_t & x, ReadBuffer & istr)
{
    const bool parsed = tryReadIntText(x, istr);
    if (parsed && x < 0)
        x = 0;
    return parsed;
}
}
@ -68,15 +109,32 @@ void SerializationDateTime::deserializeWholeText(IColumn & column, ReadBuffer &
throwUnexpectedDataAfterParsedValue(column, istr, settings, "DateTime");
}
/// Tries to parse a date-time that must occupy the whole of `istr` and append it to `column` as UInt32.
/// Returns false, leaving `column` untouched, if parsing fails or input remains after the value.
bool SerializationDateTime::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    time_t timestamp = 0;
    const bool consumed_everything = tryReadText(timestamp, istr, settings, time_zone, utc_time_zone) && istr.eof();
    if (consumed_everything)
        assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(timestamp));
    return consumed_everything;
}
/// Reads a date-time from `istr` with readText and appends it to `column` as UInt32.
void SerializationDateTime::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
time_t x = 0;
readText(x, istr, settings, time_zone, utc_time_zone);
/// Negative values are stored as 0.
/// NOTE(review): readText appears to apply the same clamp itself, which would make this one redundant - confirm.
if (x < 0)
x = 0;
assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(x));
}
/// Tries to read a date-time from `istr` and append it to `column` as UInt32.
/// Returns false, leaving `column` untouched, when tryReadText fails.
bool SerializationDateTime::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    time_t timestamp = 0;
    if (tryReadText(timestamp, istr, settings, time_zone, utc_time_zone))
    {
        assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(timestamp));
        return true;
    }
    return false;
}
void SerializationDateTime::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('\'', ostr);
@ -94,15 +152,32 @@ void SerializationDateTime::deserializeTextQuoted(IColumn & column, ReadBuffer &
}
else /// Just 1504193808 or 01504193808
{
readIntText(x, istr);
readAsIntText(x, istr);
}
if (x < 0)
x = 0;
/// It's important to do this at the end - for exception safety.
assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(x));
}
/// Tries to read a date-time that is either single-quoted text or a bare integer, and append it to `column`.
/// Returns false, leaving `column` untouched, when the value cannot be read.
bool SerializationDateTime::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    time_t timestamp = 0;

    /// Quoted cases: '2017-08-31 18:36:48' or '1504193808'. Unquoted: just 1504193808 or 01504193808.
    const bool is_quoted = checkChar('\'', istr);
    const bool parsed = is_quoted
        ? (tryReadText(timestamp, istr, settings, time_zone, utc_time_zone) && checkChar('\'', istr))
        : tryReadAsIntText(timestamp, istr);

    if (!parsed)
        return false;

    /// It's important to do this at the end - for exception safety.
    assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(timestamp));
    return true;
}
void SerializationDateTime::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
@ -120,13 +195,30 @@ void SerializationDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & i
}
else
{
readIntText(x, istr);
readAsIntText(x, istr);
}
if (x < 0)
x = 0;
assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(x));
}
/// Tries to read a date-time that is either a double-quoted string or a bare integer, and append it to `column` as UInt32.
/// Returns false, leaving `column` untouched, when the value cannot be read.
bool SerializationDateTime::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    time_t x = 0;
    if (checkChar('"', istr))
    {
        if (!tryReadText(x, istr, settings, time_zone, utc_time_zone) || !checkChar('"', istr))
            return false;
    }
    else
    {
        /// Use the clamping helper, like deserializeTextJSON and tryDeserializeTextQuoted do:
        /// a negative integer must become 0 rather than wrap around in the UInt32 cast below.
        if (!tryReadAsIntText(x, istr))
            return false;
    }

    assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(x));
    return true;
}
void SerializationDateTime::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
@ -165,13 +257,48 @@ void SerializationDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & is
readCSVString(datetime_str, istr, settings.csv);
ReadBufferFromString buf(datetime_str);
readText(x, buf, settings, time_zone, utc_time_zone);
if (!buf.eof())
throwUnexpectedDataAfterParsedValue(column, istr, settings, "DateTime");
}
}
if (x < 0)
x = 0;
assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(x));
}
/// Tries to read a CSV date-time value (optionally wrapped in single or double quotes) and append it to `column` as UInt32.
/// Returns false, without appending, on empty input or when the value cannot be parsed.
bool SerializationDateTime::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    if (istr.eof())
        return false;

    time_t timestamp = 0;
    const char first_char = *istr.position();
    const bool is_quoted = (first_char == '\'' || first_char == '\"');

    if (is_quoted)
    {
        /// Skip the opening quote; the value must be followed by the same quote character.
        ++istr.position();
        if (!(tryReadText(timestamp, istr, settings, time_zone, utc_time_zone) && checkChar(first_char, istr)))
            return false;
    }
    else if (settings.csv.delimiter != ',' || settings.date_time_input_format == FormatSettings::DateTimeInputFormat::Basic)
    {
        if (!tryReadText(timestamp, istr, settings, time_zone, utc_time_zone))
            return false;
    }
    else
    {
        /// With ',' as delimiter and a non-Basic input format, the CSV field is extracted first and
        /// must then be parsed completely (presumably so the parser cannot read past the field - confirm).
        String field;
        readCSVString(field, istr, settings.csv);
        ReadBufferFromString field_buf(field);
        if (!(tryReadText(timestamp, field_buf, settings, time_zone, utc_time_zone) && field_buf.eof()))
            return false;
    }

    assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(timestamp));
    return true;
}
}

View File

@ -15,14 +15,19 @@ public:
/// Text (de)serialization overrides, one group per text format (whole text, escaped, quoted, JSON, CSV).
/// Every `tryDeserialize*` method is the bool-returning counterpart of the `deserialize*` method declared right above it.
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
};
}

View File

@ -47,6 +47,16 @@ void SerializationDateTime64::deserializeText(IColumn & column, ReadBuffer & ist
throwUnexpectedDataAfterParsedValue(column, istr, settings, "DateTime64");
}
/// Tries to read a DateTime64 with tryReadDateTime64Text and append it to `column`.
/// When `whole` is true the value must be followed by end of input. Returns false without appending on failure.
bool SerializationDateTime64::tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const
{
    DateTime64 value = 0;
    if (!tryReadDateTime64Text(value, scale, istr, time_zone))
        return false;

    if (whole && !istr.eof())
        return false;

    assert_cast<ColumnType &>(column).getData().push_back(value);
    return true;
}
void SerializationDateTime64::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextEscaped(column, istr, settings);
@ -75,6 +85,29 @@ static inline void readText(DateTime64 & x, UInt32 scale, ReadBuffer & istr, con
}
}
/// Tries to parse a DateTime64 from `istr` into `x` using the parser selected by `settings.date_time_input_format`.
/// Returns the result of the selected parser.
static inline bool tryReadText(DateTime64 & x, UInt32 scale, ReadBuffer & istr, const FormatSettings & settings, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone)
{
    switch (settings.date_time_input_format)
    {
        case FormatSettings::DateTimeInputFormat::Basic:
            return tryReadDateTime64Text(x, scale, istr, time_zone);
        case FormatSettings::DateTimeInputFormat::BestEffort:
            return tryParseDateTime64BestEffort(x, scale, istr, time_zone, utc_time_zone);
        case FormatSettings::DateTimeInputFormat::BestEffortUS:
            return tryParseDateTime64BestEffortUS(x, scale, istr, time_zone, utc_time_zone);
    }

    /// Only reached if `date_time_input_format` matches none of the cases above.
    /// Report a parse failure instead of flowing off the end of a non-void function.
    return false;
}
/// Tries to parse a DateTime64 that must occupy the whole of `istr` and append it to `column`.
/// Returns false, leaving `column` untouched, if parsing fails or input remains after the value.
bool SerializationDateTime64::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    DateTime64 value = 0;
    const bool consumed_everything = tryReadText(value, scale, istr, settings, time_zone, utc_time_zone) && istr.eof();
    if (consumed_everything)
        assert_cast<ColumnType &>(column).getData().push_back(value);
    return consumed_everything;
}
void SerializationDateTime64::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
DateTime64 x = 0;
@ -82,6 +115,15 @@ void SerializationDateTime64::deserializeTextEscaped(IColumn & column, ReadBuffe
assert_cast<ColumnType &>(column).getData().push_back(x);
}
/// Tries to read a DateTime64 from `istr` and append it to `column`.
/// Returns false, leaving `column` untouched, when tryReadText fails.
bool SerializationDateTime64::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    DateTime64 value = 0;
    if (tryReadText(value, scale, istr, settings, time_zone, utc_time_zone))
    {
        assert_cast<ColumnType &>(column).getData().push_back(value);
        return true;
    }
    return false;
}
void SerializationDateTime64::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('\'', ostr);
@ -104,6 +146,23 @@ void SerializationDateTime64::deserializeTextQuoted(IColumn & column, ReadBuffer
assert_cast<ColumnType &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
}
/// Tries to read a DateTime64 that is either single-quoted text or a bare integer, and append it to `column`.
/// Returns false, leaving `column` untouched, when the value cannot be read.
bool SerializationDateTime64::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    DateTime64 value = 0;

    /// Quoted cases: '2017-08-31 18:36:48' or '1504193808'. Unquoted: just 1504193808 or 01504193808.
    const bool is_quoted = checkChar('\'', istr);
    const bool parsed = is_quoted
        ? (tryReadText(value, scale, istr, settings, time_zone, utc_time_zone) && checkChar('\'', istr))
        : tryReadIntText(value, istr);

    if (!parsed)
        return false;

    /// It's important to do this at the end - for exception safety.
    assert_cast<ColumnType &>(column).getData().push_back(value);
    return true;
}
void SerializationDateTime64::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
@ -126,6 +185,23 @@ void SerializationDateTime64::deserializeTextJSON(IColumn & column, ReadBuffer &
assert_cast<ColumnType &>(column).getData().push_back(x);
}
/// Tries to read a DateTime64 that is either a double-quoted string or a bare integer, and append it to `column`.
/// Returns false, leaving `column` untouched, when the value cannot be read.
bool SerializationDateTime64::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    DateTime64 value = 0;

    const bool is_quoted = checkChar('"', istr);
    const bool parsed = is_quoted
        ? (tryReadText(value, scale, istr, settings, time_zone, utc_time_zone) && checkChar('"', istr))
        : tryReadIntText(value, istr);

    if (!parsed)
        return false;

    assert_cast<ColumnType &>(column).getData().push_back(value);
    return true;
}
void SerializationDateTime64::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
@ -170,4 +246,40 @@ void SerializationDateTime64::deserializeTextCSV(IColumn & column, ReadBuffer &
assert_cast<ColumnType &>(column).getData().push_back(x);
}
/// Tries to read a CSV DateTime64 value (optionally wrapped in single or double quotes) and append it to `column`.
/// Returns false, without appending, on empty input or when the value cannot be parsed.
bool SerializationDateTime64::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    if (istr.eof())
        return false;

    DateTime64 value = 0;
    const char first_char = *istr.position();
    const bool is_quoted = (first_char == '\'' || first_char == '\"');

    if (is_quoted)
    {
        /// Skip the opening quote; the value must be followed by the same quote character.
        ++istr.position();
        if (!(tryReadText(value, scale, istr, settings, time_zone, utc_time_zone) && checkChar(first_char, istr)))
            return false;
    }
    else if (settings.csv.delimiter != ',' || settings.date_time_input_format == FormatSettings::DateTimeInputFormat::Basic)
    {
        if (!tryReadText(value, scale, istr, settings, time_zone, utc_time_zone))
            return false;
    }
    else
    {
        /// With ',' as delimiter and a non-Basic input format, the CSV field is extracted first and
        /// must then be parsed completely (presumably so the parser cannot read past the field - confirm).
        String field;
        readCSVString(field, istr, settings.csv);
        ReadBufferFromString field_buf(field);
        if (!(tryReadText(value, scale, field_buf, settings, time_zone, utc_time_zone) && field_buf.eof()))
            return false;
    }

    assert_cast<ColumnType &>(column).getData().push_back(value);
    return true;
}
}

View File

@ -15,15 +15,21 @@ public:
/// Text (de)serialization overrides, one group per text format (plain/whole text, escaped, quoted, JSON, CSV).
/// Every `tryDeserialize*` method is the bool-returning counterpart of the `deserialize*` method declared right above it.
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
};
}

View File

@ -16,11 +16,19 @@ namespace ErrorCodes
}
template <typename T>
bool SerializationDecimal<T>::tryReadText(T & x, ReadBuffer & istr, UInt32 precision, UInt32 scale)
bool SerializationDecimal<T>::tryReadText(T & x, ReadBuffer & istr, UInt32 precision, UInt32 scale, bool csv)
{
UInt32 unread_scale = scale;
if (!tryReadDecimalText(istr, x, precision, unread_scale))
return false;
if (csv)
{
if (!tryReadCSVDecimalText(istr, x, precision, unread_scale))
return false;
}
else
{
if (!tryReadDecimalText(istr, x, precision, unread_scale))
return false;
}
if (common::mulOverflow(x.value, DecimalUtils::scaleMultiplier<T>(unread_scale), x.value))
return false;
@ -59,6 +67,16 @@ void SerializationDecimal<T>::deserializeText(IColumn & column, ReadBuffer & ist
ISerialization::throwUnexpectedDataAfterParsedValue(column, istr, settings, "Decimal");
}
/// Tries to read a decimal with tryReadText and append it to `column`.
/// When `whole` is true the value must be followed by end of input. Returns false without appending on failure.
template <typename T>
bool SerializationDecimal<T>::tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const
{
    T value;
    if (!tryReadText(value, istr))
        return false;

    if (whole && !istr.eof())
        return false;

    assert_cast<ColumnType &>(column).getData().push_back(value);
    return true;
}
template <typename T>
void SerializationDecimal<T>::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
@ -67,6 +85,16 @@ void SerializationDecimal<T>::deserializeTextCSV(IColumn & column, ReadBuffer &
assert_cast<ColumnType &>(column).getData().push_back(x);
}
/// Tries to read a decimal in CSV mode (tryReadText with csv = true) and append it to `column`.
/// Returns false, leaving `column` untouched, on failure.
template <typename T>
bool SerializationDecimal<T>::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    T value;
    const bool parsed = tryReadText(value, istr, true);
    if (parsed)
        assert_cast<ColumnType &>(column).getData().push_back(value);
    return parsed;
}
template <typename T>
void SerializationDecimal<T>::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
@ -88,6 +116,18 @@ void SerializationDecimal<T>::deserializeTextJSON(IColumn & column, ReadBuffer &
assertChar('"', istr);
}
/// Tries to read a decimal that may optionally be wrapped in double quotes and append it to `column`.
/// If an opening quote was consumed, a closing quote is required. Returns false without appending on failure.
template <typename T>
bool SerializationDecimal<T>::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    const bool opened_quote = checkChar('"', istr);

    T value;
    if (!tryReadText(value, istr))
        return false;

    if (opened_quote && !checkChar('"', istr))
        return false;

    assert_cast<ColumnType &>(column).getData().push_back(value);
    return true;
}
template class SerializationDecimal<Decimal32>;
template class SerializationDecimal<Decimal64>;

View File

@ -16,15 +16,19 @@ public:
/// Text (de)serialization overrides. Every `tryDeserialize*` method is the bool-returning counterpart
/// of the `deserialize*` method declared right above it.
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
/// Convenience overloads that forward to the static readers using this object's precision and scale.
void readText(T & x, ReadBuffer & istr, bool csv = false) const { readText(x, istr, this->precision, this->scale, csv); }
bool tryReadText(T & x, ReadBuffer & istr, bool csv = false) const { return tryReadText(x, istr, this->precision, this->scale, csv); }
static void readText(T & x, ReadBuffer & istr, UInt32 precision_, UInt32 scale_, bool csv = false);
static bool tryReadText(T & x, ReadBuffer & istr, UInt32 precision_, UInt32 scale_);
static bool tryReadText(T & x, ReadBuffer & istr, UInt32 precision_, UInt32 scale_, bool csv = false);
};
}

View File

@ -34,6 +34,27 @@ void SerializationEnum<Type>::deserializeTextEscaped(IColumn & column, ReadBuffe
}
}
/// Tries to read an enum value in escaped text form and append it to `column`.
/// With `settings.tsv.enum_as_number` the input is read as a numeric value via tryReadValue;
/// otherwise an escaped string is read and looked up in `ref_enum_values`.
/// Returns false, leaving `column` untouched, when no enum value is obtained.
template <typename Type>
bool SerializationEnum<Type>::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    FieldType value;
    bool resolved;

    if (settings.tsv.enum_as_number)
    {
        resolved = tryReadValue(istr, value);
    }
    else
    {
        std::string name;
        readEscapedString(name, istr);
        resolved = ref_enum_values.tryGetValue(value, StringRef(name), true);
    }

    if (!resolved)
        return false;

    assert_cast<ColumnType &>(column).getData().push_back(value);
    return true;
}
template <typename Type>
void SerializationEnum<Type>::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
@ -48,6 +69,20 @@ void SerializationEnum<Type>::deserializeTextQuoted(IColumn & column, ReadBuffer
assert_cast<ColumnType &>(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name)));
}
/// Tries to read a quoted enum name and append the matching enum value to `column`.
/// Returns false, leaving `column` untouched, if the quoted string cannot be read
/// or the name is not found in `ref_enum_values`.
template <typename Type>
bool SerializationEnum<Type>::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    std::string name;
    FieldType value;

    const bool resolved = tryReadQuotedStringWithSQLStyle(name, istr)
        && ref_enum_values.tryGetValue(value, StringRef(name));

    if (resolved)
        assert_cast<ColumnType &>(column).getData().push_back(value);
    return resolved;
}
template <typename Type>
void SerializationEnum<Type>::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
@ -65,6 +100,27 @@ void SerializationEnum<Type>::deserializeWholeText(IColumn & column, ReadBuffer
}
}
/// Tries to read an enum value that occupies the whole of `istr` and append it to `column`.
/// With `settings.tsv.enum_as_number` a numeric value followed by end of input is required;
/// otherwise everything up to end of input is taken as the name and looked up in `ref_enum_values`.
/// Returns false, leaving `column` untouched, when no enum value is obtained.
template <typename Type>
bool SerializationEnum<Type>::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    FieldType value;
    bool resolved;

    if (settings.tsv.enum_as_number)
    {
        resolved = tryReadValue(istr, value) && istr.eof();
    }
    else
    {
        std::string name;
        readStringUntilEOF(name, istr);
        resolved = ref_enum_values.tryGetValue(value, StringRef(name), true);
    }

    if (!resolved)
        return false;

    assert_cast<ColumnType &>(column).getData().push_back(value);
    return true;
}
template <typename Type>
void SerializationEnum<Type>::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
@ -90,6 +146,27 @@ void SerializationEnum<Type>::deserializeTextJSON(IColumn & column, ReadBuffer &
}
}
/// Tries to read an enum value in JSON form and append it to `column`.
/// If the next character is not a double quote, the input is read as a numeric value via tryReadValue;
/// otherwise (including at end of input) a JSON string is expected and looked up in `ref_enum_values`.
/// Returns false, leaving `column` untouched, when no enum value is obtained.
template <typename Type>
bool SerializationEnum<Type>::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    FieldType x;
    if (!istr.eof() && *istr.position() != '"')
    {
        if (!tryReadValue(istr, x))
            return false;
    }
    else
    {
        std::string field_name;
        /// Use the bool-returning reader: a malformed or missing JSON string (e.g. empty input)
        /// must be reported as `false` by this try-method rather than by an exception from readJSONString.
        if (!tryReadJSONStringInto(field_name, istr))
            return false;
        if (!ref_enum_values.tryGetValue(x, StringRef(field_name)))
            return false;
    }

    assert_cast<ColumnType &>(column).getData().push_back(x);
    return true;
}
template <typename Type>
void SerializationEnum<Type>::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
@ -109,6 +186,28 @@ void SerializationEnum<Type>::deserializeTextCSV(IColumn & column, ReadBuffer &
}
}
/// Tries to read an enum value in CSV form and append it to `column`.
/// With `settings.csv.enum_as_number` the input is read as a numeric value via tryReadValue;
/// otherwise a CSV string is read and looked up in `ref_enum_values`.
/// Returns false, leaving `column` untouched, when no enum value is obtained.
template <typename Type>
bool SerializationEnum<Type>::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    FieldType value;
    bool resolved;

    if (settings.csv.enum_as_number)
    {
        resolved = tryReadValue(istr, value);
    }
    else
    {
        std::string name;
        readCSVString(name, istr, settings.csv);
        resolved = ref_enum_values.tryGetValue(value, StringRef(name), true);
    }

    if (!resolved)
        return false;

    assert_cast<ColumnType &>(column).getData().push_back(value);
    return true;
}
template <typename Type>
void SerializationEnum<Type>::serializeTextMarkdown(
const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const

View File

@ -34,15 +34,20 @@ public:
/// Text (de)serialization overrides, one group per text format (escaped, quoted, whole text, JSON, XML, CSV, Markdown).
/// Every `tryDeserialize*` method is the bool-returning counterpart of the `deserialize*` method declared right above it.
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
@ -53,6 +58,11 @@ public:
return ref_enum_values.findByValue(x)->first;
}
/// Tries to read a value from `istr` into `x`.
/// Returns true only if the read succeeds and `ref_enum_values` has that value.
bool tryReadValue(ReadBuffer & istr, FieldType & x) const
{
    if (!tryReadText(x, istr))
        return false;
    return ref_enum_values.hasValue(x);
}
std::optional<EnumValues<Type>> own_enum_values;
std::shared_ptr<const DataTypeEnum<Type>> own_enum_type;
const EnumValues<Type> & ref_enum_values;

View File

@ -150,12 +150,49 @@ static inline void read(const SerializationFixedString & self, IColumn & column,
}
}
/// Aligns the string stored in `data` from `string_start` to the end so that it is exactly `n` bytes long.
/// A shorter string is extended to `n` bytes with resize_fill. A longer string is removed from `data`
/// (truncated back to `string_start`) and false is returned. Returns true otherwise.
bool SerializationFixedString::tryAlignStringLength(size_t n, PaddedPODArray<UInt8> & data, size_t string_start)
{
    const size_t current_length = data.size() - string_start;

    if (current_length > n)
    {
        data.resize_assume_reserved(string_start);
        return false;
    }

    if (current_length < n)
        data.resize_fill(string_start + n);

    return true;
}
/// Runs `reader` to append one string to the chars of the ColumnFixedString `column`
/// and aligns it to `self.getN()` bytes with tryAlignStringLength.
/// Returns true on success. On any failure - `reader` returned false, the string is longer than N,
/// or an exception was thrown - the chars are truncated back to their previous size and false is returned.
template <typename Reader>
static inline bool tryRead(const SerializationFixedString & self, IColumn & column, Reader && reader)
{
    ColumnFixedString::Chars & data = typeid_cast<ColumnFixedString &>(column).getChars();
    size_t prev_size = data.size();
    try
    {
        if (reader(data) && SerializationFixedString::tryAlignStringLength(self.getN(), data, prev_size))
            return true;
    }
    catch (...)
    {
        /// Handled by the common rollback below.
    }

    /// The reader may have appended part of a value before failing; drop it so that
    /// the chars do not keep bytes of a value that was never accepted.
    data.resize_assume_reserved(prev_size);
    return false;
}
/// Reads an escaped string from `istr` into the fixed-string column through the shared `read` helper.
void SerializationFixedString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    auto read_escaped = [&istr](ColumnFixedString::Chars & chars) { readEscapedStringInto(chars, istr); };
    read(*this, column, read_escaped);
}
/// Reads an escaped string from `istr` into the fixed-string column through the shared `tryRead` helper
/// and returns its result. The reader itself always reports success.
bool SerializationFixedString::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    auto read_escaped = [&istr](ColumnFixedString::Chars & chars)
    {
        readEscapedStringInto(chars, istr);
        return true;
    };
    return tryRead(*this, column, read_escaped);
}
void SerializationFixedString::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
@ -169,12 +206,22 @@ void SerializationFixedString::deserializeTextQuoted(IColumn & column, ReadBuffe
read(*this, column, [&istr](ColumnFixedString::Chars & data) { readQuotedStringInto<true>(data, istr); });
}
/// Tries to read a quoted string from `istr` into the fixed-string column through the shared `tryRead` helper
/// and returns its result.
bool SerializationFixedString::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    auto read_quoted = [&istr](ColumnFixedString::Chars & chars) { return tryReadQuotedStringInto<true>(chars, istr); };
    return tryRead(*this, column, read_quoted);
}
/// Reads everything up to end of input from `istr` into the fixed-string column through the shared `read` helper.
void SerializationFixedString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    auto read_until_eof = [&istr](ColumnFixedString::Chars & chars) { readStringUntilEOFInto(chars, istr); };
    read(*this, column, read_until_eof);
}
/// Reads everything up to end of input from `istr` into the fixed-string column through the shared `tryRead` helper
/// and returns its result. The reader itself always reports success.
bool SerializationFixedString::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    auto read_until_eof = [&istr](ColumnFixedString::Chars & chars)
    {
        readStringUntilEOFInto(chars, istr);
        return true;
    };
    return tryRead(*this, column, read_until_eof);
}
void SerializationFixedString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
@ -188,6 +235,10 @@ void SerializationFixedString::deserializeTextJSON(IColumn & column, ReadBuffer
read(*this, column, [&istr](ColumnFixedString::Chars & data) { readJSONStringInto(data, istr); });
}
/// Tries to read a JSON string from `istr` into the fixed-string column through the shared `tryRead` helper
/// and returns its result.
bool SerializationFixedString::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    auto read_json = [&istr](ColumnFixedString::Chars & chars) { return tryReadJSONStringInto(chars, istr); };
    return tryRead(*this, column, read_json);
}
void SerializationFixedString::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
@ -208,6 +259,11 @@ void SerializationFixedString::deserializeTextCSV(IColumn & column, ReadBuffer &
read(*this, column, [&istr, &csv = settings.csv](ColumnFixedString::Chars & data) { readCSVStringInto(data, istr, csv); });
}
/// Reads a CSV string from `istr` into the fixed-string column through the shared `tryRead` helper
/// and returns its result. The reader itself always reports success.
bool SerializationFixedString::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    auto read_csv = [&istr, &csv = settings.csv](ColumnFixedString::Chars & chars)
    {
        readCSVStringInto<ColumnFixedString::Chars, false, false>(chars, istr, csv);
        return true;
    };
    return tryRead(*this, column, read_csv);
}
void SerializationFixedString::serializeTextMarkdown(
const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const
{

View File

@ -26,20 +26,25 @@ public:
/// Text (de)serialization overrides, one group per text format (whole text, escaped, quoted, JSON, XML, CSV, Markdown).
/// Every `tryDeserialize*` method is the bool-returning counterpart of the `deserialize*` method declared right above it.
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
@ -47,6 +52,7 @@ public:
/// If the length is less than getN() the function will add zero characters up to getN().
/// If the length is greater than getN() the function will throw an exception.
static void alignStringLength(size_t n, PaddedPODArray<UInt8> & data, size_t string_start);
static bool tryAlignStringLength(size_t n, PaddedPODArray<UInt8> & data, size_t string_start);
};
}

View File

@ -0,0 +1,187 @@
#include <DataTypes/Serializations/SerializationIPv4andIPv6.h>
namespace DB
{
/// Writes the IP value stored at row `row_num` of `column` to `ostr` as text.
template <typename IPv>
void SerializationIP<IPv>::serializeText(const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings &) const
{
    const auto & values = assert_cast<const ColumnVector<IPv> &>(column).getData();
    writeText(values[row_num], ostr);
}
/// Parses one IP value from `istr` and appends it to `column`.
/// When `whole` is set and input remains after the value, reports the error
/// through throwUnexpectedDataAfterParsedValue.
template <typename IPv>
void SerializationIP<IPv>::deserializeText(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, bool whole) const
{
    IPv parsed;
    readText(parsed, istr);

    /// The value is appended before the trailing-data check.
    /// NOTE(review): presumably throwUnexpectedDataAfterParsedValue relies on the value already being in the column - confirm.
    auto & values = assert_cast<ColumnVector<IPv> &>(column).getData();
    values.push_back(parsed);

    const bool has_trailing_data = whole && !istr.eof();
    if (has_trailing_data)
        throwUnexpectedDataAfterParsedValue(column, istr, settings, TypeName<IPv>.data());
}
/// Non-throwing variant of deserializeText.
/// Returns false (leaving `column` untouched) if the value cannot be parsed or,
/// when `whole` is set, if input remains after the value. Otherwise appends the value and returns true.
template <typename IPv>
bool SerializationIP<IPv>::tryDeserializeText(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &, bool whole) const
{
    IPv parsed;
    if (!tryReadText(parsed, istr))
        return false;

    if (whole && !istr.eof())
        return false;

    auto & values = assert_cast<ColumnVector<IPv> &>(column).getData();
    values.push_back(parsed);
    return true;
}
/// Writes the IP value at `row_num` as text wrapped in single quotes.
template <typename IPv>
void SerializationIP<IPv>::serializeTextQuoted(const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const
{
    constexpr char quote = '\'';

    writeChar(quote, ostr);
    serializeText(column, row_num, ostr, settings);
    writeChar(quote, ostr);
}
/// Reads an IP value enclosed in single quotes and appends it to `column`.
template <typename IPv>
void SerializationIP<IPv>::deserializeTextQuoted(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const
{
    IPv parsed;

    assertChar('\'', istr);
    readText(parsed, istr);
    assertChar('\'', istr);

    /// Appending happens last on purpose - for exception safety: nothing is inserted if parsing above fails.
    auto & values = assert_cast<ColumnVector<IPv> &>(column).getData();
    values.push_back(parsed);
}
/// Non-throwing variant of deserializeTextQuoted.
/// Returns false without touching `column` if the opening quote, the value or
/// the closing quote cannot be read; otherwise appends the value and returns true.
template <typename IPv>
bool SerializationIP<IPv>::tryDeserializeTextQuoted(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const
{
    IPv parsed;

    if (!checkChar('\'', istr))
        return false;
    if (!tryReadText(parsed, istr))
        return false;
    if (!checkChar('\'', istr))
        return false;

    auto & values = assert_cast<ColumnVector<IPv> &>(column).getData();
    values.push_back(parsed);
    return true;
}
/// Writes the IP value at `row_num` as text wrapped in double quotes (a JSON string).
template <typename IPv>
void SerializationIP<IPv>::serializeTextJSON(const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const
{
    constexpr char quote = '"';

    writeChar(quote, ostr);
    serializeText(column, row_num, ostr, settings);
    writeChar(quote, ostr);
}
/// Reads a double-quoted IP value from JSON text and appends it to `column`.
/// The opening quote is checked with assertChar; if the input ends right after the value,
/// assertChar is invoked again so that the end-of-input case raises its specific exception.
/// If the character after the value is not a closing quote, the error is reported through
/// throwUnexpectedDataAfterParsedValue; otherwise the closing quote is consumed.
template <typename IPv>
void SerializationIP<IPv>::deserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
{
IPv x;
assertChar('"', istr);
readText(x, istr);
/// this code looks weird, but we want to throw specific exception to match original behavior...
if (istr.eof())
assertChar('"', istr);
/// The value is appended before the closing-quote check below.
/// NOTE(review): presumably throwUnexpectedDataAfterParsedValue relies on the value already being in the column - confirm.
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x);
if (*istr.position() != '"')
throwUnexpectedDataAfterParsedValue(column, istr, settings, TypeName<IPv>.data());
/// Skip the closing quote.
istr.ignore();
}
/// Non-throwing variant of deserializeTextJSON.
/// Returns false without touching `column` if the opening double quote, the value or
/// the closing double quote cannot be read; otherwise appends the value and returns true.
template <typename IPv>
bool SerializationIP<IPv>::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const
{
    IPv parsed;

    if (!checkChar('"', istr))
        return false;
    if (!tryReadText(parsed, istr))
        return false;
    if (!checkChar('"', istr))
        return false;

    auto & values = assert_cast<ColumnVector<IPv> &>(column).getData();
    values.push_back(parsed);
    return true;
}
/// Writes the IP value at `row_num` as text wrapped in double quotes (a quoted CSV field).
template <typename IPv>
void SerializationIP<IPv>::serializeTextCSV(const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings & settings) const
{
    constexpr char quote = '"';

    writeChar(quote, ostr);
    serializeText(column, row_num, ostr, settings);
    writeChar(quote, ostr);
}
/// Reads one IP value from a CSV field via readCSV and appends it to `column`.
template <typename IPv>
void SerializationIP<IPv>::deserializeTextCSV(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const
{
    IPv parsed;
    readCSV(parsed, istr);

    auto & values = assert_cast<ColumnVector<IPv> &>(column).getData();
    values.push_back(parsed);
}
/// Non-throwing variant of deserializeTextCSV.
/// Returns false without touching `column` if tryReadCSV fails; otherwise appends the value and returns true.
template <typename IPv>
bool SerializationIP<IPv>::tryDeserializeTextCSV(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const
{
    IPv parsed;
    const bool parsed_ok = tryReadCSV(parsed, istr);
    if (!parsed_ok)
        return false;

    auto & values = assert_cast<ColumnVector<IPv> &>(column).getData();
    values.push_back(parsed);
    return true;
}
/// Writes the IP value held by `field` in binary form.
/// IPv6 goes through writeBinary; every other IPv type goes through writeBinaryLittleEndian.
template <typename IPv>
void SerializationIP<IPv>::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const
{
    IPv value = field.get<IPv>();

    if constexpr (!std::is_same_v<IPv, IPv6>)
        writeBinaryLittleEndian(value, ostr);
    else
        writeBinary(value, ostr);
}
/// Reads a binary IP value and stores it into `field`.
/// IPv6 goes through readBinary; every other IPv type goes through readBinaryLittleEndian.
template <typename IPv>
void SerializationIP<IPv>::deserializeBinary(DB::Field & field, DB::ReadBuffer & istr, const DB::FormatSettings &) const
{
    IPv value;

    if constexpr (!std::is_same_v<IPv, IPv6>)
        readBinaryLittleEndian(value, istr);
    else
        readBinary(value, istr);

    field = NearestFieldType<IPv>(value);
}
/// Writes the IP value stored at row `row_num` of `column` in binary form.
template <typename IPv>
void SerializationIP<IPv>::serializeBinary(const DB::IColumn & column, size_t row_num, DB::WriteBuffer & ostr, const DB::FormatSettings &) const
{
    const auto & values = assert_cast<const ColumnVector<IPv> &>(column).getData();
    writeBinary(values[row_num], ostr);
}
/// Reads one binary IP value (into its underlying representation) and appends it to `column`.
template <typename IPv>
void SerializationIP<IPv>::deserializeBinary(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const
{
    IPv value;
    readBinary(value.toUnderType(), istr);

    auto & values = assert_cast<ColumnVector<IPv> &>(column).getData();
    values.push_back(value);
}
/// Writes a contiguous run of values from `column` as raw bytes.
/// `limit == 0`, or a range that runs past the end, means "everything from `offset` to the end".
/// Nothing is written when the resulting range is empty.
template <typename IPv>
void SerializationIP<IPv>::serializeBinaryBulk(const DB::IColumn & column, DB::WriteBuffer & ostr, size_t offset, size_t limit) const
{
    const auto & values = typeid_cast<const ColumnVector<IPv> &>(column).getData();
    const size_t total = values.size();

    if (limit == 0 || offset + limit > total)
        limit = total - offset;

    if (limit == 0)
        return;

    ostr.write(reinterpret_cast<const char *>(&values[offset]), sizeof(IPv) * limit);
}
/// Reads up to `limit` values as raw bytes and appends them to `column`.
/// The container is grown by `limit` up front, then shrunk to the number of
/// whole values actually read.
template <typename IPv>
void SerializationIP<IPv>::deserializeBinaryBulk(DB::IColumn & column, DB::ReadBuffer & istr, size_t limit, double) const
{
    auto & values = typeid_cast<ColumnVector<IPv> &>(column).getData();
    const size_t old_size = values.size();

    values.resize(old_size + limit);
    const size_t bytes_read = istr.readBig(reinterpret_cast<char *>(&values[old_size]), sizeof(IPv) * limit);

    /// Keep only complete values; a trailing partial value is dropped.
    values.resize(old_size + bytes_read / sizeof(IPv));
}
template class SerializationIP<IPv4>;
template class SerializationIP<IPv6>;
}

View File

@ -13,123 +13,30 @@ template <typename IPv>
class SerializationIP : public SimpleTextSerialization
{
public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override
{
writeText(assert_cast<const ColumnVector<IPv> &>(column).getData()[row_num], ostr);
}
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override
{
IPv x;
readText(x, istr);
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
if (whole && !istr.eof())
throwUnexpectedDataAfterParsedValue(column, istr, settings, TypeName<IPv>.data());
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x);
}
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override
{
serializeText(column, row_num, ostr, settings);
}
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
{
deserializeText(column, istr, settings, false);
}
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override
{
writeChar('\'', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('\'', ostr);
}
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override
{
IPv x;
assertChar('\'', istr);
readText(x, istr);
assertChar('\'', istr);
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
}
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override
{
writeChar('"', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('"', ostr);
}
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
{
IPv x;
assertChar('"', istr);
readText(x, istr);
/// this code looks weird, but we want to throw specific exception to match original behavior...
if (istr.eof())
assertChar('"', istr);
if (*istr.position() != '"')
throwUnexpectedDataAfterParsedValue(column, istr, settings, TypeName<IPv>.data());
istr.ignore();
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x);
}
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override
{
writeChar('"', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('"', ostr);
}
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &/* settings*/) const override
{
IPv value;
readCSV(value, istr);
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &/* settings*/) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &/* settings*/) const override;
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(value);
}
void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override;
void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override
{
IPv x = field.get<IPv>();
if constexpr (std::is_same_v<IPv, IPv6>)
writeBinary(x, ostr);
else
writeBinaryLittleEndian(x, ostr);
}
void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override
{
IPv x;
if constexpr (std::is_same_v<IPv, IPv6>)
readBinary(x, istr);
else
readBinaryLittleEndian(x, istr);
field = NearestFieldType<IPv>(x);
}
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override
{
writeBinary(assert_cast<const ColumnVector<IPv> &>(column).getData()[row_num], ostr);
}
void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override
{
IPv x;
readBinary(x.toUnderType(), istr);
assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x);
}
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override
{
const typename ColumnVector<IPv>::Container & x = typeid_cast<const ColumnVector<IPv> &>(column).getData();
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
size_t size = x.size();
if (limit == 0 || offset + limit > size)
limit = size - offset;
if (limit)
ostr.write(reinterpret_cast<const char *>(&x[offset]), sizeof(IPv) * limit);
}
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const override
{
typename ColumnVector<IPv>::Container & x = typeid_cast<ColumnVector<IPv> &>(column).getData();
size_t initial_size = x.size();
x.resize(initial_size + limit);
size_t size = istr.readBig(reinterpret_cast<char*>(&x[initial_size]), sizeof(IPv) * limit);
x.resize(initial_size + size / sizeof(IPv));
}
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const override;
};
using SerializationIPv4 = SerializationIP<IPv4>;

View File

@ -700,6 +700,11 @@ void SerializationLowCardinality::deserializeTextEscaped(IColumn & column, ReadB
deserializeImpl(column, &ISerialization::deserializeTextEscaped, istr, settings);
}
/// Non-throwing escaped-text deserialization: forwards to tryDeserializeImpl with
/// ISerialization::tryDeserializeTextEscaped and returns its result.
bool SerializationLowCardinality::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
return tryDeserializeImpl(column, &ISerialization::tryDeserializeTextEscaped, istr, settings);
}
void SerializationLowCardinality::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeImpl(column, row_num, &ISerialization::serializeTextQuoted, ostr, settings);
@ -710,11 +715,21 @@ void SerializationLowCardinality::deserializeTextQuoted(IColumn & column, ReadBu
deserializeImpl(column, &ISerialization::deserializeTextQuoted, istr, settings);
}
/// Non-throwing quoted-text deserialization: forwards to tryDeserializeImpl with
/// ISerialization::tryDeserializeTextQuoted and returns its result.
bool SerializationLowCardinality::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
return tryDeserializeImpl(column, &ISerialization::tryDeserializeTextQuoted, istr, settings);
}
/// Whole-text deserialization: forwards to deserializeImpl with ISerialization::deserializeWholeText.
void SerializationLowCardinality::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeImpl(column, &ISerialization::deserializeWholeText, istr, settings);
}
/// Non-throwing whole-text deserialization: forwards to tryDeserializeImpl with
/// ISerialization::tryDeserializeWholeText and returns its result.
bool SerializationLowCardinality::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
return tryDeserializeImpl(column, &ISerialization::tryDeserializeWholeText, istr, settings);
}
void SerializationLowCardinality::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeImpl(column, row_num, &ISerialization::serializeTextCSV, ostr, settings);
@ -725,6 +740,11 @@ void SerializationLowCardinality::deserializeTextCSV(IColumn & column, ReadBuffe
deserializeImpl(column, &ISerialization::deserializeTextCSV, istr, settings);
}
/// Non-throwing CSV deserialization: forwards to tryDeserializeImpl with
/// ISerialization::tryDeserializeTextCSV and returns its result.
bool SerializationLowCardinality::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
return tryDeserializeImpl(column, &ISerialization::tryDeserializeTextCSV, istr, settings);
}
void SerializationLowCardinality::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeImpl(column, row_num, &ISerialization::serializeText, ostr, settings);
@ -740,6 +760,11 @@ void SerializationLowCardinality::deserializeTextJSON(IColumn & column, ReadBuff
deserializeImpl(column, &ISerialization::deserializeTextJSON, istr, settings);
}
/// Non-throwing JSON deserialization: forwards to tryDeserializeImpl with
/// ISerialization::tryDeserializeTextJSON and returns its result.
bool SerializationLowCardinality::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
return tryDeserializeImpl(column, &ISerialization::tryDeserializeTextJSON, istr, settings);
}
void SerializationLowCardinality::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeImpl(column, row_num, &ISerialization::serializeTextXML, ostr, settings);
@ -750,6 +775,11 @@ void SerializationLowCardinality::deserializeTextRaw(IColumn & column, ReadBuffe
deserializeImpl(column, &ISerialization::deserializeTextRaw, istr, settings);
}
/// Non-throwing raw-text deserialization: forwards to tryDeserializeImpl with
/// ISerialization::tryDeserializeTextRaw and returns its result.
bool SerializationLowCardinality::tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
return tryDeserializeImpl(column, &ISerialization::tryDeserializeTextRaw, istr, settings);
}
void SerializationLowCardinality::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeImpl(column, row_num, &ISerialization::serializeTextRaw, ostr, settings);
@ -769,7 +799,7 @@ template <typename... Params, typename... Args>
void SerializationLowCardinality::deserializeImpl(
IColumn & column, SerializationLowCardinality::DeserializeFunctionPtr<Params...> func, Args &&... args) const
{
auto & low_cardinality_column= getColumnLowCardinality(column);
auto & low_cardinality_column = getColumnLowCardinality(column);
auto temp_column = low_cardinality_column.getDictionary().getNestedColumn()->cloneEmpty();
auto serialization = dictionary_type->getDefaultSerialization();
@ -778,4 +808,19 @@ void SerializationLowCardinality::deserializeImpl(
low_cardinality_column.insertFromFullColumn(*temp_column, 0);
}
/// Shared implementation of the non-throwing deserialize methods.
/// Parses one value into an empty clone of the dictionary's nested column by calling `func`
/// on the dictionary type's default serialization. If `func` reports failure, returns false
/// without inserting anything into the low-cardinality column; otherwise inserts the parsed
/// value and returns true.
template <typename... Params, typename... Args>
bool SerializationLowCardinality::tryDeserializeImpl(
    IColumn & column, SerializationLowCardinality::TryDeserializeFunctionPtr<Params...> func, Args &&... args) const
{
    auto & lc_column = getColumnLowCardinality(column);
    auto parsed_value = lc_column.getDictionary().getNestedColumn()->cloneEmpty();
    auto nested_serialization = dictionary_type->getDefaultSerialization();

    const bool parsed = (nested_serialization.get()->*func)(*parsed_value, std::forward<Args>(args)...);
    if (!parsed)
        return false;

    lc_column.insertFromFullColumn(*parsed_value, 0);
    return true;
}
}

View File

@ -55,16 +55,22 @@ public:
void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
private:
@ -79,6 +85,12 @@ private:
template <typename ... Params, typename... Args>
void deserializeImpl(IColumn & column, DeserializeFunctionPtr<Params...> func, Args &&... args) const;
/// Pointer to a const ISerialization member function that takes the destination column
/// followed by `Params...` and returns bool (the signature of the tryDeserialize* methods).
template <typename ... Params>
using TryDeserializeFunctionPtr = bool (ISerialization::*)(IColumn &, Params ...) const;
/// Shared helper for the tryDeserialize* overrides: calls `func` with `args` and returns false
/// if it reports failure, true otherwise.
template <typename ... Params, typename... Args>
bool tryDeserializeImpl(IColumn & column, TryDeserializeFunctionPtr<Params...> func, Args &&... args) const;
};
}

View File

@ -115,9 +115,11 @@ void SerializationMap::serializeTextImpl(
writeChar('}', ostr);
}
template <typename Reader>
void SerializationMap::deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const
template <typename ReturnType, typename Reader>
ReturnType SerializationMap::deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
auto & column_map = assert_cast<ColumnMap &>(column);
auto & nested_array = column_map.getNestedColumn();
@ -128,7 +130,21 @@ void SerializationMap::deserializeTextImpl(IColumn & column, ReadBuffer & istr,
auto & value_column = nested_tuple.getColumn(1);
size_t size = 0;
assertChar('{', istr);
if constexpr (throw_exception)
assertChar('{', istr);
else if (!checkChar('{', istr))
return ReturnType(false);
auto on_error_no_throw = [&]()
{
if (size)
{
nested_tuple.getColumnPtr(0) = key_column.cut(0, offsets.back());
nested_tuple.getColumnPtr(1) = value_column.cut(0, offsets.back());
}
return ReturnType(false);
};
try
{
@ -138,9 +154,15 @@ void SerializationMap::deserializeTextImpl(IColumn & column, ReadBuffer & istr,
if (!first)
{
if (*istr.position() == ',')
{
++istr.position();
}
else
throw Exception(ErrorCodes::CANNOT_READ_MAP_FROM_TEXT, "Cannot read Map from text");
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::CANNOT_READ_MAP_FROM_TEXT, "Cannot read Map from text");
return on_error_no_throw();
}
}
first = false;
@ -150,19 +172,32 @@ void SerializationMap::deserializeTextImpl(IColumn & column, ReadBuffer & istr,
if (*istr.position() == '}')
break;
reader(istr, key, key_column);
if constexpr (throw_exception)
reader(istr, key, key_column);
else if (!reader(istr, key, key_column))
return on_error_no_throw();
++size;
skipWhitespaceIfAny(istr);
assertChar(':', istr);
if constexpr (throw_exception)
assertChar(':', istr);
else if (!checkChar(':', istr))
return on_error_no_throw();
skipWhitespaceIfAny(istr);
reader(istr, value, value_column);
if constexpr (throw_exception)
reader(istr, value, value_column);
else if (!reader(istr, value, value_column))
return on_error_no_throw();
skipWhitespaceIfAny(istr);
}
assertChar('}', istr);
if constexpr (throw_exception)
assertChar('}', istr);
else if (!checkChar('}', istr))
return on_error_no_throw();
}
catch (...)
{
@ -171,10 +206,14 @@ void SerializationMap::deserializeTextImpl(IColumn & column, ReadBuffer & istr,
nested_tuple.getColumnPtr(0) = key_column.cut(0, offsets.back());
nested_tuple.getColumnPtr(1) = value_column.cut(0, offsets.back());
}
throw;
if constexpr (throw_exception)
throw;
return ReturnType(false);
}
offsets.push_back(offsets.back() + size);
return ReturnType(true);
}
void SerializationMap::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
@ -192,8 +231,8 @@ void SerializationMap::deserializeText(IColumn & column, ReadBuffer & istr, cons
deserializeTextImpl(column, istr,
[&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn)
{
if (settings.null_as_default)
SerializationNullable::deserializeTextQuotedImpl(subcolumn, buf, settings, subcolumn_serialization);
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn))
SerializationNullable::deserializeNullAsDefaultOrNestedTextQuoted(subcolumn, buf, settings, subcolumn_serialization);
else
subcolumn_serialization->deserializeTextQuoted(subcolumn, buf, settings);
});
@ -202,6 +241,28 @@ void SerializationMap::deserializeText(IColumn & column, ReadBuffer & istr, cons
throwUnexpectedDataAfterParsedValue(column, istr, settings, "Map");
}
/// Non-throwing variant of deserializeText.
/// Keys and values are parsed in quoted-text form. When `settings.null_as_default` is set and the
/// sub-column is not Nullable / LowCardinality(Nullable), parsing goes through
/// SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextQuoted instead.
/// Returns false if the map cannot be parsed, or if `whole` is set and input remains after it
/// (in which case the row that was just appended is removed again).
bool SerializationMap::tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
{
    auto read_element = [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> bool
    {
        const bool use_null_as_default = settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn);
        if (!use_null_as_default)
            return subcolumn_serialization->tryDeserializeTextQuoted(subcolumn, buf, settings);

        return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextQuoted(subcolumn, buf, settings, subcolumn_serialization);
    };

    if (!deserializeTextImpl<bool>(column, istr, read_element))
        return false;

    if (!whole || istr.eof())
        return true;

    /// Unexpected data after the map: undo the row that was just inserted.
    column.popBack(1);
    return false;
}
void SerializationMap::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeTextImpl(column, row_num, ostr,
@ -260,13 +321,25 @@ void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr,
deserializeTextImpl(column, istr,
[&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn)
{
if (settings.null_as_default)
SerializationNullable::deserializeTextJSONImpl(subcolumn, buf, settings, subcolumn_serialization);
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn))
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization);
else
subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings);
});
}
/// Non-throwing variant of deserializeTextJSON.
/// Keys and values are parsed in JSON form. When `settings.null_as_default` is set and the
/// sub-column is not Nullable / LowCardinality(Nullable), parsing goes through
/// SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON instead.
/// Returns the result of deserializeTextImpl<bool>.
bool SerializationMap::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    auto read_element = [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> bool
    {
        const bool use_null_as_default = settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn);
        if (!use_null_as_default)
            return subcolumn_serialization->tryDeserializeTextJSON(subcolumn, buf, settings);

        return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization);
    };

    return deserializeTextImpl<bool>(column, istr, read_element);
}
void SerializationMap::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const auto & column_map = assert_cast<const ColumnMap &>(column);
@ -308,6 +381,15 @@ void SerializationMap::deserializeTextCSV(IColumn & column, ReadBuffer & istr, c
deserializeText(column, rb, settings, true);
}
/// Non-throwing variant of deserializeTextCSV.
/// Reads one CSV field into a string, then parses that whole string as a map in text form.
/// Returns false if either step fails.
bool SerializationMap::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String csv_field;
    const bool field_read = tryReadCSV(csv_field, istr, settings.csv);
    if (!field_read)
        return false;

    ReadBufferFromString field_buf(csv_field);
    return tryDeserializeText(column, field_buf, settings, true);
}
void SerializationMap::enumerateStreams(
EnumerateStreamsSettings & settings,
const StreamCallback & callback,

View File

@ -24,13 +24,16 @@ public:
void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void enumerateStreams(
EnumerateStreamsSettings & settings,
@ -68,8 +71,8 @@ private:
template <typename KeyWriter, typename ValueWriter>
void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, KeyWriter && key_writer, ValueWriter && value_writer) const;
template <typename Reader>
void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const;
template <typename ReturnType = void, typename Reader>
ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const;
};
}

View File

@ -25,6 +25,7 @@ public:
void deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); }
/// Like deserializeText above, the body only calls throwNoSerialization() and ignores every argument.
/// NOTE(review): no bool is returned here, so throwNoSerialization() presumably never returns - confirm.
bool tryDeserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); }
/// These methods read and write zero bytes just to allow to figure out size of column.
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;

View File

@ -189,55 +189,59 @@ void SerializationNullable::serializeBinary(const IColumn & column, size_t row_n
nested->serializeBinary(col.getNestedColumn(), row_num, ostr, settings);
}
/// Deserialize value into ColumnNullable.
/// We need to insert both to nested column and to null byte map, or, in case of exception, to not insert at all.
template <typename ReturnType = void, typename CheckForNull, typename DeserializeNested>
requires std::same_as<ReturnType, void>
static ReturnType
safeDeserialize(IColumn & column, const ISerialization &, CheckForNull && check_for_null, DeserializeNested && deserialize_nested)
template <typename ReturnType>
ReturnType safeAppendToNullMap(ColumnNullable & column, bool is_null)
{
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
if (check_for_null())
try
{
col.insertDefault();
column.getNullMapData().push_back(is_null);
}
else
catch (...)
{
deserialize_nested(col.getNestedColumn());
try
{
col.getNullMapData().push_back(0);
}
catch (...)
{
col.getNestedColumn().popBack(1);
column.getNestedColumn().popBack(1);
if constexpr (std::is_same_v<ReturnType, void>)
throw;
}
return ReturnType(false);
}
return ReturnType(true);
}
/// Deserialize value into non-nullable column. In case of NULL, insert default value and return false.
/// Deserialize value into non-nullable column. In case of NULL, insert default and set is_null to true.
/// If ReturnType is bool, return true if parsing was successful and false in case of any error.
template <typename ReturnType = void, typename CheckForNull, typename DeserializeNested>
requires std::same_as<ReturnType, bool>
static ReturnType
safeDeserialize(IColumn & column, const ISerialization &, CheckForNull && check_for_null, DeserializeNested && deserialize_nested)
static ReturnType deserializeImpl(IColumn & column, ReadBuffer & buf, CheckForNull && check_for_null, DeserializeNested && deserialize_nested, bool & is_null)
{
bool insert_default = check_for_null();
if (insert_default)
is_null = check_for_null(buf);
if (is_null)
{
column.insertDefault();
}
else
deserialize_nested(column);
return !insert_default;
{
if constexpr (std::is_same_v<ReturnType, void>)
deserialize_nested(column, buf);
else if (!deserialize_nested(column, buf))
return ReturnType(false);
}
return ReturnType(true);
}
void SerializationNullable::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
safeDeserialize(column, *nested,
[&istr] { bool is_null = false; readBinary(is_null, istr); return is_null; },
[this, &istr, settings] (IColumn & nested_column) { nested->deserializeBinary(nested_column, istr, settings); });
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
bool is_null;
auto check_for_null = [](ReadBuffer & buf)
{
bool is_null_ = false;
readBinary(is_null_, buf);
return is_null_;
};
auto deserialize_nested = [this, &settings] (IColumn & nested_column, ReadBuffer & buf) { nested->deserializeBinary(nested_column, buf, settings); };
deserializeImpl(col.getNestedColumn(), istr, check_for_null, deserialize_nested, is_null);
safeAppendToNullMap<void>(col, is_null);
}
@ -246,20 +250,19 @@ void SerializationNullable::serializeTextEscaped(const IColumn & column, size_t
const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
if (col.isNullAt(row_num))
writeString(settings.tsv.null_representation, ostr);
serializeNullEscaped(ostr, settings);
else
nested->serializeTextEscaped(col.getNestedColumn(), row_num, ostr, settings);
}
void SerializationNullable::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
void SerializationNullable::serializeNullEscaped(DB::WriteBuffer & ostr, const DB::FormatSettings & settings)
{
deserializeTextEscapedImpl<void>(column, istr, settings, nested);
writeString(settings.tsv.null_representation, ostr);
}
void SerializationNullable::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
bool SerializationNullable::tryDeserializeNullEscaped(DB::ReadBuffer & istr, const DB::FormatSettings & settings)
{
deserializeTextRawImpl<void>(column, istr, settings, nested);
return checkString(settings.tsv.null_representation, istr);
}
void SerializationNullable::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
@ -267,72 +270,73 @@ void SerializationNullable::serializeTextRaw(const IColumn & column, size_t row_
const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
if (col.isNullAt(row_num))
writeString(settings.tsv.null_representation, ostr);
serializeNullRaw(ostr, settings);
else
nested->serializeTextRaw(col.getNestedColumn(), row_num, ostr, settings);
}
template<typename ReturnType>
ReturnType SerializationNullable::deserializeTextRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested)
void SerializationNullable::serializeNullRaw(DB::WriteBuffer & ostr, const DB::FormatSettings & settings)
{
return deserializeTextEscapedAndRawImpl<ReturnType, false>(column, istr, settings, nested);
writeString(settings.tsv.null_representation, ostr);
}
template<typename ReturnType>
ReturnType SerializationNullable::deserializeTextEscapedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
const SerializationPtr & nested)
bool SerializationNullable::tryDeserializeNullRaw(DB::ReadBuffer & istr, const DB::FormatSettings & settings)
{
return deserializeTextEscapedAndRawImpl<ReturnType, true>(column, istr, settings, nested);
return checkString(settings.tsv.null_representation, istr);
}
template<typename ReturnType, bool escaped>
ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
const SerializationPtr & nested_serialization)
ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization, bool & is_null)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
const String & null_representation = settings.tsv.null_representation;
auto deserialize_nested = [&nested_serialization, &settings] (IColumn & nested_column, ReadBuffer & buf_)
{
if constexpr (throw_exception)
{
if constexpr (escaped)
nested_serialization->deserializeTextEscaped(nested_column, buf_, settings);
else
nested_serialization->deserializeTextRaw(nested_column, buf_, settings);
}
else
{
if constexpr (escaped)
return nested_serialization->tryDeserializeTextEscaped(nested_column, buf_, settings);
else
return nested_serialization->tryDeserializeTextRaw(nested_column, buf_, settings);
}
};
/// Some data types can deserialize absence of data (e.g. empty string), so eof is ok.
if (istr.eof() || (!null_representation.empty() && *istr.position() != null_representation[0]))
{
/// This is not null, surely.
return safeDeserialize<ReturnType>(column, *nested_serialization,
[] { return false; },
[&nested_serialization, &istr, &settings] (IColumn & nested_column)
{
if constexpr (escaped)
nested_serialization->deserializeTextEscaped(nested_column, istr, settings);
else
nested_serialization->deserializeTextRaw(nested_column, istr, settings);
});
return deserializeImpl<ReturnType>(column, istr, [](ReadBuffer &){ return false; }, deserialize_nested, is_null);
}
/// Check if we have enough data in buffer to check if it's a null.
if (istr.available() > null_representation.size())
{
auto check_for_null = [&istr, &null_representation]()
auto check_for_null = [&null_representation](ReadBuffer & buf)
{
auto * pos = istr.position();
if (checkString(null_representation, istr) && (*istr.position() == '\t' || *istr.position() == '\n'))
auto * pos = buf.position();
if (checkString(null_representation, buf) && (*buf.position() == '\t' || *buf.position() == '\n'))
return true;
istr.position() = pos;
buf.position() = pos;
return false;
};
auto deserialize_nested = [&nested_serialization, &settings, &istr] (IColumn & nested_column)
{
if constexpr (escaped)
nested_serialization->deserializeTextEscaped(nested_column, istr, settings);
else
nested_serialization->deserializeTextRaw(nested_column, istr, settings);
};
return safeDeserialize<ReturnType>(column, *nested_serialization, check_for_null, deserialize_nested);
return deserializeImpl<ReturnType>(column, istr, check_for_null, deserialize_nested, is_null);
}
/// We don't have enough data in buffer to check if it's a null.
/// Use PeekableReadBuffer to make a checkpoint before checking null
/// representation and rollback if check was failed.
PeekableReadBuffer buf(istr, true);
auto check_for_null = [&buf, &null_representation]()
PeekableReadBuffer peekable_buf(istr, true);
auto check_for_null = [&null_representation](ReadBuffer & buf_)
{
auto & buf = assert_cast<PeekableReadBuffer &>(buf_);
buf.setCheckpoint();
SCOPE_EXIT(buf.dropCheckpoint());
if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'))
@ -342,16 +346,18 @@ ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & col
return false;
};
auto deserialize_nested = [&nested_serialization, &settings, &buf, &null_representation, &istr] (IColumn & nested_column)
auto deserialize_nested_with_check = [&deserialize_nested, &nested_serialization, &settings, &null_representation, &istr] (IColumn & nested_column, ReadBuffer & buf_)
{
auto & buf = assert_cast<PeekableReadBuffer &>(buf_);
auto * pos = buf.position();
if constexpr (escaped)
nested_serialization->deserializeTextEscaped(nested_column, buf, settings);
else
nested_serialization->deserializeTextRaw(nested_column, buf, settings);
if constexpr (throw_exception)
deserialize_nested(nested_column, buf);
else if (!deserialize_nested(nested_column, buf))
return ReturnType(false);
/// Check that we don't have any unread data in PeekableReadBuffer own memory.
if (likely(!buf.hasUnreadData()))
return;
return ReturnType(true);
/// We have some unread data in PeekableReadBuffer own memory.
/// It can happen only if there is a string instead of a number
@ -360,6 +366,9 @@ ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & col
/// We also should delete incorrectly deserialized value from nested column.
nested_column.popBack(1);
if constexpr (!throw_exception)
return ReturnType(false);
if (null_representation.find('\t') != std::string::npos || null_representation.find('\n') != std::string::npos)
throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation "
"containing '\\t' or '\\n' may not work correctly for large input.");
@ -377,7 +386,63 @@ ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & col
istr.count(), std::string(pos, buf.position() - pos), parsed_value.str());
};
return safeDeserialize<ReturnType>(column, *nested_serialization, check_for_null, deserialize_nested);
return deserializeImpl<ReturnType>(column, peekable_buf, check_for_null, deserialize_nested_with_check, is_null);
}
/// Reads one escaped text field into a ColumnNullable.
/// The value (or a default, when the field is the TSV null representation) is inserted into the
/// nested column first; the matching flag is then appended to the null map.
/// Parsing errors are reported by exception.
void SerializationNullable::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    ColumnNullable & col = assert_cast<ColumnNullable &>(column);

    /// Out-parameter of the impl. Initialized defensively so the read below never sees an
    /// indeterminate value, even if an impl path is later added that returns before assigning it.
    bool is_null = false;
    deserializeTextEscapedAndRawImpl<void, true>(col.getNestedColumn(), istr, settings, nested, is_null);

    /// If appending to the null map throws, the helper pops the just-inserted nested value and rethrows.
    safeAppendToNullMap<void>(col, is_null);
}
/// Variant of deserializeTextEscaped that reports failure through its return value.
/// Returns true only if both the nested value (or default for NULL) was inserted and the
/// null-map flag was appended.
bool SerializationNullable::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    ColumnNullable & col = assert_cast<ColumnNullable &>(column);

    /// Out-parameter of the impl; initialized defensively so it is never read indeterminate.
    bool is_null = false;
    if (!deserializeTextEscapedAndRawImpl<bool, true>(col.getNestedColumn(), istr, settings, nested, is_null))
        return false;

    /// The null map is touched only after the nested column was filled successfully.
    return safeAppendToNullMap<bool>(col, is_null);
}
/// Reads one escaped text field directly into a (non-nullable) nested column.
/// A NULL field inserts a default value instead.
/// Returns true if an actual value was read and false if the field was NULL.
bool SerializationNullable::deserializeNullAsDefaultOrNestedTextEscaped(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization)
{
    /// Out-parameter of the impl; initialized defensively so the negation below is always well-defined.
    bool is_null = false;
    deserializeTextEscapedAndRawImpl<void, true>(nested_column, istr, settings, nested_serialization, is_null);
    return !is_null;
}
/// Variant of deserializeNullAsDefaultOrNestedTextEscaped that reports failure through its return value.
/// The result is the parse status only; whether the field was NULL is not reported to the caller.
bool SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextEscaped(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization)
{
    /// Required by the impl signature only; initialized so no indeterminate bool is passed around.
    bool is_null = false;
    return deserializeTextEscapedAndRawImpl<bool, true>(nested_column, istr, settings, nested_serialization, is_null);
}
/// Reads one raw (unescaped) text field into a ColumnNullable.
/// The value (or a default, when the field is the TSV null representation) is inserted into the
/// nested column first; the matching flag is then appended to the null map.
/// Parsing errors are reported by exception.
void SerializationNullable::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    ColumnNullable & col = assert_cast<ColumnNullable &>(column);

    /// Out-parameter of the impl. Initialized defensively so the read below never sees an
    /// indeterminate value, even if an impl path is later added that returns before assigning it.
    bool is_null = false;
    deserializeTextEscapedAndRawImpl<void, false>(col.getNestedColumn(), istr, settings, nested, is_null);

    /// If appending to the null map throws, the helper pops the just-inserted nested value and rethrows.
    safeAppendToNullMap<void>(col, is_null);
}
/// Variant of deserializeTextRaw that reports failure through its return value.
/// Returns true only if both the nested value (or default for NULL) was inserted and the
/// null-map flag was appended.
bool SerializationNullable::tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    ColumnNullable & col = assert_cast<ColumnNullable &>(column);

    /// Out-parameter of the impl; initialized defensively so it is never read indeterminate.
    bool is_null = false;
    if (!deserializeTextEscapedAndRawImpl<bool, false>(col.getNestedColumn(), istr, settings, nested, is_null))
        return false;

    /// The null map is touched only after the nested column was filled successfully.
    return safeAppendToNullMap<bool>(col, is_null);
}
/// Reads one raw (unescaped) text field directly into a (non-nullable) nested column.
/// A NULL field inserts a default value instead.
/// Returns true if an actual value was read and false if the field was NULL.
bool SerializationNullable::deserializeNullAsDefaultOrNestedTextRaw(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization)
{
    /// Out-parameter of the impl; initialized defensively so the negation below is always well-defined.
    bool is_null = false;
    deserializeTextEscapedAndRawImpl<void, false>(nested_column, istr, settings, nested_serialization, is_null);
    return !is_null;
}
/// Variant of deserializeNullAsDefaultOrNestedTextRaw that reports failure through its return value.
/// The result is the parse status only; whether the field was NULL is not reported to the caller.
bool SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextRaw(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization)
{
    /// Required by the impl signature only; initialized so no indeterminate bool is passed around.
    bool is_null = false;
    return deserializeTextEscapedAndRawImpl<bool, false>(nested_column, istr, settings, nested_serialization, is_null);
}
void SerializationNullable::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
@ -385,45 +450,51 @@ void SerializationNullable::serializeTextQuoted(const IColumn & column, size_t r
const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
if (col.isNullAt(row_num))
writeCString("NULL", ostr);
serializeNullQuoted(ostr);
else
nested->serializeTextQuoted(col.getNestedColumn(), row_num, ostr, settings);
}
void SerializationNullable::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
void SerializationNullable::serializeNullQuoted(DB::WriteBuffer & ostr)
{
deserializeTextQuotedImpl<void>(column, istr, settings, nested);
writeCString("NULL", ostr);
}
/// Tries to read the quoted-format NULL literal from the buffer.
/// The comparison against "NULL" is delegated to checkStringCaseInsensitive; its result is returned as is.
bool SerializationNullable::tryDeserializeNullQuoted(DB::ReadBuffer & istr)
{
    const bool matched = checkStringCaseInsensitive("NULL", istr);
    return matched;
}
template<typename ReturnType>
ReturnType SerializationNullable::deserializeTextQuotedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
const SerializationPtr & nested)
ReturnType deserializeTextQuotedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested, bool & is_null)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
auto deserialize_nested = [&nested, &settings] (IColumn & nested_column, ReadBuffer & buf)
{
if constexpr (!throw_exception)
return nested->tryDeserializeTextQuoted(nested_column, buf, settings);
nested->deserializeTextQuoted(nested_column, buf, settings);
};
if (istr.eof() || (*istr.position() != 'N' && *istr.position() != 'n'))
{
/// This is not null, surely.
return safeDeserialize<ReturnType>(column, *nested,
[] { return false; },
[&nested, &istr, &settings] (IColumn & nested_column) { nested->deserializeTextQuoted(nested_column, istr, settings); });
return deserializeImpl<ReturnType>(column, istr, [](ReadBuffer &){ return false; }, deserialize_nested, is_null);
}
/// Check if we have enough data in buffer to check if it's a null.
if (istr.available() >= 4)
{
auto check_for_null = [&istr]()
auto check_for_null = [](ReadBuffer & buf)
{
auto * pos = istr.position();
if (checkStringCaseInsensitive("NULL", istr))
auto * pos = buf.position();
if (checkStringCaseInsensitive("NULL", buf))
return true;
istr.position() = pos;
buf.position() = pos;
return false;
};
auto deserialize_nested = [&nested, &settings, &istr] (IColumn & nested_column)
{
nested->deserializeTextQuoted(nested_column, istr, settings);
};
return safeDeserialize<ReturnType>(column, *nested, check_for_null, deserialize_nested);
return deserializeImpl<ReturnType>(column, istr, check_for_null, deserialize_nested, is_null);
}
/// We don't have enough data in buffer to check if it's a NULL
@ -431,9 +502,10 @@ ReturnType SerializationNullable::deserializeTextQuotedImpl(IColumn & column, Re
/// to differentiate for example NULL and NaN for float)
/// Use PeekableReadBuffer to make a checkpoint before checking
/// null and rollback if the check was failed.
PeekableReadBuffer buf(istr, true);
auto check_for_null = [&buf]()
PeekableReadBuffer peekable_buf(istr, true);
auto check_for_null = [](ReadBuffer & buf_)
{
auto & buf = assert_cast<PeekableReadBuffer &>(buf_);
buf.setCheckpoint();
SCOPE_EXIT(buf.dropCheckpoint());
if (checkStringCaseInsensitive("NULL", buf))
@ -443,39 +515,74 @@ ReturnType SerializationNullable::deserializeTextQuotedImpl(IColumn & column, Re
return false;
};
auto deserialize_nested = [&nested, &settings, &buf] (IColumn & nested_column)
auto deserialize_nested_with_check = [&deserialize_nested] (IColumn & nested_column, ReadBuffer & buf_)
{
nested->deserializeTextQuoted(nested_column, buf, settings);
auto & buf = assert_cast<PeekableReadBuffer &>(buf_);
if constexpr (throw_exception)
deserialize_nested(nested_column, buf);
else if (!deserialize_nested(nested_column, buf))
return false;
/// Check that we don't have any unread data in PeekableReadBuffer own memory.
if (likely(!buf.hasUnreadData()))
return;
return ReturnType(true);
/// We have some unread data in PeekableReadBuffer own memory.
/// It can happen only if there is an unquoted string instead of a number.
/// We also should delete incorrectly deserialized value from nested column.
nested_column.popBack(1);
if constexpr (!throw_exception)
return ReturnType(false);
throw DB::Exception(
ErrorCodes::CANNOT_READ_ALL_DATA,
"Error while parsing Nullable: got an unquoted string {} instead of a number",
String(buf.position(), std::min(10ul, buf.available())));
};
return safeDeserialize<ReturnType>(column, *nested, check_for_null, deserialize_nested);
return deserializeImpl<ReturnType>(column, peekable_buf, check_for_null, deserialize_nested_with_check, is_null);
}
void SerializationNullable::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
void SerializationNullable::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeWholeTextImpl<void>(column, istr, settings, nested);
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
bool is_null;
deserializeTextQuotedImpl<void>(col.getNestedColumn(), istr, settings, nested, is_null);
safeAppendToNullMap<void>(col, is_null);
}
/// Variant of deserializeTextQuoted that reports failure through its return value.
/// Returns true only if both the nested value (or default for NULL) was inserted and the
/// null-map flag was appended.
bool SerializationNullable::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    ColumnNullable & col = assert_cast<ColumnNullable &>(column);

    /// Out-parameter of the impl; initialized defensively so it is never read indeterminate.
    bool is_null = false;
    if (!deserializeTextQuotedImpl<bool>(col.getNestedColumn(), istr, settings, nested, is_null))
        return false;

    /// The null map is touched only after the nested column was filled successfully.
    return safeAppendToNullMap<bool>(col, is_null);
}
/// Reads one quoted text field directly into a (non-nullable) nested column.
/// A NULL field inserts a default value instead.
/// Returns true if an actual value was read and false if the field was NULL.
bool SerializationNullable::deserializeNullAsDefaultOrNestedTextQuoted(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
{
    /// Out-parameter of the impl; initialized defensively so the negation below is always well-defined.
    bool is_null = false;
    deserializeTextQuotedImpl<void>(nested_column, istr, settings, nested_serialization, is_null);
    return !is_null;
}
/// Variant of deserializeNullAsDefaultOrNestedTextQuoted that reports failure through its return value.
/// The result is the parse status only; whether the field was NULL is not reported to the caller.
bool SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextQuoted(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
{
    /// Required by the impl signature only; initialized so no indeterminate bool is passed around.
    bool is_null = false;
    return deserializeTextQuotedImpl<bool>(nested_column, istr, settings, nested_serialization, is_null);
}
template <typename ReturnType>
ReturnType SerializationNullable::deserializeWholeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
const SerializationPtr & nested)
ReturnType deserializeWholeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested, bool & is_null)
{
PeekableReadBuffer buf(istr, true);
auto check_for_null = [&buf]()
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
PeekableReadBuffer peekable_buf(istr, true);
auto check_for_null = [](ReadBuffer & buf_)
{
auto & buf = assert_cast<PeekableReadBuffer &>(buf_);
buf.setCheckpoint();
SCOPE_EXIT(buf.dropCheckpoint());
@ -490,15 +597,46 @@ ReturnType SerializationNullable::deserializeWholeTextImpl(IColumn & column, Rea
return false;
};
auto deserialize_nested = [&nested, &settings, &buf] (IColumn & nested_column)
auto deserialize_nested = [&nested, &settings] (IColumn & nested_column, ReadBuffer & buf_)
{
auto & buf = assert_cast<PeekableReadBuffer &>(buf_);
if constexpr (!throw_exception)
return nested->tryDeserializeWholeText(nested_column, buf, settings);
nested->deserializeWholeText(nested_column, buf, settings);
assert(!buf.hasUnreadData());
};
return safeDeserialize<ReturnType>(column, *nested, check_for_null, deserialize_nested);
return deserializeImpl<ReturnType>(column, peekable_buf, check_for_null, deserialize_nested, is_null);
}
/// Reads a whole-text value into a ColumnNullable.
/// The value (or a default, for NULL) is inserted into the nested column first; the matching
/// flag is then appended to the null map. Parsing errors are reported by exception.
void SerializationNullable::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    ColumnNullable & col = assert_cast<ColumnNullable &>(column);

    /// Out-parameter of the impl. Initialized defensively so the read below never sees an
    /// indeterminate value, even if an impl path is later added that returns before assigning it.
    bool is_null = false;
    deserializeWholeTextImpl<void>(col.getNestedColumn(), istr, settings, nested, is_null);

    /// If appending to the null map throws, the helper pops the just-inserted nested value and rethrows.
    safeAppendToNullMap<void>(col, is_null);
}
/// Variant of deserializeWholeText that reports failure through its return value.
/// Returns true only if both the nested value (or default for NULL) was inserted and the
/// null-map flag was appended.
bool SerializationNullable::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    ColumnNullable & col = assert_cast<ColumnNullable &>(column);

    /// Out-parameter of the impl; initialized defensively so it is never read indeterminate.
    bool is_null = false;
    if (!deserializeWholeTextImpl<bool>(col.getNestedColumn(), istr, settings, nested, is_null))
        return false;

    /// The null map is touched only after the nested column was filled successfully.
    return safeAppendToNullMap<bool>(col, is_null);
}
/// Reads a whole-text value directly into a (non-nullable) nested column.
/// A NULL input inserts a default value instead.
/// Returns true if an actual value was read and false if the input was NULL.
bool SerializationNullable::deserializeNullAsDefaultOrNestedWholeText(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
{
    /// Out-parameter of the impl; initialized defensively so the negation below is always well-defined.
    bool is_null = false;
    deserializeWholeTextImpl<void>(nested_column, istr, settings, nested_serialization, is_null);
    return !is_null;
}
/// Variant of deserializeNullAsDefaultOrNestedWholeText that reports failure through its return value.
/// The result is the parse status only; whether the input was NULL is not reported to the caller.
bool SerializationNullable::tryDeserializeNullAsDefaultOrNestedWholeText(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
{
    /// Required by the impl signature only; initialized so no indeterminate bool is passed around.
    bool is_null = false;
    return deserializeWholeTextImpl<bool>(nested_column, istr, settings, nested_serialization, is_null);
}
void SerializationNullable::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
@ -510,48 +648,56 @@ void SerializationNullable::serializeTextCSV(const IColumn & column, size_t row_
nested->serializeTextCSV(col.getNestedColumn(), row_num, ostr, settings);
}
void SerializationNullable::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
void SerializationNullable::serializeNullCSV(DB::WriteBuffer & ostr, const DB::FormatSettings & settings)
{
deserializeTextCSVImpl<void>(column, istr, settings, nested);
writeString(settings.csv.null_representation, ostr);
}
/// Tries to read the configured CSV null representation (settings.csv.null_representation)
/// from the buffer. The result of checkString is returned as is.
bool SerializationNullable::tryDeserializeNullCSV(DB::ReadBuffer & istr, const DB::FormatSettings & settings)
{
    const auto & csv_null = settings.csv.null_representation;
    return checkString(csv_null, istr);
}
template<typename ReturnType>
ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
const SerializationPtr & nested_serialization)
ReturnType deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization, bool & is_null)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
auto deserialize_nested = [&nested_serialization, &settings] (IColumn & nested_column, ReadBuffer & buf)
{
if constexpr (!throw_exception)
return nested_serialization->tryDeserializeTextCSV(nested_column, buf, settings);
nested_serialization->deserializeTextCSV(nested_column, buf, settings);
};
const String & null_representation = settings.csv.null_representation;
if (istr.eof() || (!null_representation.empty() && *istr.position() != null_representation[0]))
{
/// This is not null, surely.
return safeDeserialize<ReturnType>(column, *nested_serialization,
[] { return false; },
[&nested_serialization, &istr, &settings] (IColumn & nested_column) { nested_serialization->deserializeTextCSV(nested_column, istr, settings); });
return deserializeImpl<ReturnType>(column, istr, [](ReadBuffer &){ return false; }, deserialize_nested, is_null);
}
/// Check if we have enough data in buffer to check if it's a null.
if (settings.csv.custom_delimiter.empty() && istr.available() > null_representation.size())
{
auto check_for_null = [&istr, &null_representation, &settings]()
auto check_for_null = [&null_representation, &settings](ReadBuffer & buf)
{
auto * pos = istr.position();
if (checkString(null_representation, istr) && (*istr.position() == settings.csv.delimiter || *istr.position() == '\r' || *istr.position() == '\n'))
auto * pos = buf.position();
if (checkString(null_representation, buf) && (*buf.position() == settings.csv.delimiter || *buf.position() == '\r' || *buf.position() == '\n'))
return true;
istr.position() = pos;
buf.position() = pos;
return false;
};
auto deserialize_nested = [&nested_serialization, &settings, &istr] (IColumn & nested_column)
{
nested_serialization->deserializeTextCSV(nested_column, istr, settings);
};
return safeDeserialize<ReturnType>(column, *nested_serialization, check_for_null, deserialize_nested);
return deserializeImpl<ReturnType>(column, istr, check_for_null, deserialize_nested, is_null);
}
/// We don't have enough data in buffer to check if it's a null.
/// Use PeekableReadBuffer to make a checkpoint before checking null
/// representation and rollback if the check was failed.
PeekableReadBuffer buf(istr, true);
auto check_for_null = [&buf, &null_representation, &settings]()
PeekableReadBuffer peekable_buf(istr, true);
auto check_for_null = [&null_representation, &settings](ReadBuffer & buf_)
{
auto & buf = assert_cast<PeekableReadBuffer &>(buf_);
buf.setCheckpoint();
SCOPE_EXIT(buf.dropCheckpoint());
if (checkString(null_representation, buf))
@ -574,13 +720,18 @@ ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadB
return false;
};
auto deserialize_nested = [&nested_serialization, &settings, &buf, &null_representation, &istr] (IColumn & nested_column)
auto deserialize_nested_with_check = [&deserialize_nested, &nested_serialization, &settings, &null_representation, &istr] (IColumn & nested_column, ReadBuffer & buf_)
{
auto & buf = assert_cast<PeekableReadBuffer &>(buf_);
auto * pos = buf.position();
nested_serialization->deserializeTextCSV(nested_column, buf, settings);
if constexpr (throw_exception)
deserialize_nested(nested_column, buf);
else if (!deserialize_nested(nested_column, buf))
return ReturnType(false);
/// Check that we don't have any unread data in PeekableReadBuffer own memory.
if (likely(!buf.hasUnreadData()))
return;
return ReturnType(true);
/// We have some unread data in PeekableReadBuffer own memory.
/// It can happen only if there is an unquoted string instead of a number
@ -589,6 +740,9 @@ ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadB
/// We also should delete incorrectly deserialized value from nested column.
nested_column.popBack(1);
if constexpr (!throw_exception)
return ReturnType(false);
if (null_representation.find(settings.csv.delimiter) != std::string::npos || null_representation.find('\r') != std::string::npos
|| null_representation.find('\n') != std::string::npos)
throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "CSV custom null representation containing "
@ -604,7 +758,35 @@ ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadB
istr.count(), std::string(pos, buf.position() - pos), parsed_value.str());
};
return safeDeserialize<ReturnType>(column, *nested_serialization, check_for_null, deserialize_nested);
return deserializeImpl<ReturnType>(column, peekable_buf, check_for_null, deserialize_nested_with_check, is_null);
}
/// Reads one CSV field into a ColumnNullable.
/// The value (or a default, when the field is the CSV null representation) is inserted into the
/// nested column first; the matching flag is then appended to the null map.
/// Parsing errors are reported by exception.
void SerializationNullable::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    ColumnNullable & col = assert_cast<ColumnNullable &>(column);

    /// Out-parameter of the impl. Initialized defensively so the read below never sees an
    /// indeterminate value, even if an impl path is later added that returns before assigning it.
    bool is_null = false;
    deserializeTextCSVImpl<void>(col.getNestedColumn(), istr, settings, nested, is_null);

    /// If appending to the null map throws, the helper pops the just-inserted nested value and rethrows.
    safeAppendToNullMap<void>(col, is_null);
}
/// Variant of deserializeTextCSV that reports failure through its return value.
/// Returns true only if both the nested value (or default for NULL) was inserted and the
/// null-map flag was appended.
bool SerializationNullable::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    ColumnNullable & col = assert_cast<ColumnNullable &>(column);

    /// Out-parameter of the impl; initialized defensively so it is never read indeterminate.
    bool is_null = false;
    if (!deserializeTextCSVImpl<bool>(col.getNestedColumn(), istr, settings, nested, is_null))
        return false;

    /// The null map is touched only after the nested column was filled successfully.
    return safeAppendToNullMap<bool>(col, is_null);
}
/// Reads one CSV field directly into a (non-nullable) nested column.
/// A NULL field inserts a default value instead.
/// Returns true if an actual value was read and false if the field was NULL.
bool SerializationNullable::deserializeNullAsDefaultOrNestedTextCSV(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
{
    /// Out-parameter of the impl; initialized defensively so the negation below is always well-defined.
    bool is_null = false;
    deserializeTextCSVImpl<void>(nested_column, istr, settings, nested_serialization, is_null);
    return !is_null;
}
/// Variant of deserializeNullAsDefaultOrNestedTextCSV that reports failure through its return value.
/// The result is the parse status only; whether the field was NULL is not reported to the caller.
bool SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextCSV(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
{
    /// Required by the impl signature only; initialized so no indeterminate bool is passed around.
    bool is_null = false;
    return deserializeTextCSVImpl<bool>(nested_column, istr, settings, nested_serialization, is_null);
}
void SerializationNullable::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
@ -618,38 +800,86 @@ void SerializationNullable::serializeText(const IColumn & column, size_t row_num
/// This assumes UTF-8 and proper font support. This is Ok, because Pretty formats are "presentational", not for data exchange.
if (col.isNullAt(row_num))
{
if (settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8)
writeCString("ᴺᵁᴸᴸ", ostr);
else
writeCString("NULL", ostr);
}
serializeNullText(ostr, settings);
else
nested->serializeText(col.getNestedColumn(), row_num, ostr, settings);
}
/// Writes the plain-text representation of NULL.
/// With the UTF-8 pretty charset the small-capitals spelling is written; any other charset gets ASCII "NULL".
void SerializationNullable::serializeNullText(DB::WriteBuffer & ostr, const DB::FormatSettings & settings)
{
    const bool utf8_charset = settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8;
    if (!utf8_charset)
    {
        writeCString("NULL", ostr);
        return;
    }

    writeCString("ᴺᵁᴸᴸ", ostr);
}
/// Tries to read either spelling of NULL that serializeNullText can produce.
bool SerializationNullable::tryDeserializeNullText(DB::ReadBuffer & istr)
{
    /// ASCII spelling: once the leading 'N' has matched, the outcome is decided by the remaining "ULL".
    const bool first_char_is_n = checkCharCaseInsensitive('N', istr);
    if (!first_char_is_n)
    {
        /// Otherwise only the UTF-8 spelling can still match.
        return checkStringCaseInsensitive("ᴺᵁᴸᴸ", istr);
    }

    return checkStringCaseInsensitive("ULL", istr);
}
void SerializationNullable::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
if (col.isNullAt(row_num))
writeCString("null", ostr);
serializeNullJSON(ostr);
else
nested->serializeTextJSON(col.getNestedColumn(), row_num, ostr, settings);
}
void SerializationNullable::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
void SerializationNullable::serializeNullJSON(DB::WriteBuffer & ostr)
{
deserializeTextJSONImpl<void>(column, istr, settings, nested);
writeCString("null", ostr);
}
/// Tries to read the JSON literal "null" from the buffer. The result of checkString is returned as is.
bool SerializationNullable::tryDeserializeNullJSON(DB::ReadBuffer & istr)
{
    const bool matched = checkString("null", istr);
    return matched;
}
template<typename ReturnType>
ReturnType SerializationNullable::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
const SerializationPtr & nested)
ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested, bool & is_null)
{
return safeDeserialize<ReturnType>(column, *nested,
[&istr] { return checkStringByFirstCharacterAndAssertTheRest("null", istr); },
[&nested, &istr, &settings] (IColumn & nested_column) { nested->deserializeTextJSON(nested_column, istr, settings); });
auto check_for_null = [](ReadBuffer & buf){ return checkStringByFirstCharacterAndAssertTheRest("null", buf); };
auto deserialize_nested = [&nested, &settings](IColumn & nested_column, ReadBuffer & buf)
{
if constexpr (std::is_same_v<ReturnType, bool>)
return nested->tryDeserializeTextJSON(nested_column, buf, settings);
nested->deserializeTextJSON(nested_column, buf, settings);
};
return deserializeImpl<ReturnType>(column, istr, check_for_null, deserialize_nested, is_null);
}
/// Reads one JSON value into a Nullable column (throwing flavour, ReturnType = void).
/// The value (or NULL) is parsed into the nested column first; afterwards the NULL flag
/// reported by the parser is appended to the null map.
void SerializationNullable::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    auto & nullable_column = assert_cast<ColumnNullable &>(column);

    /// Filled in by deserializeTextJSONImpl.
    bool is_null;
    deserializeTextJSONImpl<void>(nullable_column.getNestedColumn(), istr, settings, nested, is_null);

    safeAppendToNullMap<void>(nullable_column, is_null);
}
/// Non-throwing counterpart of deserializeTextJSON (ReturnType = bool).
/// Returns false if parsing into the nested column fails; otherwise returns the result
/// of appending the parsed NULL flag to the null map.
bool SerializationNullable::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    auto & nullable_column = assert_cast<ColumnNullable &>(column);

    /// Filled in by deserializeTextJSONImpl.
    bool is_null;
    if (!deserializeTextJSONImpl<bool>(nullable_column.getNestedColumn(), istr, settings, nested, is_null))
        return false;

    return safeAppendToNullMap<bool>(nullable_column, is_null);
}
/// Parses a JSON value directly into a non-nullable `nested_column` using `nested_serialization`
/// (throwing flavour, ReturnType = void).
/// Returns true when an actual value was read and false when the input was NULL.
bool SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
{
    /// Filled in by deserializeTextJSONImpl.
    bool parsed_null;
    deserializeTextJSONImpl<void>(nested_column, istr, settings, nested_serialization, parsed_null);

    const bool value_was_read = !parsed_null;
    return value_was_read;
}
/// Non-throwing variant (ReturnType = bool): parses a JSON value or NULL into a non-nullable `nested_column`.
/// The return value is the success flag of the parse; whether the input was NULL is not reported to the caller.
bool SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
{
    /// Required by the implementation's signature; intentionally unused afterwards.
    bool parsed_null;
    const bool parsed_ok = deserializeTextJSONImpl<bool>(nested_column, istr, settings, nested_serialization, parsed_null);
    return parsed_ok;
}
void SerializationNullable::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
@ -662,11 +892,9 @@ void SerializationNullable::serializeTextXML(const IColumn & column, size_t row_
nested->serializeTextXML(col.getNestedColumn(), row_num, ostr, settings);
}
template bool SerializationNullable::deserializeWholeTextImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
template bool SerializationNullable::deserializeTextEscapedImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
template bool SerializationNullable::deserializeTextQuotedImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested);
template bool SerializationNullable::deserializeTextCSVImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
template bool SerializationNullable::deserializeTextJSONImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested);
template bool SerializationNullable::deserializeTextRawImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested);
/// Writes the NULL marker used for XML output into `ostr`:
/// the two characters backslash and 'N' (the C++ literal "\\N").
void SerializationNullable::serializeNullXML(DB::WriteBuffer & ostr)
{
writeCString("\\N", ostr);
}
}

View File

@ -51,9 +51,12 @@ public:
void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
@ -66,31 +69,49 @@ public:
* In CSV, non-NULL string value, starting with \N characters, must be placed in quotes, to avoid ambiguity.
*/
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
/// If ReturnType is bool, check for NULL and deserialize value into non-nullable column (and return true) or insert default value of nested type (and return false)
/// If ReturnType is void, deserialize Nullable(T)
template <typename ReturnType = bool>
static ReturnType deserializeWholeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
template <typename ReturnType = bool>
static ReturnType deserializeTextEscapedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
template <typename ReturnType = bool>
static ReturnType deserializeTextQuotedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested);
template <typename ReturnType = bool>
static ReturnType deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
template <typename ReturnType = bool>
static ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested);
template <typename ReturnType = bool>
static ReturnType deserializeTextRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
template <typename ReturnType = bool, bool escaped>
static ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
/// Check for NULL and deserialize value into non-nullable column (and return true) or insert default value of nested type (and return false).
static bool deserializeNullAsDefaultOrNestedWholeText(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
static bool deserializeNullAsDefaultOrNestedTextEscaped(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
static bool deserializeNullAsDefaultOrNestedTextQuoted(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization);
static bool deserializeNullAsDefaultOrNestedTextCSV(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
static bool deserializeNullAsDefaultOrNestedTextJSON(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization);
static bool deserializeNullAsDefaultOrNestedTextRaw(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
/// Check for NULL and deserialize value into non-nullable column or insert default value of nested type.
/// Return true if parsing was successful and false in case of any error.
static bool tryDeserializeNullAsDefaultOrNestedWholeText(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
static bool tryDeserializeNullAsDefaultOrNestedTextEscaped(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
static bool tryDeserializeNullAsDefaultOrNestedTextQuoted(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization);
static bool tryDeserializeNullAsDefaultOrNestedTextCSV(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
static bool tryDeserializeNullAsDefaultOrNestedTextJSON(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization);
static bool tryDeserializeNullAsDefaultOrNestedTextRaw(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
static void serializeNullEscaped(WriteBuffer & ostr, const FormatSettings & settings);
static bool tryDeserializeNullEscaped(ReadBuffer & istr, const FormatSettings & settings);
static void serializeNullQuoted(WriteBuffer & ostr);
static bool tryDeserializeNullQuoted(ReadBuffer & istr);
static void serializeNullCSV(WriteBuffer & ostr, const FormatSettings & settings);
static bool tryDeserializeNullCSV(ReadBuffer & istr, const FormatSettings & settings);
static void serializeNullJSON(WriteBuffer & ostr);
static bool tryDeserializeNullJSON(ReadBuffer & istr);
static void serializeNullRaw(WriteBuffer & ostr, const FormatSettings & settings);
static bool tryDeserializeNullRaw(ReadBuffer & istr, const FormatSettings & settings);
static void serializeNullText(WriteBuffer & ostr, const FormatSettings & settings);
static bool tryDeserializeNullText(ReadBuffer & istr);
static void serializeNullXML(WriteBuffer & ostr);
private:
struct SubcolumnCreator : public ISubcolumnCreator

View File

@ -37,6 +37,18 @@ void SerializationNumber<T>::deserializeText(IColumn & column, ReadBuffer & istr
throwUnexpectedDataAfterParsedValue(column, istr, settings, "Number");
}
/// Non-throwing text parse of a number.
/// Returns false when the value cannot be read, or when `whole` is set and
/// unread data remains in `istr` after the number. On success the value is appended to `column`.
template <typename T>
bool SerializationNumber<T>::tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const
{
    T value;
    if (!tryReadText(value, istr))
        return false;

    /// In "whole" mode the number must be the only content of the buffer.
    if (whole && !istr.eof())
        return false;

    auto & numbers = assert_cast<ColumnVector<T> &>(column).getData();
    numbers.push_back(value);
    return true;
}
template <typename T>
void SerializationNumber<T>::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
@ -44,9 +56,10 @@ void SerializationNumber<T>::serializeTextJSON(const IColumn & column, size_t ro
writeJSONNumber(x, ostr, settings);
}
template <typename T>
void SerializationNumber<T>::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
template <typename T, typename ReturnType>
ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
bool has_quote = false;
if (!istr.eof() && *istr.position() == '"') /// We understand the number both in quotes and without.
{
@ -54,13 +67,16 @@ void SerializationNumber<T>::deserializeTextJSON(IColumn & column, ReadBuffer &
++istr.position();
}
FieldType x;
T x;
/// null
if (!has_quote && !istr.eof() && *istr.position() == 'n')
{
++istr.position();
assertString("ull", istr);
if constexpr (throw_exception)
assertString("ull", istr);
else if (!checkString("ull", istr))
return ReturnType(false);
x = NaNOrZero<T>();
}
@ -73,26 +89,62 @@ void SerializationNumber<T>::deserializeTextJSON(IColumn & column, ReadBuffer &
{
// extra conditions to parse true/false strings into 1/0
if (istr.eof())
throwReadAfterEOF();
{
if constexpr (throw_exception)
throwReadAfterEOF();
else
return false;
}
if (*istr.position() == 't' || *istr.position() == 'f')
{
bool tmp = false;
readBoolTextWord(tmp, istr);
if constexpr (throw_exception)
readBoolTextWord(tmp, istr);
else if (!readBoolTextWord<bool>(tmp, istr))
return ReturnType(false);
x = tmp;
}
else
readText(x, istr);
{
if constexpr (throw_exception)
readText(x, istr);
else if (!tryReadText(x, istr))
return ReturnType(false);
}
}
else
{
readText(x, istr);
if constexpr (throw_exception)
readText(x, istr);
else if (!tryReadText(x, istr))
return ReturnType(false);
}
if (has_quote)
assertChar('"', istr);
{
if constexpr (throw_exception)
assertChar('"', istr);
else if (!checkChar('"', istr))
return ReturnType(false);
}
}
assert_cast<ColumnVector<T> &>(column).getData().push_back(x);
return ReturnType(true);
}
/// Throwing JSON parse of a number: forwards to deserializeTextJSONImpl with ReturnType = void,
/// which selects the exception-throwing code paths of the implementation.
template <typename T>
void SerializationNumber<T>::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextJSONImpl<T, void>(column, istr, settings);
}
/// Non-throwing JSON parse of a number: forwards to deserializeTextJSONImpl with ReturnType = bool
/// and returns its success flag.
template <typename T>
bool SerializationNumber<T>::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
return deserializeTextJSONImpl<T, bool>(column, istr, settings);
}
template <typename T>
@ -103,6 +155,16 @@ void SerializationNumber<T>::deserializeTextCSV(IColumn & column, ReadBuffer & i
assert_cast<ColumnVector<T> &>(column).getData().push_back(x);
}
/// Non-throwing CSV parse of a number.
/// Appends the parsed value to `column` and returns true on success;
/// returns false (leaving `column` untouched) when tryReadCSV fails.
template <typename T>
bool SerializationNumber<T>::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & /*settings*/) const
{
    FieldType value;
    const bool parsed = tryReadCSV(value, istr);
    if (parsed)
        assert_cast<ColumnVector<T> &>(column).getData().push_back(value);
    return parsed;
}
template <typename T>
void SerializationNumber<T>::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const
{

View File

@ -20,9 +20,12 @@ public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
/** Format is platform-dependent. */
void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override;

View File

@ -272,40 +272,67 @@ void SerializationString::serializeTextEscaped(const IColumn & column, size_t ro
}
template <typename Reader>
static inline void read(IColumn & column, Reader && reader)
template <typename ReturnType, typename Reader>
static inline ReturnType read(IColumn & column, Reader && reader)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
ColumnString & column_string = assert_cast<ColumnString &>(column);
ColumnString::Chars & data = column_string.getChars();
ColumnString::Offsets & offsets = column_string.getOffsets();
size_t old_chars_size = data.size();
size_t old_offsets_size = offsets.size();
try
{
reader(data);
data.push_back(0);
offsets.push_back(data.size());
}
catch (...)
auto restore_column = [&]()
{
offsets.resize_assume_reserved(old_offsets_size);
data.resize_assume_reserved(old_chars_size);
throw;
};
try
{
if constexpr (throw_exception)
{
reader(data);
}
else if (!reader(data))
{
restore_column();
return false;
}
data.push_back(0);
offsets.push_back(data.size());
return ReturnType(true);
}
catch (...)
{
restore_column();
if constexpr (throw_exception)
throw;
else
return false;
}
}
void SerializationString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
read(column, [&](ColumnString::Chars & data) { readStringUntilEOFInto(data, istr); });
read<void>(column, [&](ColumnString::Chars & data) { readStringUntilEOFInto(data, istr); });
}
/// Non-throwing variant of deserializeWholeText: reads everything up to EOF as one string value.
/// The reader itself always reports success; the overall result is whatever read<bool> returns.
bool SerializationString::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    auto read_until_eof = [&](ColumnString::Chars & data)
    {
        readStringUntilEOFInto(data, istr);
        return true;
    };
    return read<bool>(column, read_until_eof);
}
void SerializationString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
read(column, [&](ColumnString::Chars & data) { readEscapedStringInto(data, istr); });
read<void>(column, [&](ColumnString::Chars & data) { readEscapedStringInto(data, istr); });
}
/// Non-throwing variant of deserializeTextEscaped: reads an escaped string value.
/// The reader itself always reports success; the overall result is whatever read<bool> returns.
bool SerializationString::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    auto read_escaped = [&](ColumnString::Chars & data)
    {
        readEscapedStringInto(data, istr);
        return true;
    };
    return read<bool>(column, read_escaped);
}
void SerializationString::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
@ -315,7 +342,12 @@ void SerializationString::serializeTextQuoted(const IColumn & column, size_t row
void SerializationString::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
read(column, [&](ColumnString::Chars & data) { readQuotedStringInto<true>(data, istr); });
read<void>(column, [&](ColumnString::Chars & data) { readQuotedStringInto<true>(data, istr); });
}
/// Non-throwing variant of deserializeTextQuoted: the success flag of
/// tryReadQuotedStringInto<true> is propagated through read<bool>.
bool SerializationString::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    auto read_quoted = [&](ColumnString::Chars & data)
    {
        return tryReadQuotedStringInto<true>(data, istr);
    };
    return read<bool>(column, read_quoted);
}
@ -329,11 +361,11 @@ void SerializationString::deserializeTextJSON(IColumn & column, ReadBuffer & ist
{
if (settings.json.read_objects_as_strings && !istr.eof() && *istr.position() == '{')
{
read(column, [&](ColumnString::Chars & data) { readJSONObjectPossiblyInvalid(data, istr); });
read<void>(column, [&](ColumnString::Chars & data) { readJSONObjectPossiblyInvalid(data, istr); });
}
else if (settings.json.read_arrays_as_strings && !istr.eof() && *istr.position() == '[')
{
read(column, [&](ColumnString::Chars & data) { readJSONArrayInto(data, istr); });
read<void>(column, [&](ColumnString::Chars & data) { readJSONArrayInto(data, istr); });
}
else if (settings.json.read_bools_as_strings && !istr.eof() && (*istr.position() == 't' || *istr.position() == 'f'))
{
@ -349,7 +381,7 @@ void SerializationString::deserializeTextJSON(IColumn & column, ReadBuffer & ist
str_value = "false";
}
read(column, [&](ColumnString::Chars & data) { data.insert(str_value.begin(), str_value.end()); });
read<void>(column, [&](ColumnString::Chars & data) { data.insert(str_value.begin(), str_value.end()); });
}
else if (settings.json.read_numbers_as_strings && !istr.eof() && *istr.position() != '"')
{
@ -358,12 +390,60 @@ void SerializationString::deserializeTextJSON(IColumn & column, ReadBuffer & ist
Float64 tmp;
ReadBufferFromString buf(field);
if (tryReadFloatText(tmp, buf) && buf.eof())
read(column, [&](ColumnString::Chars & data) { data.insert(field.begin(), field.end()); });
read<void>(column, [&](ColumnString::Chars & data) { data.insert(field.begin(), field.end()); });
else
throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON String value here: {}", field);
}
else
read(column, [&](ColumnString::Chars & data) { readJSONStringInto(data, istr); });
read<void>(column, [&](ColumnString::Chars & data) { readJSONStringInto(data, istr); });
}
/// Non-throwing JSON parse of a String value.
/// Depending on the json.* settings and the first character in `istr`, the value is taken from:
///  - a JSON object   (read_objects_as_strings, input starts with '{'),
///  - a JSON array    (read_arrays_as_strings, input starts with '['),
///  - a bool literal  (read_bools_as_strings, input starts with 't' or 'f'),
///  - a number        (read_numbers_as_strings, input does not start with '"'),
///  - otherwise a regular JSON string.
/// Returns false if the selected reader fails.
bool SerializationString::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    if (settings.json.read_objects_as_strings && !istr.eof() && *istr.position() == '{')
    {
        auto read_object = [&](ColumnString::Chars & data)
        {
            return readJSONObjectPossiblyInvalid<ColumnString::Chars, bool>(data, istr);
        };
        return read<bool>(column, read_object);
    }

    if (settings.json.read_arrays_as_strings && !istr.eof() && *istr.position() == '[')
    {
        auto read_array = [&](ColumnString::Chars & data)
        {
            return readJSONArrayInto<ColumnString::Chars, bool>(data, istr);
        };
        return read<bool>(column, read_array);
    }

    if (settings.json.read_bools_as_strings && !istr.eof() && (*istr.position() == 't' || *istr.position() == 'f'))
    {
        /// The condition above guarantees the first character is either 't' or 'f'.
        const char * literal = (*istr.position() == 't') ? "true" : "false";
        if (!checkString(literal, istr))
            return false;

        String str_value = literal;
        read<void>(column, [&](ColumnString::Chars & data) { data.insert(str_value.begin(), str_value.end()); });
        return true;
    }

    if (settings.json.read_numbers_as_strings && !istr.eof() && *istr.position() != '"')
    {
        String field;
        if (!tryReadJSONField(field, istr))
            return false;

        /// Accept the raw field only if the whole of it parses as a floating point number.
        Float64 tmp;
        ReadBufferFromString buf(field);
        if (!tryReadFloatText(tmp, buf) || !buf.eof())
            return false;

        read<void>(column, [&](ColumnString::Chars & data) { data.insert(field.begin(), field.end()); });
        return true;
    }

    auto read_json_string = [&](ColumnString::Chars & data)
    {
        return tryReadJSONStringInto(data, istr);
    };
    return read<bool>(column, read_json_string);
}
@ -381,7 +461,12 @@ void SerializationString::serializeTextCSV(const IColumn & column, size_t row_nu
void SerializationString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
read(column, [&](ColumnString::Chars & data) { readCSVStringInto(data, istr, settings.csv); });
read<void>(column, [&](ColumnString::Chars & data) { readCSVStringInto(data, istr, settings.csv); });
}
/// Non-throwing variant of deserializeTextCSV: reads a CSV string using the CSV settings.
/// The reader itself always reports success; the overall result is whatever read<bool> returns.
bool SerializationString::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    auto read_csv = [&](ColumnString::Chars & data)
    {
        readCSVStringInto<ColumnString::Chars, false, false>(data, istr, settings.csv);
        return true;
    };
    return read<bool>(column, read_csv);
}
void SerializationString::serializeTextMarkdown(

View File

@ -18,20 +18,25 @@ public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextMarkdown(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
};

View File

@ -62,15 +62,38 @@ void SerializationTuple::serializeBinary(const IColumn & column, size_t row_num,
}
template <typename F>
static void addElementSafe(size_t num_elems, IColumn & column, F && impl)
template <typename ReturnType = void, typename F>
static ReturnType addElementSafe(size_t num_elems, IColumn & column, F && impl)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
/// We use the assumption that tuples of zero size do not exist.
size_t old_size = column.size();
auto restore_elements = [&]()
{
for (size_t i = 0; i < num_elems; ++i)
{
auto & element_column = extractElementColumn(column, i);
if (element_column.size() > old_size)
{
chassert(element_column.size() - old_size == 1);
element_column.popBack(1);
}
}
};
try
{
impl();
if constexpr (throw_exception)
{
impl();
}
else if (!impl())
{
restore_elements();
return ReturnType(false);
}
// Check that all columns now have the same size.
size_t new_size = column.size();
@ -81,22 +104,23 @@ static void addElementSafe(size_t num_elems, IColumn & column, F && impl)
{
// This is not a logical error because it may work with
// user-supplied data.
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH,
"Cannot read a tuple because not all elements are present");
if constexpr (throw_exception)
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH,
"Cannot read a tuple because not all elements are present");
restore_elements();
return ReturnType(false);
}
}
}
catch (...)
{
for (size_t i = 0; i < num_elems; ++i)
{
auto & element_column = extractElementColumn(column, i);
if (element_column.size() > old_size)
element_column.popBack(1);
}
throw;
restore_elements();
if constexpr (throw_exception)
throw;
return ReturnType(false);
}
return ReturnType(true);
}
void SerializationTuple::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
@ -120,25 +144,51 @@ void SerializationTuple::serializeText(const IColumn & column, size_t row_num, W
writeChar(')', ostr);
}
void SerializationTuple::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
template <typename ReturnType>
ReturnType SerializationTuple::deserializeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
{
const size_t size = elems.size();
assertChar('(', istr);
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
addElementSafe(elems.size(), column, [&]
const size_t size = elems.size();
if constexpr (throw_exception)
assertChar('(', istr);
else if (!checkChar('(', istr))
return ReturnType(false);
auto impl = [&]()
{
for (size_t i = 0; i < size; ++i)
{
skipWhitespaceIfAny(istr);
if (i != 0)
{
assertChar(',', istr);
if constexpr (throw_exception)
assertChar(',', istr);
else if (!checkChar(',', istr))
return ReturnType(false);
skipWhitespaceIfAny(istr);
}
if (settings.null_as_default)
SerializationNullable::deserializeTextQuotedImpl(extractElementColumn(column, i), istr, settings, elems[i]);
auto & element_column = extractElementColumn(column, i);
if constexpr (throw_exception)
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextQuoted(element_column, istr, settings, elems[i]);
else
elems[i]->deserializeTextQuoted(element_column, istr, settings);
}
else
elems[i]->deserializeTextQuoted(extractElementColumn(column, i), istr, settings);
{
bool ok;
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
ok = SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextQuoted(element_column, istr, settings, elems[i]);
else
ok = elems[i]->tryDeserializeTextQuoted(element_column, istr, settings);
if (!ok)
return false;
}
}
// Special format for one element tuple (1,)
@ -150,11 +200,32 @@ void SerializationTuple::deserializeText(IColumn & column, ReadBuffer & istr, co
}
skipWhitespaceIfAny(istr);
assertChar(')', istr);
if constexpr (throw_exception)
assertChar(')', istr);
else if (!checkChar(')', istr))
return ReturnType(false);
if (whole && !istr.eof())
throwUnexpectedDataAfterParsedValue(column, istr, settings, "Tuple");
});
{
if constexpr (throw_exception)
throwUnexpectedDataAfterParsedValue(column, istr, settings, "Tuple");
return ReturnType(false);
}
return ReturnType(true);
};
return addElementSafe<ReturnType>(elems.size(), column, impl);
}
/// Text deserialization of a tuple: forwards all arguments to deserializeTextImpl and discards any result.
/// NOTE(review): ReturnType is not spelled out here, so the default from the declaration is used -
/// presumably void (the throwing flavour); confirm against the header.
void SerializationTuple::deserializeText(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, bool whole) const
{
deserializeTextImpl(column, istr, settings, whole);
}
/// Non-throwing text deserialization of a tuple: forwards to deserializeTextImpl with ReturnType = bool
/// and returns its success flag.
bool SerializationTuple::tryDeserializeText(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, bool whole) const
{
return deserializeTextImpl<bool>(column, istr, settings, whole);
}
void SerializationTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
@ -239,16 +310,39 @@ void SerializationTuple::serializeTextJSONPretty(const IColumn & column, size_t
}
}
void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
template <typename ReturnType>
ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
auto deserialize_element = [&](IColumn & element_column, size_t element_pos)
{
if constexpr (throw_exception)
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(element_column, istr, settings, elems[element_pos]);
else
elems[element_pos]->deserializeTextJSON(element_column, istr, settings);
}
else
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(element_column, istr, settings, elems[element_pos]);
return elems[element_pos]->tryDeserializeTextJSON(element_column, istr, settings);
}
};
if (settings.json.read_named_tuples_as_objects
&& have_explicit_names)
{
skipWhitespaceIfAny(istr);
assertChar('{', istr);
if constexpr (throw_exception)
assertChar('{', istr);
else if (!checkChar('{', istr))
return ReturnType(false);
skipWhitespaceIfAny(istr);
addElementSafe(elems.size(), column, [&]
auto impl = [&]()
{
std::vector<UInt8> seen_elements(elems.size(), 0);
size_t processed = 0;
@ -256,18 +350,32 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr
while (!istr.eof() && *istr.position() != '}')
{
if (!settings.json.ignore_unknown_keys_in_named_tuple && processed == elems.size())
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected number of elements in named tuple. Expected no more than {} (consider enabling input_format_json_ignore_unknown_keys_in_named_tuple setting)", elems.size());
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected number of elements in named tuple. Expected no more than {} (consider enabling input_format_json_ignore_unknown_keys_in_named_tuple setting)", elems.size());
return ReturnType(false);
}
if (processed + skipped > 0)
{
assertChar(',', istr);
if constexpr (throw_exception)
assertChar(',', istr);
else if (!checkChar(',', istr))
return ReturnType(false);
skipWhitespaceIfAny(istr);
}
std::string name;
readDoubleQuotedString(name, istr);
if constexpr (throw_exception)
readDoubleQuotedString(name, istr);
else if (!tryReadDoubleQuotedString(name, istr))
return ReturnType(false);
skipWhitespaceIfAny(istr);
assertChar(':', istr);
if constexpr (throw_exception)
assertChar(':', istr);
else if (!checkChar(':', istr))
return ReturnType(false);
skipWhitespaceIfAny(istr);
const size_t element_pos = getPositionByName(name);
@ -275,36 +383,52 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr
{
if (settings.json.ignore_unknown_keys_in_named_tuple)
{
skipJSONField(istr, name);
if constexpr (throw_exception)
skipJSONField(istr, name);
else if (!trySkipJSONField(istr, name))
return ReturnType(false);
skipWhitespaceIfAny(istr);
++skipped;
continue;
}
else
throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Tuple doesn't have element with name '{}', enable setting input_format_json_ignore_unknown_keys_in_named_tuple", name);
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Tuple doesn't have element with name '{}', enable setting input_format_json_ignore_unknown_keys_in_named_tuple", name);
return ReturnType(false);
}
}
seen_elements[element_pos] = 1;
auto & element_column = extractElementColumn(column, element_pos);
try
if constexpr (throw_exception)
{
if (settings.null_as_default)
SerializationNullable::deserializeTextJSONImpl(element_column, istr, settings, elems[element_pos]);
else
elems[element_pos]->deserializeTextJSON(element_column, istr, settings);
try
{
deserialize_element(element_column, element_pos);
}
catch (Exception & e)
{
e.addMessage("(while reading the value of nested key " + name + ")");
throw;
}
}
catch (Exception & e)
else
{
e.addMessage("(while reading the value of nested key " + name + ")");
throw;
if (!deserialize_element(element_column, element_pos))
return ReturnType(false);
}
skipWhitespaceIfAny(istr);
++processed;
}
assertChar('}', istr);
if constexpr (throw_exception)
assertChar('}', istr);
else if (!checkChar('}', istr))
return ReturnType(false);
/// Check if we have missing elements.
if (processed != elems.size())
@ -315,41 +439,81 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr
continue;
if (!settings.json.defaults_for_missing_elements_in_named_tuple)
throw Exception(
ErrorCodes::INCORRECT_DATA,
"JSON object doesn't contain tuple element {}. If you want to insert defaults in case of missing elements, "
"enable setting input_format_json_defaults_for_missing_elements_in_named_tuple",
elems[element_pos]->getElementName());
{
if constexpr (throw_exception)
throw Exception(
ErrorCodes::INCORRECT_DATA,
"JSON object doesn't contain tuple element {}. If you want to insert defaults in case of missing elements, "
"enable setting input_format_json_defaults_for_missing_elements_in_named_tuple",
elems[element_pos]->getElementName());
return ReturnType(false);
}
auto & element_column = extractElementColumn(column, element_pos);
element_column.insertDefault();
}
}
});
return ReturnType(true);
};
return addElementSafe<ReturnType>(elems.size(), column, impl);
}
else
{
assertChar('[', istr);
skipWhitespaceIfAny(istr);
if constexpr (throw_exception)
assertChar('[', istr);
else if (!checkChar('[', istr))
return ReturnType(false);
skipWhitespaceIfAny(istr);
addElementSafe(elems.size(), column, [&]
auto impl = [&]()
{
for (size_t i = 0; i < elems.size(); ++i)
{
skipWhitespaceIfAny(istr);
if (i != 0)
{
assertChar(',', istr);
if constexpr (throw_exception)
assertChar(',', istr);
else if (!checkChar(',', istr))
return ReturnType(false);
skipWhitespaceIfAny(istr);
}
elems[i]->deserializeTextJSON(extractElementColumn(column, i), istr, settings);
auto & element_column = extractElementColumn(column, i);
if constexpr (throw_exception)
deserialize_element(element_column, i);
else if (!deserialize_element(element_column, i))
return ReturnType(false);
}
skipWhitespaceIfAny(istr);
assertChar(']', istr);
});
if constexpr (throw_exception)
assertChar(']', istr);
else if (!checkChar(']', istr))
return ReturnType(false);
return ReturnType(true);
};
return addElementSafe<ReturnType>(elems.size(), column, impl);
}
}
/// JSON deserialization entry point for tuples.
/// Instantiates deserializeTextJSONImpl with ReturnType = void, i.e. the variant in which
/// malformed input is reported via exceptions (throw_exception is true for void).
void SerializationTuple::deserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
{
    deserializeTextJSONImpl<void>(column, istr, settings);
}
/// Non-throwing flavour of deserializeTextJSON.
/// Instantiates deserializeTextJSONImpl with ReturnType = bool, so structural mismatches
/// handled by that implementation are reported as `false` rather than by the throw branches.
bool SerializationTuple::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
{
    const bool parsed = deserializeTextJSONImpl<bool>(column, istr, settings);
    return parsed;
}
void SerializationTuple::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeCString("<tuple>", ostr);
@ -385,14 +549,48 @@ void SerializationTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
assertChar(settings.csv.tuple_delimiter, istr);
skipWhitespaceIfAny(istr);
}
if (settings.null_as_default)
SerializationNullable::deserializeTextCSVImpl(extractElementColumn(column, i), istr, settings, elems[i]);
auto & element_column = extractElementColumn(column, i);
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextCSV(element_column, istr, settings, elems[i]);
else
elems[i]->deserializeTextCSV(extractElementColumn(column, i), istr, settings);
elems[i]->deserializeTextCSV(element_column, istr, settings);
}
});
}
/// Tries to read all tuple elements from CSV, separated by settings.csv.tuple_delimiter.
/// Returns false as soon as a delimiter or an element cannot be read.
/// The work is wrapped into addElementSafe<bool>, like the other tuple deserializers in this file.
bool SerializationTuple::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    auto read_all_elements = [&]
    {
        const size_t num_elements = elems.size();
        for (size_t pos = 0; pos < num_elements; ++pos)
        {
            /// Every element except the first one must be preceded by the tuple delimiter.
            if (pos > 0)
            {
                skipWhitespaceIfAny(istr);
                if (!checkChar(settings.csv.tuple_delimiter, istr))
                    return false;
                skipWhitespaceIfAny(istr);
            }

            auto & element_column = extractElementColumn(column, pos);

            /// With null_as_default, non-nullable elements go through the NULL-as-default path;
            /// all other elements use their own serialization directly.
            bool element_parsed;
            if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column))
                element_parsed = SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextCSV(element_column, istr, settings, elems[pos]);
            else
                element_parsed = elems[pos]->tryDeserializeTextCSV(element_column, istr, settings);

            if (!element_parsed)
                return false;
        }

        return true;
    };

    return addElementSafe<bool>(elems.size(), column, read_all_elements);
}
void SerializationTuple::enumerateStreams(
EnumerateStreamsSettings & settings,
const StreamCallback & callback,

View File

@ -23,14 +23,17 @@ public:
void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
/// Tuples in CSV format will be serialized as separate columns (that is, losing their nesting in the tuple).
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
/** Each sub-column in a tuple is serialized in separate stream.
*/
@ -73,6 +76,15 @@ private:
bool have_explicit_names;
size_t getPositionByName(const String & name) const;
template <typename ReturnType = void>
ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const;
template <typename ReturnType = void>
ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
template <typename ReturnType = void>
ReturnType deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
};
}

View File

@ -25,15 +25,16 @@ void SerializationUUID::deserializeText(IColumn & column, ReadBuffer & istr, con
throwUnexpectedDataAfterParsedValue(column, istr, settings, "UUID");
}
void SerializationUUID::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
bool SerializationUUID::tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const
{
deserializeText(column, istr, settings, false);
UUID x;
if (!tryReadText(x, istr) || (whole && !istr.eof()))
return false;
assert_cast<ColumnUUID &>(column).getData().push_back(x);
return true;
}
/// Escaped text output of a UUID: forwards to serializeText with the same arguments.
void SerializationUUID::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeText(column, row_num, ostr, settings);
}
void SerializationUUID::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
@ -76,6 +77,17 @@ void SerializationUUID::deserializeTextQuoted(IColumn & column, ReadBuffer & ist
assert_cast<ColumnUUID &>(column).getData().push_back(std::move(uuid)); /// It's important to do this at the end - for exception safety.
}
/// Tries to read a UUID enclosed in single quotes and append it to the column.
/// Returns false (without touching the column) if the opening quote, the UUID itself
/// or the closing quote cannot be read.
bool SerializationUUID::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    UUID uuid;
    if (!checkChar('\'', istr) || !tryReadText(uuid, istr) || !checkChar('\'', istr))
        return false;

    /// Append only after the whole quoted value has been read successfully.
    assert_cast<ColumnUUID &>(column).getData().push_back(std::move(uuid));
    return true;
}
void SerializationUUID::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
@ -92,6 +104,15 @@ void SerializationUUID::deserializeTextJSON(IColumn & column, ReadBuffer & istr,
assert_cast<ColumnUUID &>(column).getData().push_back(x);
}
/// Tries to read a UUID written as a double-quoted JSON string and append it to the column.
/// Returns false (leaving the column unchanged) if either quote or the UUID cannot be read.
bool SerializationUUID::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    UUID value;

    /// Short-circuit evaluation keeps the original read order: opening quote, UUID, closing quote.
    const bool parsed = checkChar('"', istr) && tryReadText(value, istr) && checkChar('"', istr);
    if (parsed)
        assert_cast<ColumnUUID &>(column).getData().push_back(value);

    return parsed;
}
void SerializationUUID::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
@ -106,6 +127,14 @@ void SerializationUUID::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
assert_cast<ColumnUUID &>(column).getData().push_back(value);
}
/// Tries to read a UUID from a CSV field via tryReadCSV and append it to the column.
/// Returns false (leaving the column unchanged) when the value cannot be read.
bool SerializationUUID::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
    UUID parsed_uuid;
    if (tryReadCSV(parsed_uuid, istr))
    {
        assert_cast<ColumnUUID &>(column).getData().push_back(parsed_uuid);
        return true;
    }

    return false;
}
void SerializationUUID::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const
{

View File

@ -10,14 +10,16 @@ class SerializationUUID : public SimpleTextSerialization
public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override;

View File

@ -0,0 +1,840 @@
#include <DataTypes/Serializations/SerializationVariant.h>
#include <DataTypes/Serializations/SerializationVariantElement.h>
#include <DataTypes/Serializations/SerializationNumber.h>
#include <DataTypes/Serializations/SerializationNullable.h>
#include <DataTypes/Serializations/SerializationNamed.h>
#include <DataTypes/DataTypeVariant.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Columns/ColumnVariant.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromString.h>
namespace DB
{
/// Error codes referenced by this translation unit.
/// They are only declared here (extern); the definitions live elsewhere.
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int INCORRECT_DATA;
}
/// Enumerates the substreams of a Variant column:
///   1) the discriminators substream (VariantDiscriminators), reported directly via `callback`;
///   2) for every variant, in global order, the substreams of that variant under
///      VariantElements / VariantElement.
/// `data.type` and `data.column` may be null; the corresponding parts of the substream data are then left null.
void SerializationVariant::enumerateStreams(
    EnumerateStreamsSettings & settings,
    const StreamCallback & callback,
    const SubstreamData & data) const
{
    const auto * variant_type = data.type ? &assert_cast<const DataTypeVariant &>(*data.type) : nullptr;
    const auto * variant_column = data.column ? &assert_cast<const ColumnVariant &>(*data.column) : nullptr;

    auto discriminators_serialization = std::make_shared<SerializationNamed>(
        std::make_shared<SerializationNumber<ColumnVariant::Discriminator>>(), "discr", SubstreamType::NamedVariantDiscriminators);
    auto discriminators_column = variant_column ? variant_column->getLocalDiscriminatorsPtr() : nullptr;

    /// Discriminators substream.
    settings.path.push_back(Substream::VariantDiscriminators);
    auto discriminators_substream_data = SubstreamData(discriminators_serialization)
        .withType(variant_type ? std::make_shared<DataTypeNumber<ColumnVariant::Discriminator>>() : nullptr)
        .withColumn(variant_column ? variant_column->getLocalDiscriminatorsPtr() : nullptr)
        .withSerializationInfo(data.serialization_info);
    settings.path.back().data = discriminators_substream_data;
    callback(settings.path);
    settings.path.pop_back();

    /// Variant elements, enumerated in global discriminators order.
    settings.path.push_back(Substream::VariantElements);
    settings.path.back().data = data;

    const size_t num_variants = variants.size();
    for (size_t global_discr = 0; global_discr < num_variants; ++global_discr)
    {
        /// The creator is set on the VariantElements path entry before the element itself is pushed.
        settings.path.back().creator = std::make_shared<SerializationVariantElement::VariantSubcolumnCreator>(
            discriminators_column,
            variant_names[global_discr],
            global_discr,
            variant_column ? variant_column->localDiscriminatorByGlobal(global_discr) : global_discr);

        auto element_data = SubstreamData(variants[global_discr])
            .withType(variant_type ? variant_type->getVariant(global_discr) : nullptr)
            .withColumn(variant_column ? variant_column->getVariantPtrByGlobalDiscriminator(global_discr) : nullptr)
            .withSerializationInfo(data.serialization_info);

        addVariantElementToPath(settings.path, global_discr);
        settings.path.back().data = element_data;
        variants[global_discr]->enumerateStreams(settings, callback, element_data);
        settings.path.pop_back();
    }

    settings.path.pop_back();
}
/// Bulk serialization state of a Variant column: one nested state per variant.
/// The vector is resized to variants.size() and indexed by the same index as `variants`
/// in serializeBinaryBulkStatePrefix.
struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState
{
std::vector<ISerialization::SerializeBinaryBulkStatePtr> states;
};
/// Bulk deserialization state of a Variant column: one nested state per variant.
/// The vector is resized to variants.size() and indexed by the same index as `variants`
/// in deserializeBinaryBulkStatePrefix.
struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState
{
std::vector<ISerialization::DeserializeBinaryBulkStatePtr> states;
};
void SerializationVariant::serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
auto variant_state = std::make_shared<SerializeBinaryBulkStateVariant>();
variant_state->states.resize(variants.size());
settings.path.push_back(Substream::VariantElements);
for (size_t i = 0; i < variants.size(); ++i)
{
addVariantElementToPath(settings.path, i);
variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->states[i]);
settings.path.pop_back();
}
settings.path.pop_back();
state = std::move(variant_state);
}
/// Lets every variant (in global order) write its state suffix, using the per-variant
/// states stored in `state` (which must hold a SerializeBinaryBulkStateVariant).
void SerializationVariant::serializeBinaryBulkStateSuffix(
    SerializeBinaryBulkSettings & settings,
    SerializeBinaryBulkStatePtr & state) const
{
    auto * bulk_state = checkAndGetState<SerializeBinaryBulkStateVariant>(state);
    const size_t num_variants = variants.size();

    settings.path.push_back(Substream::VariantElements);
    for (size_t global_discr = 0; global_discr < num_variants; ++global_discr)
    {
        addVariantElementToPath(settings.path, global_discr);
        variants[global_discr]->serializeBinaryBulkStateSuffix(settings, bulk_state->states[global_discr]);
        settings.path.pop_back();
    }
    settings.path.pop_back();
}
/// Creates the bulk deserialization state for a Variant column and lets every variant
/// (in global order) read its own state prefix under VariantElements / VariantElement.
/// The resulting per-variant states are stored into `state`.
void SerializationVariant::deserializeBinaryBulkStatePrefix(
    DeserializeBinaryBulkSettings & settings,
    DeserializeBinaryBulkStatePtr & state) const
{
    const size_t num_variants = variants.size();

    auto bulk_state = std::make_shared<DeserializeBinaryBulkStateVariant>();
    bulk_state->states.resize(num_variants);

    settings.path.push_back(Substream::VariantElements);
    for (size_t global_discr = 0; global_discr < num_variants; ++global_discr)
    {
        addVariantElementToPath(settings.path, global_discr);
        variants[global_discr]->deserializeBinaryBulkStatePrefix(settings, bulk_state->states[global_discr]);
        settings.path.pop_back();
    }
    settings.path.pop_back();

    state = std::move(bulk_state);
}
/// Serializes rows [offset, offset + limit) of a Variant column into multiple streams:
/// global discriminators go to the VariantDiscriminators stream, and each variant's values
/// go to its own VariantElements / VariantElement streams.
/// limit == 0 (or a range running past the end of the column) means "up to the end of the column".
/// Three cases are handled separately:
///   1) the whole column is serialized, or the column contains only NULLs;
///   2) exactly one variant is non-empty and there are no NULLs;
///   3) the general case, where the offset/limit pair of each variant is calculated from discriminators and offsets.
/// Throws LOGICAL_ERROR if the getter returns no stream for discriminators.
void SerializationVariant::serializeBinaryBulkWithMultipleStreams(
const IColumn & column,
size_t offset,
size_t limit,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
/// Normalize limit so that [offset, offset + limit) never runs past the end of the column.
if (const size_t size = col.size(); limit == 0 || offset + limit > size)
limit = size - offset;
settings.path.push_back(Substream::VariantDiscriminators);
auto * discriminators_stream = settings.getter(settings.path);
settings.path.pop_back();
if (!discriminators_stream)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream for VariantDiscriminators in SerializationVariant::serializeBinaryBulkWithMultipleStreams");
auto * variant_state = checkAndGetState<SerializeBinaryBulkStateVariant>(state);
/// If offset = 0 and limit == col.size() or we have only NULLs, we don't need to calculate
/// offsets and limits for variants and need to just serialize whole columns.
if ((offset == 0 && limit == col.size()) || col.hasOnlyNulls())
{
/// First, serialize discriminators.
/// If we have only NULLs or local and global discriminators are the same, just serialize the column as is.
if (col.hasOnlyNulls() || col.hasGlobalVariantsOrder())
{
SerializationNumber<ColumnVariant::Discriminator>().serializeBinaryBulk(col.getLocalDiscriminatorsColumn(), *discriminators_stream, offset, limit);
}
/// If local and global discriminators are different, we should convert local to global before serializing (because we don't serialize the mapping).
else
{
const auto & local_discriminators = col.getLocalDiscriminators();
for (size_t i = offset; i != offset + limit; ++i)
writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream);
}
/// Second, serialize variants in global order.
settings.path.push_back(Substream::VariantElements);
for (size_t i = 0; i != variants.size(); ++i)
{
addVariantElementToPath(settings.path, i);
/// Offset 0 and limit 0 are passed, i.e. each variant column is serialized as a whole.
variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->states[i]);
settings.path.pop_back();
}
settings.path.pop_back();
return;
}
/// If we have only one non empty variant and no NULLs, we can use the same limit offset for this variant.
if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
{
/// First, serialize discriminators.
/// We know that all discriminators are the same, so we just need to serialize this discriminator limit times.
auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr);
for (size_t i = 0; i != limit; ++i)
writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream);
/// Second, serialize non-empty variant (other variants are empty and we can skip their serialization).
settings.path.push_back(Substream::VariantElements);
addVariantElementToPath(settings.path, non_empty_global_discr);
/// We can use the same offset/limit as for whole Variant column
variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]);
/// Two pops: the VariantElement entry and the VariantElements entry.
settings.path.pop_back();
settings.path.pop_back();
return;
}
/// In general case we should iterate through local discriminators in range [offset, offset + limit) to serialize global discriminators and calculate offset/limit pair for each variant.
const auto & local_discriminators = col.getLocalDiscriminators();
const auto & offsets = col.getOffsets();
/// For each variant (indexed by global discriminator): {offset of the first row in range, number of rows in range}.
std::vector<std::pair<size_t, size_t>> variant_offsets_and_limits(variants.size(), {0, 0});
size_t end = offset + limit;
for (size_t i = offset; i < end; ++i)
{
auto global_discr = col.globalDiscriminatorByLocal(local_discriminators[i]);
writeBinaryLittleEndian(global_discr, *discriminators_stream);
if (global_discr != ColumnVariant::NULL_DISCRIMINATOR)
{
/// If we see this discriminator for the first time, update offset
if (!variant_offsets_and_limits[global_discr].second)
variant_offsets_and_limits[global_discr].first = offsets[i];
/// Update limit for this discriminator.
++variant_offsets_and_limits[global_discr].second;
}
}
/// Serialize variants in global order.
settings.path.push_back(Substream::VariantElements);
for (size_t i = 0; i != variants.size(); ++i)
{
/// Serialize variant only if we have its discriminator in the range.
if (variant_offsets_and_limits[i].second)
{
addVariantElementToPath(settings.path, i);
variants[i]->serializeBinaryBulkWithMultipleStreams(
col.getVariantByGlobalDiscriminator(i),
variant_offsets_and_limits[i].first,
variant_offsets_and_limits[i].second,
settings,
variant_state->states[i]);
settings.path.pop_back();
}
}
settings.path.pop_back();
}
/// Deserializes up to `limit` rows into a Variant column from multiple streams:
///   1) discriminators are read from the VariantDiscriminators stream (or taken from the substreams cache);
///   2) the number of new values of each variant is counted from the new discriminators;
///   3) each variant reads that many values from its own streams;
///   4) the offsets column is filled (or taken from the substreams cache).
/// The column must have global variants order, otherwise LOGICAL_ERROR is thrown.
/// If there is no discriminators stream (and nothing in the cache), nothing is read.
/// `settings.path` is restored to its initial value on every return path.
void SerializationVariant::deserializeBinaryBulkWithMultipleStreams(
    ColumnPtr & column,
    size_t limit,
    DeserializeBinaryBulkSettings & settings,
    DeserializeBinaryBulkStatePtr & state,
    SubstreamsCache * cache) const
{
    auto mutable_column = column->assumeMutable();
    ColumnVariant & col = assert_cast<ColumnVariant &>(*mutable_column);

    /// We always serialize Variant column with global variants order,
    /// so while deserialization column should be always with global variants order.
    if (!col.hasGlobalVariantsOrder())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to deserialize data into Variant column with not global variants order");

    /// First, deserialize discriminators.
    settings.path.push_back(Substream::VariantDiscriminators);
    if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path))
    {
        col.getLocalDiscriminatorsPtr() = cached_discriminators;
    }
    else
    {
        auto * discriminators_stream = settings.getter(settings.path);
        if (!discriminators_stream)
        {
            /// Nothing to read. Restore the path before returning, otherwise the caller
            /// would continue with a stale VariantDiscriminators entry at the end of the path.
            settings.path.pop_back();
            return;
        }

        SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0);
        addToSubstreamsCache(cache, settings.path, col.getLocalDiscriminatorsPtr());
    }
    settings.path.pop_back();

    /// Second, calculate limits for each variant by iterating through new discriminators.
    /// NOTE(review): this assumes that exactly `limit` new discriminators are present at the end
    /// of the discriminators column - confirm that callers never pass a limit larger than the
    /// number of rows actually available in the stream.
    std::vector<size_t> variant_limits(variants.size(), 0);
    auto & discriminators_data = col.getLocalDiscriminators();
    size_t discriminators_offset = discriminators_data.size() - limit;
    for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
    {
        ColumnVariant::Discriminator discr = discriminators_data[i];
        if (discr != ColumnVariant::NULL_DISCRIMINATOR)
            ++variant_limits[discr];
    }

    /// Now we can deserialize variants according to their limits.
    auto * variant_state = checkAndGetState<DeserializeBinaryBulkStateVariant>(state);
    settings.path.push_back(Substream::VariantElements);
    for (size_t i = 0; i != variants.size(); ++i)
    {
        addVariantElementToPath(settings.path, i);
        variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->states[i], cache);
        settings.path.pop_back();
    }
    settings.path.pop_back();

    /// Fill offsets column.
    /// It's important to do it after deserialization of all variants, because to fill offsets we need
    /// initial variants sizes without values in current range, but some variants can be shared with
    /// other columns via substream cache and they can already contain values from this range even
    /// before we call deserialize for them. So, before deserialize we cannot know for sure if
    /// variant columns already contain values from current range or not. But after calling deserialize
    /// we know for sure that they contain these values, so we can use variant limits and their
    /// new sizes to calculate correct offsets.
    settings.path.push_back(Substream::VariantOffsets);
    if (auto cached_offsets = getFromSubstreamsCache(cache, settings.path))
    {
        col.getOffsetsPtr() = cached_offsets;
    }
    else
    {
        auto & offsets = col.getOffsets();
        offsets.reserve(offsets.size() + limit);

        /// For each variant: the offset that the first new value of this variant will get.
        std::vector<size_t> variant_offsets;
        variant_offsets.reserve(variants.size());
        for (size_t i = 0; i != variants.size(); ++i)
            variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]);

        for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
        {
            ColumnVariant::Discriminator discr = discriminators_data[i];
            if (discr == ColumnVariant::NULL_DISCRIMINATOR)
                offsets.emplace_back();
            else
                offsets.push_back(variant_offsets[discr]++);
        }

        addToSubstreamsCache(cache, settings.path, col.getOffsetsPtr());
    }
    settings.path.pop_back();
}
/// Appends a VariantElement substream for the i-th variant to `path`,
/// tagging it with the corresponding entry of variant_names.
void SerializationVariant::addVariantElementToPath(DB::ISerialization::SubstreamPath & path, size_t i) const
{
    Substream element = Substream::VariantElement;
    element.variant_element_name = variant_names[i];
    path.push_back(std::move(element));
}
/// Serialization of a Variant value from a Field is not supported: always throws NOT_IMPLEMENTED.
void SerializationVariant::serializeBinary(const Field & /*field*/, WriteBuffer & /*ostr*/, const FormatSettings & /*settings*/) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinary from a field is not implemented for SerializationVariant");
}
/// Deserialization of a Variant value into a Field is not supported: always throws NOT_IMPLEMENTED.
void SerializationVariant::deserializeBinary(Field & /*field*/, ReadBuffer & /*istr*/, const FormatSettings & /*settings*/) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method deserializeBinary to a field is not implemented for SerializationVariant");
}
void SerializationVariant::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
auto global_discr = col.globalDiscriminatorAt(row_num);
writeBinaryLittleEndian(global_discr, ostr);
if (global_discr != ColumnVariant::NULL_DISCRIMINATOR)
variants[global_discr]->serializeBinary(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings);
}
/// Reads a single row of a Variant column in binary form:
/// the global discriminator first, then (unless it is the NULL discriminator) the value itself.
/// For a non-NULL value the nested variant column, the local discriminators and the offsets
/// are all extended by one entry.
void SerializationVariant::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    auto & variant_column = assert_cast<ColumnVariant &>(column);

    ColumnVariant::Discriminator discr;
    readBinaryLittleEndian(discr, istr);

    if (discr == ColumnVariant::NULL_DISCRIMINATOR)
    {
        variant_column.insertDefault();
        return;
    }

    auto & nested_column = variant_column.getVariantByGlobalDiscriminator(discr);
    variants[discr]->deserializeBinary(nested_column, istr, settings);

    /// The value was appended to the nested column, so its offset is the last index there.
    variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(discr));
    variant_column.getOffsets().push_back(nested_column.size() - 1);
}
namespace
{
const std::unordered_map<TypeIndex, size_t> & getTypesTextDeserializePriorityMap()
{
static std::unordered_map<TypeIndex, size_t> priority_map = []
{
static constexpr std::array priorities = {
/// Complex types have highest priority.
TypeIndex::Array,
TypeIndex::Tuple,
TypeIndex::Map,
TypeIndex::AggregateFunction,
/// Enums can be parsed both from strings and numbers.
/// So they have high enough priority.
TypeIndex::Enum8,
TypeIndex::Enum16,
/// Types that can be parsed from strings.
TypeIndex::UUID,
TypeIndex::IPv4,
TypeIndex::IPv6,
/// Types that can be parsed from numbers.
/// The order:
/// 1) Integers
/// 2) Big Integers
/// 3) Decimals
/// 4) Floats
/// In each group small types have higher priority.
TypeIndex::Int8,
TypeIndex::UInt8,
TypeIndex::Int16,
TypeIndex::UInt16,
TypeIndex::Int32,
TypeIndex::UInt32,
TypeIndex::Int64,
TypeIndex::UInt64,
TypeIndex::Int128,
TypeIndex::UInt128,
TypeIndex::Int256,
TypeIndex::UInt256,
TypeIndex::Decimal32,
TypeIndex::Decimal64,
TypeIndex::Decimal128,
TypeIndex::Decimal256,
TypeIndex::Float32,
TypeIndex::Float64,
/// Dates and DateTimes. More simple Date types have higher priority.
/// They have lower priority as numbers as some DateTimes sometimes can
/// be also parsed from numbers, but we don't want it usually.
TypeIndex::Date,
TypeIndex::Date32,
TypeIndex::DateTime,
TypeIndex::DateTime64,
/// String types have almost the lowest priority,
/// as in text formats almost all data can
/// be deserialized into String type.
TypeIndex::FixedString,
TypeIndex::String,
};
std::unordered_map<TypeIndex, size_t> pm;
pm.reserve(priorities.size());
for (size_t i = 0; i != priorities.size(); ++i)
pm[priorities[i]] = priorities.size() - i;
return pm;
}();
return priority_map;
}
/// We want to create more or less optimal order of types in which we will try text deserializations.
/// To do it, for each type we calculate a priority and then sort them by this priority.
/// Above we defined priority of each data type, but types can be nested and also we can have LowCardinality and Nullable.
/// To sort any nested types we create a priority that is a tuple of 3 elements:
/// 1) The maximum depth of nested types like Array/Map/Tuple.
/// 2) The combination of simple and complex types priorities.
/// 3) The depth of nested types LowCardinality/Nullable.
/// So, when we will sort types, first we will sort by the maximum depth of nested types, so more nested types are deserialized first,
/// then for types with the same depth we sort by the types priority, and last we sort by the depth of LowCardinality/Nullable types,
/// so if we have types with the same level of nesting and the same priority, we will first try to deserialize LowCardinality/Nullable types
/// (for example if we have types Array(Array(String)) and Array(Array(Nullable(String))).
/// This is just a batch of heuristics.
std::tuple<size_t, size_t, size_t> getTypeTextDeserializePriority(const DataTypePtr & type, size_t nested_depth, size_t simple_nested_depth, const std::unordered_map<TypeIndex, size_t> & priority_map)
{
if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(type.get()))
return getTypeTextDeserializePriority(nullable_type->getNestedType(), nested_depth, simple_nested_depth + 1, priority_map);
if (const auto * lc_type = typeid_cast<const DataTypeLowCardinality *>(type.get()))
return getTypeTextDeserializePriority(lc_type->getDictionaryType(), nested_depth, simple_nested_depth + 1, priority_map);
if (const auto * array_type = typeid_cast<const DataTypeArray *>(type.get()))
{
auto [elements_nested_depth, elements_priority, elements_simple_nested_depth] = getTypeTextDeserializePriority(array_type->getNestedType(), nested_depth + 1, simple_nested_depth, priority_map);
return {elements_nested_depth, elements_priority + priority_map.at(TypeIndex::Array), elements_simple_nested_depth};
}
if (const auto * tuple_type = typeid_cast<const DataTypeTuple *>(type.get()))
{
size_t max_nested_depth = 0;
size_t sum_priority = 0;
size_t max_simple_nested_depth = 0;
for (const auto & elem : tuple_type->getElements())
{
auto [elem_nested_depth, elem_priority, elem_simple_nested_depth] = getTypeTextDeserializePriority(elem, nested_depth + 1, simple_nested_depth, priority_map);
sum_priority += elem_priority;
if (elem_nested_depth > max_nested_depth)
max_nested_depth = elem_nested_depth;
if (elem_simple_nested_depth > max_simple_nested_depth)
max_simple_nested_depth = elem_simple_nested_depth;
}
return {max_nested_depth, sum_priority + priority_map.at(TypeIndex::Tuple), max_simple_nested_depth};
}
if (const auto * map_type = typeid_cast<const DataTypeMap *>(type.get()))
{
auto [key_max_depth, key_priority, key_simple_nested_depth] = getTypeTextDeserializePriority(map_type->getKeyType(), nested_depth + 1, simple_nested_depth, priority_map);
auto [value_max_depth, value_priority, value_simple_nested_depth] = getTypeTextDeserializePriority(map_type->getValueType(), nested_depth + 1, simple_nested_depth, priority_map);
return {std::max(key_max_depth, value_max_depth), key_priority + value_priority + priority_map.at(TypeIndex::Map), std::max(key_simple_nested_depth, value_simple_nested_depth)};
}
if (const auto * variant_type = typeid_cast<const DataTypeVariant *>(type.get()))
{
size_t max_priority = 0;
size_t max_depth = 0;
size_t max_simple_nested_depth = 0;
for (const auto & variant : variant_type->getVariants())
{
auto [variant_max_depth, variant_priority, variant_simple_nested_depth] = getTypeTextDeserializePriority(variant, nested_depth, simple_nested_depth, priority_map);
if (variant_priority > max_priority)
max_priority = variant_priority;
if (variant_max_depth > max_depth)
max_depth = variant_max_depth;
if (variant_simple_nested_depth > max_simple_nested_depth)
max_simple_nested_depth = variant_simple_nested_depth;
}
return {max_depth, max_priority, max_simple_nested_depth};
}
/// Bool type should have priority higher then all integers.
if (isBool(type))
return {nested_depth, priority_map.at(TypeIndex::Int8) + 1, simple_nested_depth};
auto it = priority_map.find(type->getTypeId());
return {nested_depth, it == priority_map.end() ? 0 : it->second, simple_nested_depth};
}
}
/// Returns positions of `variant_types` ordered by descending text deserialization priority
/// (see getTypeTextDeserializePriority), i.e. the order in which variants should be tried.
std::vector<size_t> SerializationVariant::getVariantsDeserializeTextOrder(const DB::DataTypes & variant_types)
{
    const size_t num_variants = variant_types.size();
    const auto & priority_map = getTypesTextDeserializePriorityMap();

    /// Priority tuple of every variant, indexed by the position of the variant.
    std::vector<std::tuple<size_t, size_t, size_t>> type_priorities;
    type_priorities.reserve(num_variants);
    for (const auto & variant_type : variant_types)
        type_priorities.push_back(getTypeTextDeserializePriority(variant_type, 0, 0, priority_map));

    /// Start from the identity permutation and sort it by priority.
    std::vector<size_t> result;
    result.reserve(num_variants);
    for (size_t pos = 0; pos < num_variants; ++pos)
        result.push_back(pos);

    auto has_higher_priority = [&type_priorities](size_t lhs, size_t rhs) { return type_priorities[lhs] > type_priorities[rhs]; };
    std::sort(result.begin(), result.end(), has_higher_priority);
    return result;
}
/// Common part of all text deserializations.
/// `field` is the already extracted text of one value. First `check_for_null` is tried; if it
/// succeeds and consumes the whole field, a default (NULL) row is inserted. Otherwise variants
/// are tried in `deserialize_text_order` using `try_deserialize_nested`; the first variant that
/// succeeds and consumes the whole field gets the value.
/// Returns false if the field is neither NULL nor parsable into any variant.
bool SerializationVariant::tryDeserializeImpl(
    IColumn & column,
    const String & field,
    std::function<bool(ReadBuffer &)> check_for_null,
    std::function<bool(IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer &)> try_deserialize_nested) const
{
    auto & variant_col = assert_cast<ColumnVariant &>(column);

    {
        ReadBufferFromString buf(field);
        if (check_for_null(buf) && buf.eof())
        {
            variant_col.insertDefault();
            return true;
        }
    }

    for (size_t discr : deserialize_text_order)
    {
        /// Every attempt reads the field from the beginning.
        ReadBufferFromString buf(field);
        auto & nested = variant_col.getVariantByGlobalDiscriminator(discr);
        const size_t size_before = nested.size();

        const bool parsed = try_deserialize_nested(nested, variants[discr], buf) && buf.eof();
        if (!parsed)
        {
            /// The attempt may have inserted a value before failing (or before leaving
            /// unread data in the buffer), remove it.
            if (nested.size() > size_before)
                nested.popBack(1);
            continue;
        }

        variant_col.getLocalDiscriminators().push_back(variant_col.localDiscriminatorByGlobal(discr));
        variant_col.getOffsets().push_back(size_before);
        return true;
    }

    return false;
}
void SerializationVariant::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
auto global_discr = col.globalDiscriminatorAt(row_num);
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
SerializationNullable::serializeNullEscaped(ostr, settings);
else
variants[global_discr]->serializeTextEscaped(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings);
}
/// Reads one field with readEscapedString and tries to parse it as NULL or as one of the variants.
/// Returns false if the field could not be parsed.
bool SerializationVariant::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String raw_field;
    readEscapedString(raw_field, istr);
    const bool parsed = tryDeserializeTextEscapedImpl(column, raw_field, settings);
    return parsed;
}
/// Reads one field with readEscapedString and parses it as NULL or as one of the variants.
/// Throws INCORRECT_DATA if the field cannot be parsed.
void SerializationVariant::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String raw_field;
    readEscapedString(raw_field, istr);
    const bool parsed = tryDeserializeTextEscapedImpl(column, raw_field, settings);
    if (parsed)
        return;

    throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse escaped value of type {} here: {}", variant_name, raw_field);
}
/// Binds the escaped-format NULL check and the escaped-format variant deserialization
/// to the generic tryDeserializeImpl.
bool SerializationVariant::tryDeserializeTextEscapedImpl(DB::IColumn & column, const String & field, const DB::FormatSettings & settings) const
{
    return tryDeserializeImpl(
        column,
        field,
        [&settings](ReadBuffer & buf) { return SerializationNullable::tryDeserializeNullEscaped(buf, settings); },
        [&settings](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf)
        {
            return variant_serialization->tryDeserializeTextEscaped(variant_column, buf, settings);
        });
}
void SerializationVariant::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
auto global_discr = col.globalDiscriminatorAt(row_num);
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
SerializationNullable::serializeNullRaw(ostr, settings);
else
variants[global_discr]->serializeTextRaw(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings);
}
/// Reads one field with readString and tries to parse it as NULL or as one of the variants.
/// Returns false if the field could not be parsed.
bool SerializationVariant::tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String raw_field;
    readString(raw_field, istr);
    const bool parsed = tryDeserializeTextRawImpl(column, raw_field, settings);
    return parsed;
}
/// Reads one field with readString and parses it as NULL or as one of the variants.
/// Throws INCORRECT_DATA if the field cannot be parsed.
void SerializationVariant::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String raw_field;
    readString(raw_field, istr);
    const bool parsed = tryDeserializeTextRawImpl(column, raw_field, settings);
    if (parsed)
        return;

    throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse raw value of type {} here: {}", variant_name, raw_field);
}
/// Binds the raw-format NULL check and the raw-format variant deserialization
/// to the generic tryDeserializeImpl.
bool SerializationVariant::tryDeserializeTextRawImpl(DB::IColumn & column, const String & field, const DB::FormatSettings & settings) const
{
    return tryDeserializeImpl(
        column,
        field,
        [&settings](ReadBuffer & buf) { return SerializationNullable::tryDeserializeNullRaw(buf, settings); },
        [&settings](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf)
        {
            return variant_serialization->tryDeserializeTextRaw(variant_column, buf, settings);
        });
}
void SerializationVariant::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
auto global_discr = col.globalDiscriminatorAt(row_num);
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
SerializationNullable::serializeNullQuoted(ostr);
else
variants[global_discr]->serializeTextQuoted(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings);
}
/// Reads one field with tryReadQuotedField and tries to parse it as NULL or as one of the variants.
/// Returns false if the field could not be read or could not be parsed.
bool SerializationVariant::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String raw_field;
    return tryReadQuotedField(raw_field, istr) && tryDeserializeTextQuotedImpl(column, raw_field, settings);
}
/// Reads one field with readQuotedField and parses it as NULL or as one of the variants.
/// Throws INCORRECT_DATA if the field cannot be parsed.
void SerializationVariant::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String raw_field;
    readQuotedField(raw_field, istr);
    const bool parsed = tryDeserializeTextQuotedImpl(column, raw_field, settings);
    if (parsed)
        return;

    throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse quoted value of type {} here: {}", variant_name, raw_field);
}
/// Binds the quoted-format NULL check and the quoted-format variant deserialization
/// to the generic tryDeserializeImpl.
bool SerializationVariant::tryDeserializeTextQuotedImpl(DB::IColumn & column, const String & field, const DB::FormatSettings & settings) const
{
    return tryDeserializeImpl(
        column,
        field,
        [](ReadBuffer & buf) { return SerializationNullable::tryDeserializeNullQuoted(buf); },
        [&settings](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf)
        {
            return variant_serialization->tryDeserializeTextQuoted(variant_column, buf, settings);
        });
}
void SerializationVariant::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
auto global_discr = col.globalDiscriminatorAt(row_num);
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
SerializationNullable::serializeNullCSV(ostr, settings);
else
variants[global_discr]->serializeTextCSV(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings);
}
/// Reads one CSV field and tries to parse it as NULL or as one of the variants.
/// Returns false if the field could not be parsed.
/// NOTE(review): the field is read with readCSVStringInto<String, true, false> here, while
/// deserializeTextCSV uses readCSVField - confirm that the difference is intentional.
bool SerializationVariant::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String raw_field;
    readCSVStringInto<String, true, false>(raw_field, istr, settings.csv);
    const bool parsed = tryDeserializeTextCSVImpl(column, raw_field, settings);
    return parsed;
}
/// Reads one field with readCSVField and parses it as NULL or as one of the variants.
/// Throws INCORRECT_DATA if the field cannot be parsed.
void SerializationVariant::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String raw_field;
    readCSVField(raw_field, istr, settings.csv);
    const bool parsed = tryDeserializeTextCSVImpl(column, raw_field, settings);
    if (parsed)
        return;

    throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse CSV value of type {} here: {}", variant_name, raw_field);
}
/// Binds the CSV NULL check and the CSV variant deserialization to the generic tryDeserializeImpl.
bool SerializationVariant::tryDeserializeTextCSVImpl(DB::IColumn & column, const String & field, const DB::FormatSettings & settings) const
{
    return tryDeserializeImpl(
        column,
        field,
        [&settings](ReadBuffer & buf) { return SerializationNullable::tryDeserializeNullCSV(buf, settings); },
        [&settings](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf)
        {
            return variant_serialization->tryDeserializeTextCSV(variant_column, buf, settings);
        });
}
void SerializationVariant::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
auto global_discr = col.globalDiscriminatorAt(row_num);
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
SerializationNullable::serializeNullText(ostr, settings);
else
variants[global_discr]->serializeText(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings);
}
/// Reads the rest of the buffer with readStringUntilEOF and tries to parse it as NULL
/// or as one of the variants. Returns false if the text could not be parsed.
bool SerializationVariant::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String raw_field;
    readStringUntilEOF(raw_field, istr);
    const bool parsed = tryDeserializeWholeTextImpl(column, raw_field, settings);
    return parsed;
}
/// Reads the rest of the buffer with readStringUntilEOF and parses it as NULL or as one of the variants.
/// Throws INCORRECT_DATA if the text cannot be parsed.
void SerializationVariant::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String raw_field;
    readStringUntilEOF(raw_field, istr);
    const bool parsed = tryDeserializeWholeTextImpl(column, raw_field, settings);
    if (parsed)
        return;

    throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse text value of type {} here: {}", variant_name, raw_field);
}
/// Binds the plain-text NULL check and the whole-text variant deserialization
/// to the generic tryDeserializeImpl.
bool SerializationVariant::tryDeserializeWholeTextImpl(DB::IColumn & column, const String & field, const DB::FormatSettings & settings) const
{
    return tryDeserializeImpl(
        column,
        field,
        [](ReadBuffer & buf) { return SerializationNullable::tryDeserializeNullText(buf); },
        [&settings](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf)
        {
            return variant_serialization->tryDeserializeWholeText(variant_column, buf, settings);
        });
}
void SerializationVariant::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
auto global_discr = col.globalDiscriminatorAt(row_num);
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
SerializationNullable::serializeNullJSON(ostr);
else
variants[global_discr]->serializeTextJSON(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings);
}
/// Reads one field with tryReadJSONField and tries to parse it as NULL or as one of the variants.
/// Returns false if the field could not be read or could not be parsed.
bool SerializationVariant::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String raw_field;
    return tryReadJSONField(raw_field, istr) && tryDeserializeTextJSONImpl(column, raw_field, settings);
}
/// Reads one field with readJSONField and parses it as NULL or as one of the variants.
/// Throws INCORRECT_DATA if the field cannot be parsed.
void SerializationVariant::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    String raw_field;
    readJSONField(raw_field, istr);
    const bool parsed = tryDeserializeTextJSONImpl(column, raw_field, settings);
    if (parsed)
        return;

    throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON value of type {} here: {}", variant_name, raw_field);
}
/// Binds the JSON NULL check and the JSON variant deserialization to the generic tryDeserializeImpl.
bool SerializationVariant::tryDeserializeTextJSONImpl(DB::IColumn & column, const String & field, const DB::FormatSettings & settings) const
{
    return tryDeserializeImpl(
        column,
        field,
        [](ReadBuffer & buf) { return SerializationNullable::tryDeserializeNullJSON(buf); },
        [&settings](IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer & buf)
        {
            return variant_serialization->tryDeserializeTextJSON(variant_column, buf, settings);
        });
}
void SerializationVariant::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
auto global_discr = col.globalDiscriminatorAt(row_num);
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
SerializationNullable::serializeNullXML(ostr);
else
variants[global_discr]->serializeTextXML(col.getVariantByGlobalDiscriminator(global_discr), col.offsetAt(row_num), ostr, settings);
}
}

View File

@ -0,0 +1,139 @@
#pragma once
#include <DataTypes/Serializations/ISerialization.h>
#include <DataTypes/Serializations/SerializationVariantElement.h>
namespace DB
{
/// Class for serializing/deserializing column with Variant type.
/// It supports both text and binary bulk serializations/deserializations.
///
/// During text serialization it checks discriminator of the current row and
/// uses corresponding text serialization of this variant.
///
/// During text deserialization it tries all variants deserializations
/// (using tryDeserializeText* methods of ISerialization) in predefined order
/// and inserts data in the first variant with succeeded deserialization.
///
/// During binary bulk serialization it transforms local discriminators
/// to global and serializes them into a separate stream VariantDiscriminators.
/// Each variant is serialized into a separate stream with path VariantElements/VariantElement
/// (VariantElements stream is needed for correct sub-columns creation). We store and serialize
/// variants in a sparse form (the size of a variant column is equal to the number of occurrences
/// of its discriminator in the discriminators column), so during deserialization the limit
/// for each variant is calculated according to discriminators column.
/// Offsets column is not serialized and stored only in memory.
///
/// During binary bulk deserialization we first deserialize discriminators from corresponding stream
/// and use them to calculate the limit for each variant. Each variant is deserialized from
/// corresponding stream using calculated limit. Offsets column is not deserialized and constructed
/// according to discriminators.
class SerializationVariant : public ISerialization
{
public:
    using VariantSerializations = std::vector<SerializationPtr>;

    /// variants_ - serializations of the variants, indexed by global discriminator.
    /// variant_names_ - names of the variants (presumably in the same order as variants_ - confirm with callers).
    /// deserialize_text_order_ - global discriminators in the order in which variants are tried
    ///     during text deserialization (see getVariantsDeserializeTextOrder).
    /// variant_name_ - name of the whole Variant data type, used in exception messages.
    explicit SerializationVariant(
        const VariantSerializations & variants_,
        const std::vector<String> & variant_names_,
        const std::vector<size_t> & deserialize_text_order_,
        const String & variant_name_)
        : variants(variants_), variant_names(variant_names_), deserialize_text_order(deserialize_text_order_), variant_name(variant_name_)
    {
    }

    /// Binary bulk (multiple streams) interface.
    void enumerateStreams(
        EnumerateStreamsSettings & settings,
        const StreamCallback & callback,
        const SubstreamData & data) const override;

    void serializeBinaryBulkStatePrefix(
        const IColumn & column,
        SerializeBinaryBulkSettings & settings,
        SerializeBinaryBulkStatePtr & state) const override;

    void serializeBinaryBulkStateSuffix(
        SerializeBinaryBulkSettings & settings,
        SerializeBinaryBulkStatePtr & state) const override;

    void deserializeBinaryBulkStatePrefix(
        DeserializeBinaryBulkSettings & settings,
        DeserializeBinaryBulkStatePtr & state) const override;

    void serializeBinaryBulkWithMultipleStreams(
        const IColumn & column,
        size_t offset,
        size_t limit,
        SerializeBinaryBulkSettings & settings,
        SerializeBinaryBulkStatePtr & state) const override;

    void deserializeBinaryBulkWithMultipleStreams(
        ColumnPtr & column,
        size_t limit,
        DeserializeBinaryBulkSettings & settings,
        DeserializeBinaryBulkStatePtr & state,
        SubstreamsCache * cache) const override;

    /// Binary serialization of a single value.
    void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override;
    void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override;

    void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
    void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;

    /// Text serializations. For every format the deserialize* method throws on a value
    /// that cannot be parsed, while the tryDeserialize* method returns false.
    void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
    void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
    bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;

    void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
    void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
    bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;

    void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
    void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
    bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;

    void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
    void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
    bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;

    void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
    void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
    bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;

    void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
    void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
    bool tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;

    void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;

    /// Determine the order in which we should try to deserialize variants.
    /// In some cases the text representation of a value can be deserialized
    /// into several types (for example, almost all text values can be deserialized
    /// into String type), so we use some heuristics to determine the more optimal order.
    static std::vector<size_t> getVariantsDeserializeTextOrder(const DataTypes & variant_types);

private:
    void addVariantElementToPath(SubstreamPath & path, size_t i) const;

    /// Format-specific helpers: each one parses an already extracted `field`
    /// by calling tryDeserializeImpl with the NULL check and the nested
    /// deserialization of the corresponding text format.
    bool tryDeserializeTextEscapedImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
    bool tryDeserializeTextQuotedImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
    bool tryDeserializeWholeTextImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
    bool tryDeserializeTextCSVImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
    bool tryDeserializeTextJSONImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
    bool tryDeserializeTextRawImpl(IColumn & column, const String & field, const FormatSettings & settings) const;

    /// Common implementation of text deserialization: inserts NULL if `check_for_null`
    /// accepts the whole field, otherwise tries variants in deserialize_text_order
    /// with `try_deserialize_nested`. Returns false if nothing matched.
    bool tryDeserializeImpl(
        IColumn & column,
        const String & field,
        std::function<bool(ReadBuffer &)> check_for_null,
        std::function<bool(IColumn & variant_column, const SerializationPtr & variant_serialization, ReadBuffer &)> try_deserialize_nested) const;

    /// Serializations of the variants, indexed by global discriminator.
    VariantSerializations variants;
    std::vector<String> variant_names;
    /// Global discriminators in the order used by text deserialization.
    std::vector<size_t> deserialize_text_order;
    /// Name of Variant data type for better exception messages.
    String variant_name;
};
}

View File

@ -0,0 +1,271 @@
#include <DataTypes/Serializations/SerializationVariantElement.h>
#include <DataTypes/Serializations/SerializationNumber.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnNullable.h>
namespace DB
{
/// Error codes used in this file; declared here as extern, the definition is not in this file.
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
/// Enumerates the substreams used by a single variant sub-column: first the
/// discriminators stream, then the streams of the nested serialization under
/// the path added by addVariantToPath. The path in `settings` is restored before returning.
void SerializationVariantElement::enumerateStreams(
DB::ISerialization::EnumerateStreamsSettings & settings,
const DB::ISerialization::StreamCallback & callback,
const DB::ISerialization::SubstreamData & data) const
{
/// We will need stream for discriminators during deserialization.
settings.path.push_back(Substream::VariantDiscriminators);
callback(settings.path);
settings.path.pop_back();
/// Extend the path with this variant element, attach `data` to the last path component
/// and let the nested serialization enumerate its own streams.
addVariantToPath(settings.path);
settings.path.back().data = data;
nested_serialization->enumerateStreams(settings, callback, data);
/// Remove the components added by addVariantToPath.
removeVariantFromPath(settings.path);
}
/// Serialization is not supported by SerializationVariantElement: always throws NOT_IMPLEMENTED.
void SerializationVariantElement::serializeBinaryBulkStatePrefix(
    const IColumn &,
    SerializeBinaryBulkSettings &,
    SerializeBinaryBulkStatePtr &) const
{
    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationVariantElement");
}
/// Serialization is not supported by SerializationVariantElement: always throws NOT_IMPLEMENTED.
void SerializationVariantElement::serializeBinaryBulkStateSuffix(
    SerializeBinaryBulkSettings &,
    SerializeBinaryBulkStatePtr &) const
{
    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationVariantElement");
}
/// Deserialization state of a single variant sub-column.
struct DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState
{
/// During deserialization discriminators and variant streams can be shared.
/// For example we can read several variant elements together: "select v.UInt32, v.String from table",
/// or we can read the whole variant and some of variant elements: "select v, v.UInt32 from table".
/// To read the same column from the same stream more than once we use substream cache,
/// but this cache stores the whole column, not only the current range.
/// During deserialization of variant element discriminators and variant columns are not stored
/// in the result column, so we need to store them inside deserialization state, so we can use
/// substream cache correctly.

/// Discriminators read so far (or taken from the substreams cache).
ColumnPtr discriminators;
/// Values of this variant read so far by the nested serialization.
ColumnPtr variant;
/// Deserialization state of the nested serialization.
ISerialization::DeserializeBinaryBulkStatePtr variant_element_state;
};
/// Creates the deserialization state of the variant element and initializes the state of the
/// nested serialization under the variant element path. The path in `settings` is restored afterwards.
void SerializationVariantElement::deserializeBinaryBulkStatePrefix(DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const
{
    auto element_state = std::make_shared<DeserializeBinaryBulkStateVariantElement>();

    addVariantToPath(settings.path);
    auto & nested_state = element_state->variant_element_state;
    nested_serialization->deserializeBinaryBulkStatePrefix(settings, nested_state);
    removeVariantFromPath(settings.path);

    state = std::move(element_state);
}
/// Serialization is not supported by SerializationVariantElement: always throws NOT_IMPLEMENTED.
void SerializationVariantElement::serializeBinaryBulkWithMultipleStreams(
    const IColumn &,
    size_t,
    size_t,
    SerializeBinaryBulkSettings &,
    SerializeBinaryBulkStatePtr &) const
{
    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkWithMultipleStreams is not implemented for SerializationVariantElement");
}
/// Reads the next range of a single variant sub-column.
///
/// result_column - column to append to; it may be Nullable, in which case its null map is
///     filled from the discriminators and values are appended to its nested column.
/// limit - number of rows of the Variant column in the range.
/// settings - deserialization settings; `settings.path` is modified while reading
///     and is restored on every exit from the function.
/// state - state created by deserializeBinaryBulkStatePrefix.
/// cache - substreams cache shared with other readers of the same streams.
///
/// Rows that hold another variant (or NULL) get a default value in the result column.
void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
    ColumnPtr & result_column,
    size_t limit,
    DeserializeBinaryBulkSettings & settings,
    DeserializeBinaryBulkStatePtr & state,
    SubstreamsCache * cache) const
{
    auto * variant_element_state = checkAndGetState<DeserializeBinaryBulkStateVariantElement>(state);

    /// First, deserialize discriminators from Variant column.
    settings.path.push_back(Substream::VariantDiscriminators);
    if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path))
    {
        variant_element_state->discriminators = cached_discriminators;
    }
    else
    {
        auto * discriminators_stream = settings.getter(settings.path);
        if (!discriminators_stream)
        {
            /// Nothing to read. The path must be restored before returning,
            /// otherwise the discriminators substream stays in the path used by the caller.
            settings.path.pop_back();
            return;
        }

        /// If we started to read a new column, reinitialize discriminators column in deserialization state.
        if (!variant_element_state->discriminators || result_column->empty())
            variant_element_state->discriminators = ColumnVariant::ColumnDiscriminators::create();

        SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0);
        addToSubstreamsCache(cache, settings.path, variant_element_state->discriminators);
    }
    settings.path.pop_back();

    /// Iterate through new discriminators to calculate the limit for our variant.
    const auto & discriminators_data = assert_cast<const ColumnVariant::ColumnDiscriminators &>(*variant_element_state->discriminators).getData();
    size_t discriminators_offset = variant_element_state->discriminators->size() - limit;
    size_t variant_limit = 0;
    for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
        variant_limit += (discriminators_data[i] == variant_discriminator);

    /// Now we know the limit for our variant and can deserialize it.

    /// If result column is Nullable, fill null map and extract nested column.
    MutableColumnPtr mutable_column = result_column->assumeMutable();
    if (isColumnNullable(*mutable_column))
    {
        auto & nullable_column = assert_cast<ColumnNullable &>(*mutable_column);
        NullMap & null_map = nullable_column.getNullMapData();
        /// If we have only our discriminator in range, fill null map with 0.
        if (variant_limit == limit)
        {
            null_map.resize_fill(null_map.size() + limit, 0);
        }
        /// If no our discriminator in current range, fill null map with 1.
        else if (variant_limit == 0)
        {
            null_map.resize_fill(null_map.size() + limit, 1);
        }
        /// Otherwise we should iterate through discriminators to fill null map.
        else
        {
            null_map.reserve(null_map.size() + limit);
            for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
                null_map.push_back(discriminators_data[i] != variant_discriminator);
        }

        mutable_column = nullable_column.getNestedColumnPtr()->assumeMutable();
    }

    /// If we started to read a new column, reinitialize variant column in deserialization state.
    if (!variant_element_state->variant || result_column->empty())
    {
        variant_element_state->variant = mutable_column->cloneEmpty();

        /// When result column is LowCardinality(Nullable(T)) we should
        /// remove Nullable from variant column before deserialization.
        if (isColumnLowCardinalityNullable(*mutable_column))
            assert_cast<ColumnLowCardinality &>(*variant_element_state->variant->assumeMutable()).nestedRemoveNullable();
    }

    /// If nothing to deserialize, just insert defaults.
    if (variant_limit == 0)
    {
        mutable_column->insertManyDefaults(limit);
        return;
    }

    addVariantToPath(settings.path);
    nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, variant_limit, settings, variant_element_state->variant_element_state, cache);
    removeVariantFromPath(settings.path);

    /// If nothing was deserialized when variant_limit > 0
    /// it means that we don't have a stream for such sub-column.
    /// It may happen during ALTER MODIFY column with Variant extension.
    /// In this case we should just insert default values.
    if (variant_element_state->variant->empty())
    {
        mutable_column->insertManyDefaults(limit);
        return;
    }

    size_t variant_offset = variant_element_state->variant->size() - variant_limit;

    /// If we have only our discriminator in range, insert the whole range to result column.
    if (variant_limit == limit)
    {
        mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, variant_limit);
    }
    /// Otherwise iterate through discriminators and insert value from variant or default value depending on the discriminator.
    else
    {
        for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
        {
            if (discriminators_data[i] == variant_discriminator)
                mutable_column->insertFrom(*variant_element_state->variant, variant_offset++);
            else
                mutable_column->insertDefault();
        }
    }
}
/// Appends two components to the path: VariantElements and VariantElement,
/// the latter tagged with the name of this variant element.
void SerializationVariantElement::addVariantToPath(DB::ISerialization::SubstreamPath & path) const
{
    path.push_back(Substream::VariantElements);
    path.push_back(Substream::VariantElement);

    auto & element_substream = path.back();
    element_substream.variant_element_name = variant_element_name;
}
/// Removes the last two components of the path (the ones appended by addVariantToPath).
void SerializationVariantElement::removeVariantFromPath(DB::ISerialization::SubstreamPath & path) const
{
    for (size_t removed = 0; removed != 2; ++removed)
        path.pop_back();
}
/// Stores the local discriminators column, the name of the variant element
/// and its global and local discriminators for later use by the create() methods.
SerializationVariantElement::VariantSubcolumnCreator::VariantSubcolumnCreator(
    const ColumnPtr & local_discriminators_,
    const String & variant_element_name_,
    const ColumnVariant::Discriminator global_variant_discriminator_,
    const ColumnVariant::Discriminator local_variant_discriminator_)
    : local_discriminators(local_discriminators_),
      variant_element_name(variant_element_name_),
      global_variant_discriminator(global_variant_discriminator_),
      local_variant_discriminator(local_variant_discriminator_)
{
}
/// Subcolumn type: the variant type passed through makeNullableOrLowCardinalityNullableSafe.
DataTypePtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::DataTypePtr & prev) const
{
    DataTypePtr subcolumn_type = makeNullableOrLowCardinalityNullableSafe(prev);
    return subcolumn_type;
}
/// Subcolumn serialization: wraps the variant's own serialization into SerializationVariantElement,
/// identified by the variant type name and its global discriminator.
SerializationPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::SerializationPtr & prev) const
{
    SerializationPtr element_serialization
        = std::make_shared<SerializationVariantElement>(prev, variant_element_name, global_variant_discriminator);
    return element_serialization;
}
/// Builds the subcolumn of one variant from the variant's own column (`prev`):
/// rows that belong to this variant keep their values, all other rows become defaults/NULLs.
ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::ColumnPtr & prev) const
{
    const size_t total_rows = local_discriminators->size();

    /// The variant column already covers every row, so the original Variant column
    /// contained only this variant and no NULLs. Use the variant as is.
    if (prev->size() == total_rows)
        return makeNullableOrLowCardinalityNullableSafe(prev);

    /// The variant has no rows at all: the whole result consists of default values.
    if (prev->empty())
    {
        auto only_defaults = makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty();
        only_defaults->insertManyDefaults(total_rows);
        return only_defaults;
    }

    /// General case: derive the null-map from discriminators (1 for rows of any other variant or NULL).
    const auto & discriminators = assert_cast<const ColumnVariant::ColumnDiscriminators &>(*local_discriminators).getData();
    NullMap null_map;
    null_map.reserve(total_rows);
    for (size_t row = 0; row != discriminators.size(); ++row)
        null_map.push_back(discriminators[row] != local_variant_discriminator);

    /// Spread the variant values over all rows according to the null-map using IColumn::expand.
    auto expanded = IColumn::mutate(prev);

    /// LowCardinality needs special care. The result must be LowCardinality(Nullable), but there is
    /// no good way to apply a null-mask to LowCardinality(), so the column is first converted to
    /// LowCardinality(Nullable()) and then expanded: rows with 0 in the mask get the default value (NULL).
    if (prev->lowCardinality())
        expanded = assert_cast<ColumnLowCardinality &>(*expanded).cloneNullable();

    expanded->expand(null_map, /*inverted = */ true);

    if (!expanded->canBeInsideNullable())
        return expanded;

    auto null_map_column = ColumnUInt8::create();
    null_map_column->getData() = std::move(null_map);
    return ColumnNullable::create(std::move(expanded), std::move(null_map_column));
}
}

View File

@ -0,0 +1,87 @@
#pragma once
#include <DataTypes/Serializations/SerializationWrapper.h>
#include <DataTypes/DataTypeNullable.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnVariant.h>
namespace DB
{
class SerializationVariant;
/// Serialization for Variant element when we read it as a subcolumn.
/// It wraps the serialization of the variant itself (passed to SerializationWrapper as the nested one)
/// and additionally remembers which variant of the Variant column it refers to.
class SerializationVariantElement final : public SerializationWrapper
{
private:
/// To be able to deserialize Variant element as a subcolumn
/// we need its type name and global discriminator.
String variant_element_name;
ColumnVariant::Discriminator variant_discriminator;
public:
/// nested_ - serialization of the variant type itself.
/// variant_element_name_ - name stored in the VariantElement substream (see addVariantToPath).
/// variant_discriminator_ - discriminator of this variant, compared against the discriminators read from the stream.
SerializationVariantElement(const SerializationPtr & nested_, const String & variant_element_name_, ColumnVariant::Discriminator variant_discriminator_)
: SerializationWrapper(nested_)
, variant_element_name(variant_element_name_)
, variant_discriminator(variant_discriminator_)
{
}
/// Overrides of the bulk (multi-stream) serialization interface.
void enumerateStreams(
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const override;
void serializeBinaryBulkStatePrefix(
const IColumn & column,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkStateSuffix(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkStatePrefix(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkWithMultipleStreams(
const IColumn & column,
size_t offset,
size_t limit,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
/// Reads values of this variant and fills the result column: rows whose discriminator
/// matches this variant get the variant value, all other rows get the default value.
void deserializeBinaryBulkWithMultipleStreams(
ColumnPtr & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * cache) const override;
private:
/// SerializationVariant gets access to the private members below.
friend SerializationVariant;
/// Appends the VariantElements and VariantElement (with variant_element_name) substreams to the path.
void addVariantToPath(SubstreamPath & path) const;
/// Removes the two substreams appended by addVariantToPath.
void removeVariantFromPath(SubstreamPath & path) const;
/// Creates type, column and serialization of the subcolumn for a single variant.
struct VariantSubcolumnCreator : public ISubcolumnCreator
{
/// Local discriminators of the Variant column; used to build the null-map of the subcolumn.
const ColumnPtr local_discriminators;
/// Type name of the variant; passed to the created SerializationVariantElement.
const String variant_element_name;
/// Global discriminator of the variant; passed to the created SerializationVariantElement.
const ColumnVariant::Discriminator global_variant_discriminator;
/// Local discriminator of the variant; rows with any other local discriminator become NULL/default.
const ColumnVariant::Discriminator local_variant_discriminator;
VariantSubcolumnCreator(
const ColumnPtr & local_discriminators_,
const String & variant_element_name_,
const ColumnVariant::Discriminator global_variant_discriminator_,
const ColumnVariant::Discriminator local_variant_discriminator_);
/// Returns the variant type passed through makeNullableOrLowCardinalityNullableSafe.
DataTypePtr create(const DataTypePtr & prev) const override;
/// Returns the variant column expanded to the size of local_discriminators.
ColumnPtr create(const ColumnPtr & prev) const override;
/// Returns SerializationVariantElement wrapping the variant serialization.
SerializationPtr create(const SerializationPtr & prev) const override;
};
};
}

View File

@ -96,6 +96,11 @@ void SerializationWrapper::deserializeTextEscaped(IColumn & column, ReadBuffer &
nested_serialization->deserializeTextEscaped(column, istr, settings);
}
/// Delegates to the wrapped serialization and reports its result unchanged.
bool SerializationWrapper::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    const bool parsed = nested_serialization->tryDeserializeTextEscaped(column, istr, settings);
    return parsed;
}
void SerializationWrapper::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
nested_serialization->serializeTextQuoted(column, row_num, ostr, settings);
@ -106,6 +111,11 @@ void SerializationWrapper::deserializeTextQuoted(IColumn & column, ReadBuffer &
nested_serialization->deserializeTextQuoted(column, istr, settings);
}
/// Delegates to the wrapped serialization and reports its result unchanged.
bool SerializationWrapper::tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    const bool parsed = nested_serialization->tryDeserializeTextQuoted(column, istr, settings);
    return parsed;
}
void SerializationWrapper::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
nested_serialization->serializeTextCSV(column, row_num, ostr, settings);
@ -116,6 +126,11 @@ void SerializationWrapper::deserializeTextCSV(IColumn & column, ReadBuffer & ist
nested_serialization->deserializeTextCSV(column, istr, settings);
}
/// Delegates to the wrapped serialization and reports its result unchanged.
bool SerializationWrapper::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    const bool parsed = nested_serialization->tryDeserializeTextCSV(column, istr, settings);
    return parsed;
}
void SerializationWrapper::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
nested_serialization->serializeText(column, row_num, ostr, settings);
@ -126,6 +141,11 @@ void SerializationWrapper::deserializeWholeText(IColumn & column, ReadBuffer & i
nested_serialization->deserializeWholeText(column, istr, settings);
}
/// Delegates to the wrapped serialization and reports its result unchanged.
bool SerializationWrapper::tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    const bool parsed = nested_serialization->tryDeserializeWholeText(column, istr, settings);
    return parsed;
}
void SerializationWrapper::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
nested_serialization->serializeTextJSON(column, row_num, ostr, settings);
@ -136,6 +156,11 @@ void SerializationWrapper::deserializeTextJSON(IColumn & column, ReadBuffer & is
nested_serialization->deserializeTextJSON(column, istr, settings);
}
/// Delegates to the wrapped serialization and reports its result unchanged.
bool SerializationWrapper::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
    const bool parsed = nested_serialization->tryDeserializeTextJSON(column, istr, settings);
    return parsed;
}
void SerializationWrapper::serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const
{
nested_serialization->serializeTextJSONPretty(column, row_num, ostr, settings, indent);

View File

@ -63,18 +63,23 @@ public:
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;

View File

@ -36,29 +36,67 @@ protected:
deserializeText(column, istr, settings, true);
}
/// Non-throwing counterpart of deserializeWholeText: the buffer holds exactly one value.
bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
{
    constexpr bool whole = true;
    return tryDeserializeText(column, istr, settings, whole);
}
/// Escaped format: the value is not required to occupy the whole buffer.
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
{
    constexpr bool whole = false;
    deserializeText(column, istr, settings, whole);
}
/// Non-throwing counterpart of deserializeTextEscaped.
bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
{
    constexpr bool whole = false;
    return tryDeserializeText(column, istr, settings, whole);
}
/// Quoted format: the value is not required to occupy the whole buffer.
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
{
    constexpr bool whole = false;
    deserializeText(column, istr, settings, whole);
}
/// Non-throwing counterpart of deserializeTextQuoted.
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
{
    constexpr bool whole = false;
    return tryDeserializeText(column, istr, settings, whole);
}
/// JSON format: the value is not required to occupy the whole buffer.
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
{
    constexpr bool whole = false;
    deserializeText(column, istr, settings, whole);
}
/// Non-throwing counterpart of deserializeTextJSON.
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
{
    constexpr bool whole = false;
    return tryDeserializeText(column, istr, settings, whole);
}
/// CSV format: the value is not required to occupy the whole buffer.
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
{
    constexpr bool whole = false;
    deserializeText(column, istr, settings, whole);
}
/// Non-throwing counterpart of deserializeTextCSV.
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
{
    constexpr bool whole = false;
    return tryDeserializeText(column, istr, settings, whole);
}
/// whole = true means that buffer contains only one value, so we should read until EOF.
/// It's needed to check if there is garbage after parsed field.
virtual void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const = 0;
/// Default non-throwing variant of deserializeText: runs deserializeText and turns
/// any exception it throws into a `false` result. Returns true when parsing succeeded.
virtual bool tryDeserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
{
    bool parsed = false;
    try
    {
        deserializeText(column, istr, settings, whole);
        parsed = true;
    }
    catch (...)
    {
        /// Deliberately swallowed: failure is reported through the return value.
    }
    return parsed;
}
};
}

View File

@ -223,6 +223,7 @@ bool canBeSafelyCasted(const DataTypePtr & from_type, const DataTypePtr & to_typ
case TypeIndex::AggregateFunction:
case TypeIndex::Nothing:
case TypeIndex::JSONPaths:
case TypeIndex::Variant:
return false;
}

View File

@ -18,6 +18,7 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeVariant.h>
namespace DB
@ -383,6 +384,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types)
return throwOrReturn<on_error>(types, "because some of them are Maps and some of them are not", ErrorCodes::NO_COMMON_TYPE);
auto keys_common_type = getLeastSupertype<on_error>(key_types);
auto values_common_type = getLeastSupertype<on_error>(value_types);
/// When on_error == LeastSupertypeOnError::Null and we cannot get least supertype for keys or values,
/// keys_common_type or values_common_type will be nullptr, we should return nullptr in this case.
@ -424,6 +426,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types)
else
{
auto nested_type = getLeastSupertype<on_error>(nested_types);
/// When on_error == LeastSupertypeOnError::Null and we cannot get least supertype,
/// nested_type will be nullptr, we should return nullptr in this case.
if (!nested_type)
@ -637,6 +640,32 @@ DataTypePtr getLeastSupertypeOrString(const DataTypes & types)
return getLeastSupertype<LeastSupertypeOnError::String>(types);
}
/// Returns the least supertype of `types`; when there is none, returns a Variant built from these types.
DataTypePtr getLeastSupertypeOrVariant(const DataTypes & types)
{
    if (auto supertype = getLeastSupertype<LeastSupertypeOnError::Null>(types))
        return supertype;

    /// No common type: collect the arguments as variants of the resulting Variant.
    DataTypes flattened_variants;
    for (const auto & type : types)
    {
        if (!isVariant(type))
        {
            flattened_variants.push_back(removeNullableOrLowCardinalityNullable(type));
            continue;
        }

        /// Nested Variant types are not supported, so an argument that is a Variant
        /// contributes all of its own variants instead of itself.
        const DataTypes & inner_variants = assert_cast<const DataTypeVariant &>(*type).getVariants();
        flattened_variants.insert(flattened_variants.end(), inner_variants.begin(), inner_variants.end());
    }

    return std::make_shared<DataTypeVariant>(flattened_variants);
}
DataTypePtr tryGetLeastSupertype(const DataTypes & types)
{
return getLeastSupertype<LeastSupertypeOnError::Null>(types);

View File

@ -24,6 +24,17 @@ DataTypePtr getLeastSupertype(const DataTypes & types);
/// All types can be casted to String, because they can be serialized to String.
DataTypePtr getLeastSupertypeOrString(const DataTypes & types);
/// Same as getLeastSupertype but in case when there is no supertype for provided types
/// it uses Variant of these types as a supertype. Any type can be casted to a Variant
/// that contains this type.
/// As nested Variants are not allowed, if one of the types is Variant, its variants
/// are used in the resulting Variant.
/// Examples:
/// (UInt64, String) -> Variant(UInt64, String)
/// (Array(UInt64), Array(String)) -> Variant(Array(UInt64), Array(String))
/// (Variant(UInt64, String), Array(UInt32)) -> Variant(UInt64, String, Array(UInt32))
DataTypePtr getLeastSupertypeOrVariant(const DataTypes & types);
/// Same as above but return nullptr instead of throwing exception.
DataTypePtr tryGetLeastSupertype(const DataTypes & types);

View File

@ -925,6 +925,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
query_context->setSetting("allow_experimental_nlp_functions", 1);
query_context->setSetting("allow_experimental_hash_functions", 1);
query_context->setSetting("allow_experimental_object_type", 1);
query_context->setSetting("allow_experimental_variant_type", 1);
query_context->setSetting("allow_experimental_annoy_index", 1);
query_context->setSetting("allow_experimental_usearch_index", 1);
query_context->setSetting("allow_experimental_bigint_types", 1);

View File

@ -109,31 +109,31 @@ bool deserializeFieldByEscapingRule(
{
case FormatSettings::EscapingRule::Escaped:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextEscapedImpl(column, buf, format_settings, serialization);
read = SerializationNullable::deserializeNullAsDefaultOrNestedTextEscaped(column, buf, format_settings, serialization);
else
serialization->deserializeTextEscaped(column, buf, format_settings);
break;
case FormatSettings::EscapingRule::Quoted:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextQuotedImpl(column, buf, format_settings, serialization);
read = SerializationNullable::deserializeNullAsDefaultOrNestedTextQuoted(column, buf, format_settings, serialization);
else
serialization->deserializeTextQuoted(column, buf, format_settings);
break;
case FormatSettings::EscapingRule::CSV:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextCSVImpl(column, buf, format_settings, serialization);
read = SerializationNullable::deserializeNullAsDefaultOrNestedTextCSV(column, buf, format_settings, serialization);
else
serialization->deserializeTextCSV(column, buf, format_settings);
break;
case FormatSettings::EscapingRule::JSON:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextJSONImpl(column, buf, format_settings, serialization);
read = SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column, buf, format_settings, serialization);
else
serialization->deserializeTextJSON(column, buf, format_settings);
break;
case FormatSettings::EscapingRule::Raw:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextRawImpl(column, buf, format_settings, serialization);
read = SerializationNullable::deserializeNullAsDefaultOrNestedTextRaw(column, buf, format_settings, serialization);
else
serialization->deserializeTextRaw(column, buf, format_settings);
break;

View File

@ -282,14 +282,14 @@ namespace JSONUtils
ReadBufferFromString buf(str);
if (as_nullable)
return SerializationNullable::deserializeWholeTextImpl(column, buf, format_settings, serialization);
return SerializationNullable::deserializeNullAsDefaultOrNestedWholeText(column, buf, format_settings, serialization);
serialization->deserializeWholeText(column, buf, format_settings);
return true;
}
if (as_nullable)
return SerializationNullable::deserializeTextJSONImpl(column, in, format_settings, serialization);
return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column, in, format_settings, serialization);
serialization->deserializeTextJSON(column, in, format_settings);
return true;

View File

@ -966,7 +966,7 @@ namespace
if constexpr (is_json)
ok = tryReadJSONStringInto(field, buf);
else
ok = tryReadQuotedStringInto(field, buf);
ok = tryReadQuotedString(field, buf);
if (!ok)
return nullptr;

View File

@ -28,6 +28,7 @@
#include <DataTypes/DataTypeObject.h>
#include <DataTypes/ObjectUtils.h>
#include <DataTypes/DataTypeNested.h>
#include <DataTypes/DataTypeVariant.h>
#include <DataTypes/Serializations/SerializationDecimal.h>
#include <Formats/FormatSettings.h>
#include <Columns/ColumnString.h>
@ -40,6 +41,7 @@
#include <Columns/ColumnMap.h>
#include <Columns/ColumnObject.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnVariant.h>
#include <Columns/ColumnStringHelpers.h>
#include <Common/assert_cast.h>
#include <Common/Concepts.h>
@ -4066,6 +4068,259 @@ arguments, result_type, input_rows_count); \
"Cast to Object can be performed only from flatten named Tuple, Map or String. Got: {}", from_type->getName());
}
WrapperType createVariantToVariantWrapper(const DataTypeVariant & from_variant, const DataTypeVariant & to_variant) const
{
    /// Only extension of a Variant type is supported, i.e. the target type may only add new types.
    /// For example: Variant(T1, T2) -> Variant(T1, T2, T3) is supported, but Variant(T1, T2) -> Variant(T1, T3) is not.
    /// The extension should be free (no data rewrite). However, data types inside Variant are sorted during type
    /// creation (so that Variant(T1, T2) is the same as Variant(T2, T1)), which means that after extension the order
    /// of variant types (and so their discriminators) can change. For example: Variant(T1, T3) -> Variant(T1, T2, T3).
    /// To avoid a full rewrite of the discriminators column, ColumnVariant supports its own local order of variant
    /// columns (and so local discriminators) and stores the mapping global order -> local order.
    /// So, to extend Variant with new types for free, the old local order is kept for old variants, new variants
    /// are appended and the mapping global order -> local order is changed according to the new global order.

    /// Target variant type name -> its global discriminator in the target type.
    const auto & target_variants = to_variant.getVariants();
    std::unordered_map<String, ColumnVariant::Discriminator> target_discr_by_name;
    target_discr_by_name.reserve(target_variants.size());
    for (size_t pos = 0; pos != target_variants.size(); ++pos)
        target_discr_by_name[target_variants[pos]->getName()] = pos;

    /// Source variant type name -> its global discriminator in the source type.
    const auto & source_variants = from_variant.getVariants();
    std::unordered_map<String, ColumnVariant::Discriminator> source_discr_by_name;
    source_discr_by_name.reserve(source_variants.size());
    for (size_t pos = 0; pos != source_variants.size(); ++pos)
        source_discr_by_name[source_variants[pos]->getName()] = pos;

    /// Every source type must also be a target type. While checking it, remember
    /// which target global discriminator corresponds to each source global discriminator.
    std::unordered_map<ColumnVariant::Discriminator, ColumnVariant::Discriminator> source_to_target_discr;
    source_to_target_discr.reserve(source_variants.size());
    for (const auto & source_entry : source_discr_by_name)
    {
        const auto target_it = target_discr_by_name.find(source_entry.first);
        if (target_it == target_discr_by_name.end())
            throw Exception(
                ErrorCodes::CANNOT_CONVERT_TYPE,
                "Cannot convert type {} to {}. Conversion between Variant types is allowed only when new Variant type is an extension "
                "of an initial one", from_variant.getName(), to_variant.getName());
        source_to_target_discr[source_entry.second] = target_it->second;
    }

    /// Types (with their target global discriminators) that exist only in the target Variant.
    std::vector<std::pair<DataTypePtr, ColumnVariant::Discriminator>> added_variants;
    added_variants.reserve(target_variants.size() - source_variants.size());
    for (size_t pos = 0; pos != target_variants.size(); ++pos)
    {
        const auto & candidate = target_variants[pos];
        if (source_discr_by_name.find(candidate->getName()) == source_discr_by_name.end())
            added_variants.emplace_back(candidate, pos);
    }

    return [source_to_target_discr, added_variants]
           (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr
    {
        const auto & source_column = assert_cast<const ColumnVariant &>(*arguments.front().column.get());
        const size_t source_variants_count = source_column.getNumVariants();
        const size_t result_variants_count = source_variants_count + added_variants.size();

        Columns result_variants;
        result_variants.reserve(result_variants_count);
        std::vector<ColumnVariant::Discriminator> result_local_to_global;
        result_local_to_global.reserve(result_variants_count);

        /// Old variants keep their local positions; only their global discriminators are renumbered.
        for (size_t local = 0; local != source_variants_count; ++local)
        {
            result_variants.push_back(source_column.getVariantPtrByLocalDiscriminator(local));
            result_local_to_global.push_back(source_to_target_discr.at(source_column.globalDiscriminatorByLocal(local)));
        }

        /// New variants are appended as empty columns after the old ones.
        for (const auto & added : added_variants)
        {
            result_variants.push_back(added.first->createColumn());
            result_local_to_global.push_back(added.second);
        }

        /// Local discriminators and offsets are reused as is, so no data is rewritten.
        return ColumnVariant::create(source_column.getLocalDiscriminatorsPtr(), source_column.getOffsetsPtr(), result_variants, result_local_to_global);
    };
}
/// Creates a wrapper that converts a Variant column to a column of `to_type`.
/// Every variant is cast to `to_type` separately and the result is assembled row by row
/// according to discriminators; NULL rows of the Variant become default values of the result type.
/// Throws (from prepareUnpackDictionaries) if some variant cannot be cast to `to_type`.
WrapperType createVariantToColumnWrapper(const DataTypeVariant & from_variant, const DataTypePtr & to_type) const
{
    /// Variant types (and therefore the wrappers below) are ordered by global discriminator.
    const auto & variant_types = from_variant.getVariants();
    std::vector<WrapperType> variant_wrappers;
    variant_wrappers.reserve(variant_types.size());

    /// Create conversion wrapper for each variant.
    for (const auto & variant_type : variant_types)
        variant_wrappers.push_back(prepareUnpackDictionaries(variant_type, to_type));

    return [variant_wrappers, variant_types, to_type]
           (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr
    {
        const auto & column_variant = assert_cast<const ColumnVariant &>(*arguments.front().column.get());

        /// First, cast each variant to the result type.
        /// The casted columns are stored in the local order of the column, because rows are
        /// addressed by local discriminators below.
        std::vector<ColumnPtr> casted_variant_columns;
        casted_variant_columns.reserve(variant_types.size());
        for (size_t local_discr = 0; local_discr != variant_types.size(); ++local_discr)
        {
            auto variant_col = column_variant.getVariantPtrByLocalDiscriminator(local_discr);

            /// The local order of variants inside the column can differ from the global order of types,
            /// so both the type and the wrapper must be looked up by the global discriminator.
            const auto global_discr = column_variant.globalDiscriminatorByLocal(local_discr);
            ColumnsWithTypeAndName variant = {{variant_col, variant_types[global_discr], "" }};
            const auto & variant_wrapper = variant_wrappers[global_discr];
            casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size()));
        }

        /// Second, construct resulting column from casted variant columns according to discriminators.
        const auto & local_discriminators = column_variant.getLocalDiscriminators();
        auto res = result_type->createColumn();
        res->reserve(input_rows_count);
        for (size_t i = 0; i != input_rows_count; ++i)
        {
            auto local_discr = local_discriminators[i];
            if (local_discr == ColumnVariant::NULL_DISCRIMINATOR)
                res->insertDefault();
            else
                res->insertFrom(*casted_variant_columns[local_discr], column_variant.offsetAt(i));
        }

        return res;
    };
}
/// Builds a Variant column in which only the variant at position `variant_discr` holds data:
/// that position receives `variant`, every other position receives an empty column of its type.
static ColumnPtr createVariantFromDescriptorsAndOneNonEmptyVariant(const DataTypes & variant_types, const ColumnPtr & discriminators, const ColumnPtr & variant, ColumnVariant::Discriminator variant_discr)
{
    Columns variant_columns;
    variant_columns.reserve(variant_types.size());

    size_t position = 0;
    for (const auto & variant_type : variant_types)
    {
        if (position == variant_discr)
            variant_columns.emplace_back(variant);
        else
            variant_columns.push_back(variant_type->createColumn());
        ++position;
    }

    return ColumnVariant::create(discriminators, variant_columns);
}
/// Creates a wrapper that converts a column of `from_type` to the Variant type `to_variant`.
/// The source type (with Nullable / LowCardinality(Nullable) removed) must be one of the variants,
/// otherwise CANNOT_CONVERT_TYPE is thrown. NULL values of the source column are represented
/// by NULL_DISCRIMINATOR in the result and are not stored in the variant column.
WrapperType createColumnToVariantWrapper(const DataTypePtr & from_type, const DataTypeVariant & to_variant) const
{
/// We allow converting NULL to Variant(...) as Variant can store NULLs.
if (from_type->onlyNull())
{
return [](ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr
{
auto result_column = result_type->createColumn();
result_column->insertManyDefaults(input_rows_count);
return result_column;
};
}
/// Find the discriminator of the variant that matches the source type.
auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(removeNullableOrLowCardinalityNullable(from_type));
if (!variant_discr_opt)
throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert type {} to {}. Conversion to Variant allowed only for types from this Variant", from_type->getName(), to_variant.getName());
return [variant_discr = *variant_discr_opt]
(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t) -> ColumnPtr
{
const auto & result_variant_type = assert_cast<const DataTypeVariant &>(*result_type);
const auto & variant_types = result_variant_type.getVariants();
/// Case 1: Nullable source column. NULL rows get NULL_DISCRIMINATOR and are filtered out of the nested column.
if (const ColumnNullable * col_nullable = typeid_cast<const ColumnNullable *>(arguments.front().column.get()))
{
const auto & column = col_nullable->getNestedColumnPtr();
const auto & null_map = col_nullable->getNullMapData();
IColumn::Filter filter;
filter.reserve(column->size());
auto discriminators = ColumnVariant::ColumnDiscriminators::create();
auto & discriminators_data = discriminators->getData();
discriminators_data.reserve(column->size());
/// Number of non-NULL rows, i.e. the size of the resulting variant column.
size_t variant_size_hint = 0;
for (size_t i = 0; i != column->size(); ++i)
{
if (null_map[i])
{
discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR);
filter.push_back(0);
}
else
{
discriminators_data.push_back(variant_discr);
filter.push_back(1);
++variant_size_hint;
}
}
ColumnPtr variant_column;
/// If there were no NULLs, just use the column.
if (variant_size_hint == column->size())
variant_column = column;
/// Otherwise we should use filtered column.
else
variant_column = column->filter(filter, variant_size_hint);
return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), variant_column, variant_discr);
}
/// Case 2: LowCardinality(Nullable) source column. NULL rows are detected by the dictionary's NULL index.
else if (isColumnLowCardinalityNullable(*arguments.front().column))
{
const auto & column = arguments.front().column;
/// Variant column cannot have LowCardinality(Nullable(...)) variant, as Variant column stores NULLs itself.
/// We should create a null-map, insert NULL_DISCRIMINATOR on NULL values and filter initial column.
const auto & col_lc = assert_cast<const ColumnLowCardinality &>(*column);
const auto & indexes = col_lc.getIndexes();
auto null_index = col_lc.getDictionary().getNullValueIndex();
IColumn::Filter filter;
filter.reserve(col_lc.size());
auto discriminators = ColumnVariant::ColumnDiscriminators::create();
auto & discriminators_data = discriminators->getData();
discriminators_data.reserve(col_lc.size());
/// Number of non-NULL rows, i.e. the size of the resulting variant column.
size_t variant_size_hint = 0;
for (size_t i = 0; i != col_lc.size(); ++i)
{
if (indexes.getUInt(i) == null_index)
{
discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR);
filter.push_back(0);
}
else
{
discriminators_data.push_back(variant_discr);
filter.push_back(1);
++variant_size_hint;
}
}
MutableColumnPtr variant_column;
/// If there were no NULLs, we can just clone the column.
if (variant_size_hint == col_lc.size())
variant_column = IColumn::mutate(column);
/// Otherwise we should filter column.
else
variant_column = column->filter(filter, variant_size_hint)->assumeMutable();
/// The variant itself must not be Nullable, so strip Nullable from the LowCardinality nested type.
assert_cast<ColumnLowCardinality &>(*variant_column).nestedRemoveNullable();
return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), std::move(variant_column), variant_discr);
}
/// Case 3: source column without NULLs. Every row belongs to the variant, the column is used as is.
else
{
const auto & column = arguments.front().column;
auto discriminators = ColumnVariant::ColumnDiscriminators::create();
discriminators->getData().resize_fill(column->size(), variant_discr);
return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), column, variant_discr);
}
};
}
/// Wrapper for conversion to/from Variant type.
/// Dispatches on which side is a Variant: Variant -> Variant, Variant -> other type,
/// or other type -> Variant (in that case `to_type` must be a Variant).
WrapperType createVariantWrapper(const DataTypePtr & from_type, const DataTypePtr & to_type) const
{
    const auto * from_variant = checkAndGetDataType<DataTypeVariant>(from_type.get());
    if (!from_variant)
        return createColumnToVariantWrapper(from_type, assert_cast<const DataTypeVariant &>(*to_type));

    const auto * to_variant = checkAndGetDataType<DataTypeVariant>(to_type.get());
    if (!to_variant)
        return createVariantToColumnWrapper(*from_variant, to_type);

    return createVariantToVariantWrapper(*from_variant, *to_variant);
}
template <typename FieldType>
WrapperType createEnumWrapper(const DataTypePtr & from_type, const DataTypeEnum<FieldType> * to_type) const
{
@ -4245,6 +4500,11 @@ arguments, result_type, input_rows_count); \
WrapperType prepareUnpackDictionaries(const DataTypePtr & from_type, const DataTypePtr & to_type) const
{
/// Conversion from/to Variant data type is processed in a special way.
/// We don't need to remove LowCardinality/Nullable.
if (isVariant(to_type) || isVariant(from_type))
return createVariantWrapper(from_type, to_type);
const auto * from_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(from_type.get());
const auto * to_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(to_type.get());
const auto & from_nested = from_low_cardinality ? from_low_cardinality->getDictionaryType() : from_type;
@ -4252,7 +4512,7 @@ arguments, result_type, input_rows_count); \
if (from_type->onlyNull())
{
if (!to_nested->isNullable())
if (!to_nested->isNullable() && !isVariant(to_type))
{
if (cast_type == CastType::accurateOrNull)
{

View File

@ -313,7 +313,7 @@ ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments,
{
bool use_default_implementation_for_sparse_columns = useDefaultImplementationForSparseColumns();
/// DataTypeFunction does not support obtaining default (isDefaultAt())
/// ColumnFunction does not support getting specific values
/// ColumnFunction does not support getting specific values.
if (result_type->getTypeId() != TypeIndex::Function && use_default_implementation_for_sparse_columns)
{
size_t num_sparse_columns = 0;
@ -368,7 +368,7 @@ ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments,
if (!result_type->canBeInsideSparseColumns() || !res->isDefaultAt(0) || res->getNumberOfDefaultRows() != 1)
{
const auto & offsets_data = assert_cast<const ColumnVector<UInt64> &>(*sparse_offsets).getData();
return res->createWithOffsets(offsets_data, (*res)[0], input_rows_count, /*shift=*/ 1);
return res->createWithOffsets(offsets_data, *createColumnConst(res, 0), input_rows_count, /*shift=*/ 1);
}
return ColumnSparse::create(res, sparse_offsets, input_rows_count);

View File

@ -4,6 +4,7 @@
#include <DataTypes/getLeastSupertype.h>
#include <Columns/ColumnArray.h>
#include <Interpreters/castColumn.h>
#include <Interpreters/Context.h>
namespace DB
@ -14,9 +15,12 @@ class FunctionArray : public IFunction
{
public:
static constexpr auto name = "array";
static FunctionPtr create(ContextPtr)
explicit FunctionArray(bool use_variant_as_common_type_ = false) : use_variant_as_common_type(use_variant_as_common_type_) {}
static FunctionPtr create(ContextPtr context)
{
return std::make_shared<FunctionArray>();
return std::make_shared<FunctionArray>(context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type);
}
bool useDefaultImplementationForNulls() const override { return false; }
@ -31,6 +35,9 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (use_variant_as_common_type)
return std::make_shared<DataTypeArray>(getLeastSupertypeOrVariant(arguments));
return std::make_shared<DataTypeArray>(getLeastSupertype(arguments));
}
@ -97,6 +104,8 @@ private:
}
bool addField(DataTypePtr type_res, const Field & f, Array & arr) const;
bool use_variant_as_common_type = false;
};

View File

@ -4,6 +4,7 @@
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeVariant.h>
#include <DataTypes/NumberTraits.h>
#include <DataTypes/getLeastSupertype.h>
#include <Columns/ColumnVector.h>
@ -14,6 +15,7 @@
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnVariant.h>
#include <Columns/MaskOperations.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
@ -22,6 +24,8 @@
#include <Functions/GatherUtils/Algorithms.h>
#include <Functions/FunctionIfBase.h>
#include <Interpreters/castColumn.h>
#include <Interpreters/Context.h>
#include <Functions/FunctionFactory.h>
#include <type_traits>
@ -258,9 +262,16 @@ class FunctionIf : public FunctionIfBase
{
public:
static constexpr auto name = "if";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionIf>(); }
static FunctionPtr create(ContextPtr context)
{
return std::make_shared<FunctionIf>(context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type);
}
explicit FunctionIf(bool use_variant_when_no_common_type_ = false) : FunctionIfBase(), use_variant_when_no_common_type(use_variant_when_no_common_type_) {}
private:
bool use_variant_when_no_common_type = false;
template <typename T0, typename T1>
static UInt32 decimalScale(const ColumnsWithTypeAndName & arguments [[maybe_unused]])
{
@ -669,13 +680,17 @@ private:
}
static ColumnPtr executeGeneric(
const ColumnUInt8 * cond_col, const ColumnsWithTypeAndName & arguments, size_t input_rows_count)
const ColumnUInt8 * cond_col, const ColumnsWithTypeAndName & arguments, size_t input_rows_count, bool use_variant_when_no_common_type)
{
/// Convert both columns to the common type (if needed).
const ColumnWithTypeAndName & arg1 = arguments[1];
const ColumnWithTypeAndName & arg2 = arguments[2];
DataTypePtr common_type = getLeastSupertype(DataTypes{arg1.type, arg2.type});
DataTypePtr common_type;
if (use_variant_when_no_common_type)
common_type = getLeastSupertypeOrVariant(DataTypes{arg1.type, arg2.type});
else
common_type = getLeastSupertype(DataTypes{arg1.type, arg2.type});
ColumnPtr col_then = castColumn(arg1, common_type);
ColumnPtr col_else = castColumn(arg2, common_type);
@ -850,6 +865,10 @@ private:
ColumnPtr executeForNullableThenElse(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
{
/// If result type is Variant, we don't need to remove Nullable.
if (isVariant(result_type))
return nullptr;
const ColumnWithTypeAndName & arg_cond = arguments[0];
const ColumnWithTypeAndName & arg_then = arguments[1];
const ColumnWithTypeAndName & arg_else = arguments[2];
@ -955,6 +974,11 @@ private:
assert_cast<ColumnNullable &>(*result_column).applyNullMap(assert_cast<const ColumnUInt8 &>(*arg_cond.column));
return result_column;
}
else if (auto * variant_column = typeid_cast<ColumnVariant *>(result_column.get()))
{
variant_column->applyNullMap(assert_cast<const ColumnUInt8 &>(*arg_cond.column).getData());
return result_column;
}
else
return ColumnNullable::create(materializeColumnIfConst(result_column), arg_cond.column);
}
@ -993,6 +1017,11 @@ private:
assert_cast<ColumnNullable &>(*result_column).applyNegatedNullMap(assert_cast<const ColumnUInt8 &>(*arg_cond.column));
return result_column;
}
else if (auto * variant_column = typeid_cast<ColumnVariant *>(result_column.get()))
{
variant_column->applyNegatedNullMap(assert_cast<const ColumnUInt8 &>(*arg_cond.column).getData());
return result_column;
}
else
{
size_t size = input_rows_count;
@ -1082,6 +1111,9 @@ public:
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument (condition) of function if. "
"Must be UInt8.", arguments[0]->getName());
if (use_variant_when_no_common_type)
return getLeastSupertypeOrVariant(DataTypes{arguments[1], arguments[2]});
return getLeastSupertype(DataTypes{arguments[1], arguments[2]});
}
@ -1165,7 +1197,7 @@ public:
|| (res = executeGenericArray(cond_col, arguments, result_type))
|| (res = executeTuple(arguments, result_type, input_rows_count))))
{
return executeGeneric(cond_col, arguments, input_rows_count);
return executeGeneric(cond_col, arguments, input_rows_count, use_variant_when_no_common_type);
}
return res;

View File

@ -5,6 +5,7 @@
#include <Core/ColumnNumbers.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnVariant.h>
#include <Common/assert_cast.h>
@ -45,6 +46,18 @@ public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnWithTypeAndName & elem = arguments[0];
if (isVariant(elem.type))
{
const auto & discriminators = checkAndGetColumn<ColumnVariant>(*elem.column)->getLocalDiscriminators();
auto res = DataTypeUInt8().createColumn();
auto & data = typeid_cast<ColumnUInt8 &>(*res).getData();
data.reserve(discriminators.size());
for (auto discr : discriminators)
data.push_back(discr != ColumnVariant::NULL_DISCRIMINATOR);
return res;
}
if (elem.type->isLowCardinalityNullable())
{
const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(*elem.column);

View File

@ -5,6 +5,7 @@
#include <Core/ColumnNumbers.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnVariant.h>
namespace DB
@ -44,6 +45,18 @@ public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
{
const ColumnWithTypeAndName & elem = arguments[0];
if (isVariant(elem.type))
{
const auto & discriminators = checkAndGetColumn<ColumnVariant>(*elem.column)->getLocalDiscriminators();
auto res = DataTypeUInt8().createColumn();
auto & data = typeid_cast<ColumnUInt8 &>(*res).getData();
data.reserve(discriminators.size());
for (auto discr : discriminators)
data.push_back(discr == ColumnVariant::NULL_DISCRIMINATOR);
return res;
}
if (elem.type->isLowCardinalityNullable())
{
const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(*elem.column);

View File

@ -8,6 +8,7 @@
#include <DataTypes/getLeastSupertype.h>
#include <Columns/ColumnMap.h>
#include <Interpreters/castColumn.h>
#include <Interpreters/Context.h>
#include <Common/HashTable/HashSet.h>
@ -30,9 +31,11 @@ class FunctionMap : public IFunction
public:
static constexpr auto name = "map";
static FunctionPtr create(ContextPtr)
explicit FunctionMap(bool use_variant_as_common_type_) : use_variant_as_common_type(use_variant_as_common_type_) {}
static FunctionPtr create(ContextPtr context)
{
return std::make_shared<FunctionMap>();
return std::make_shared<FunctionMap>(context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type);
}
String getName() const override
@ -77,8 +80,16 @@ public:
}
DataTypes tmp;
tmp.emplace_back(getLeastSupertype(keys));
tmp.emplace_back(getLeastSupertype(values));
if (use_variant_as_common_type)
{
tmp.emplace_back(getLeastSupertypeOrVariant(keys));
tmp.emplace_back(getLeastSupertypeOrVariant(values));
}
else
{
tmp.emplace_back(getLeastSupertype(keys));
tmp.emplace_back(getLeastSupertype(values));
}
return std::make_shared<DataTypeMap>(tmp);
}
@ -138,6 +149,9 @@ public:
return ColumnMap::create(nested_column);
}
private:
bool use_variant_as_common_type = false;
};
/// mapFromArrays(keys, values) is a function that allows you to make key-value pair from a pair of arrays

View File

@ -9,6 +9,7 @@
#include <Common/typeid_cast.h>
#include <Interpreters/Context.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeVariant.h>
#include <DataTypes/getLeastSupertype.h>
@ -117,6 +118,9 @@ public:
types_of_branches.emplace_back(arg);
});
if (context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type)
return getLeastSupertypeOrVariant(types_of_branches);
return getLeastSupertype(types_of_branches);
}

View File

@ -0,0 +1,238 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeVariant.h>
#include <DataTypes/DataTypeFactory.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVariant.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnLowCardinality.h>
#include <Common/assert_cast.h>
#include <memory>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
{
/** Extract element of Variant by variant type name.
  * Also the function looks through Arrays: you can get Array of Variant elements from Array of Variants.
  *
  * Usage: variantElement(variant, type_name[, default_value]).
  *  - `type_name` must be a constant String naming one of the variant types.
  *  - The result type is the requested variant type passed through makeNullableOrLowCardinalityNullableSafe
  *    (and wrapped back into the same number of Array levels as the first argument).
  *  - Rows that hold NULL or some other variant are marked as NULL in the result null map
  *    (or filled with default values when the variant type cannot be inside Nullable).
  *  - If the Variant has no variant with the requested type: with 3 arguments the third argument is returned as is,
  *    with 2 arguments an exception is thrown.
  */
class FunctionVariantElement : public IFunction
{
public:
static constexpr auto name = "variantElement";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionVariantElement>(); }
String getName() const override { return name; }
/// The function takes 2 or 3 arguments; the exact number is validated in getReturnTypeImpl.
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
bool useDefaultImplementationForConstants() const override { return true; }
/// The second argument (variant type name) must always be a constant.
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
/// NULLs and LowCardinality columns are processed by the function itself (see executeImpl).
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
/// Validates the arguments and deduces the result type.
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH if the number of arguments is not 2 or 3 and
/// ILLEGAL_TYPE_OF_ARGUMENT if the first argument is neither Variant nor (nested) Array of Variant.
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
const size_t number_of_arguments = arguments.size();
if (number_of_arguments < 2 || number_of_arguments > 3)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Number of arguments for function {} doesn't match: passed {}, should be 2 or 3",
getName(), number_of_arguments);
/// Peel off all Array levels to get to the Variant type, remembering how many levels there were.
size_t count_arrays = 0;
const IDataType * input_type = arguments[0].type.get();
while (const DataTypeArray * array = checkAndGetDataType<DataTypeArray>(input_type))
{
input_type = array->getNestedType().get();
++count_arrays;
}
const DataTypeVariant * variant_type = checkAndGetDataType<DataTypeVariant>(input_type);
if (!variant_type)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"First argument for function {} must be Variant or Array of Variant. Actual {}",
getName(),
arguments[0].type->getName());
std::optional<size_t> variant_global_discr = getVariantGlobalDiscriminator(arguments[1].column, *variant_type, number_of_arguments);
if (variant_global_discr.has_value())
{
/// Requested variant exists: result is this variant type wrapped into the same number of Arrays as the input.
DataTypePtr return_type = makeNullableOrLowCardinalityNullableSafe(variant_type->getVariant(variant_global_discr.value()));
for (; count_arrays; --count_arrays)
return_type = std::make_shared<DataTypeArray>(return_type);
return return_type;
}
else
/// No such variant. This branch is reachable only with 3 arguments
/// (with 2 arguments getVariantGlobalDiscriminator throws), so the result has the type of the default value.
return arguments[2].type;
}
/// Extracts the requested variant as a separate column with one value per row of the Variant column.
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto & input_arg = arguments[0];
const IDataType * input_type = input_arg.type.get();
const IColumn * input_col = input_arg.column.get();
/// Unwrap ColumnConst to work with its data column; the result is wrapped back into ColumnConst
/// in wrapInArraysAndConstIfNeeded.
bool input_arg_is_const = false;
if (typeid_cast<const ColumnConst *>(input_col))
{
input_col = assert_cast<const ColumnConst *>(input_col)->getDataColumnPtr().get();
input_arg_is_const = true;
}
/// Offsets of all Array levels that were peeled off (from the outermost to the innermost),
/// used to wrap the result back into Arrays.
Columns array_offsets;
while (const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(input_type))
{
const ColumnArray * array_col = assert_cast<const ColumnArray *>(input_col);
input_type = array_type->getNestedType().get();
input_col = &array_col->getData();
array_offsets.push_back(array_col->getOffsetsPtr());
}
const DataTypeVariant * input_type_as_variant = checkAndGetDataType<DataTypeVariant>(input_type);
const ColumnVariant * input_col_as_variant = checkAndGetColumn<ColumnVariant>(input_col);
if (!input_type_as_variant || !input_col_as_variant)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"First argument for function {} must be Variant or array of Variants. Actual {}", getName(), input_arg.type->getName());
std::optional<size_t> variant_global_discr = getVariantGlobalDiscriminator(arguments[1].column, *input_type_as_variant, arguments.size());
/// No variant with the requested type: return the default value argument as is
/// (with only 2 arguments getVariantGlobalDiscriminator has already thrown).
if (!variant_global_discr.has_value())
return arguments[2].column;
const auto & variant_type = input_type_as_variant->getVariant(*variant_global_discr);
const auto & variant_column = input_col_as_variant->getVariantPtrByGlobalDiscriminator(*variant_global_discr);
/// If Variant has only NULLs or our variant doesn't have any real values,
/// just create a column with default values and a null map filled with 1.
if (input_col_as_variant->hasOnlyNulls() || variant_column->empty())
{
auto res = variant_type->createColumn();
if (variant_type->lowCardinality())
assert_cast<ColumnLowCardinality &>(*res).nestedToNullable();
res->insertManyDefaults(input_col_as_variant->size());
/// Types that cannot be inside Nullable are returned without a null map.
if (!variant_type->canBeInsideNullable())
return wrapInArraysAndConstIfNeeded(std::move(res), array_offsets, input_arg_is_const, input_rows_count);
auto null_map = ColumnUInt8::create();
auto & null_map_data = null_map->getData();
null_map_data.resize_fill(input_col_as_variant->size(), 1);
return wrapInArraysAndConstIfNeeded(ColumnNullable::create(std::move(res), std::move(null_map)), array_offsets, input_arg_is_const, input_rows_count);
}
/// If we extract single non-empty column and have no NULLs, then just return this variant.
if (auto non_empty_local_discr = input_col_as_variant->getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
{
/// If we were trying to extract some other variant,
/// it would be empty and we would have already processed this case above.
chassert(input_col_as_variant->globalDiscriminatorByLocal(*non_empty_local_discr) == variant_global_discr);
return wrapInArraysAndConstIfNeeded(makeNullableOrLowCardinalityNullableSafe(variant_column), array_offsets, input_arg_is_const, input_rows_count);
}
/// In general case we should calculate null-mask for variant
/// according to the discriminators column and expand
/// variant column by this mask to get a full column (with default values on NULLs)
const auto & local_discriminators = input_col_as_variant->getLocalDiscriminators();
auto null_map = ColumnUInt8::create();
auto & null_map_data = null_map->getData();
null_map_data.reserve(local_discriminators.size());
auto variant_local_discr = input_col_as_variant->localDiscriminatorByGlobal(*variant_global_discr);
/// A row is marked in the null map when it holds NULL or any variant other than the requested one.
for (auto local_discr : local_discriminators)
null_map_data.push_back(local_discr != variant_local_discr);
auto expanded_variant_column = IColumn::mutate(variant_column);
if (variant_type->lowCardinality())
expanded_variant_column = assert_cast<ColumnLowCardinality &>(*expanded_variant_column).cloneNullable();
/// The mask is passed as inverted: rows with 1 in null_map_data are the ones to be filled in.
expanded_variant_column->expand(null_map_data, /*inverted = */ true);
if (variant_type->canBeInsideNullable())
return wrapInArraysAndConstIfNeeded(ColumnNullable::create(std::move(expanded_variant_column), std::move(null_map)), array_offsets, input_arg_is_const, input_rows_count);
return wrapInArraysAndConstIfNeeded(std::move(expanded_variant_column), array_offsets, input_arg_is_const, input_rows_count);
}
private:
/// Resolves the type name from the second argument to the index of the matching variant in variant_type.getVariants().
/// The name is first parsed by DataTypeFactory and then compared by getName() of the parsed type.
/// Throws ILLEGAL_TYPE_OF_ARGUMENT if index_column is not a constant String.
/// If no variant matches: throws ILLEGAL_TYPE_OF_ARGUMENT when the function was called with 2 arguments,
/// returns std::nullopt otherwise (the caller then uses the default value argument).
std::optional<size_t> getVariantGlobalDiscriminator(const ColumnPtr & index_column, const DataTypeVariant & variant_type, size_t argument_size) const
{
const auto * name_col = checkAndGetColumnConst<ColumnString>(index_column.get());
if (!name_col)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Second argument to {} with Variant argument must be a constant String",
getName());
String variant_element_name = name_col->getValue<String>();
auto variant_element_type = DataTypeFactory::instance().tryGet(variant_element_name);
if (variant_element_type)
{
const auto & variants = variant_type.getVariants();
for (size_t i = 0; i != variants.size(); ++i)
{
if (variants[i]->getName() == variant_element_type->getName())
return i;
}
}
if (argument_size == 2)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} doesn't contain variant with type {}", variant_type.getName(), variant_element_name);
return std::nullopt;
}
/// Wraps the extracted column back into Arrays (using the collected offsets, innermost level first)
/// and into ColumnConst with input_rows_count rows if the first argument was constant.
ColumnPtr wrapInArraysAndConstIfNeeded(ColumnPtr res, const Columns & array_offsets, bool input_arg_is_const, size_t input_rows_count) const
{
for (auto it = array_offsets.rbegin(); it != array_offsets.rend(); ++it)
res = ColumnArray::create(res, *it);
if (input_arg_is_const)
res = ColumnConst::create(res, input_rows_count);
return res;
}
};
}
/// Registers variantElement in the function factory together with its user-facing documentation.
REGISTER_FUNCTION(VariantElement)
{
factory.registerFunction<FunctionVariantElement>(FunctionDocumentation{
.description = R"(
Extracts a column with specified type from a `Variant` column.
)",
/// The syntax string must name this function (it used to say `tupleElement`).
.syntax{"variantElement(variant, type_name[, default_value])"},
.arguments{{
{"variant", "Variant column"},
{"type_name", "The name of the variant type to extract"},
{"default_value", "The default value that will be used if variant doesn't have variant with specified type. Can be any type. Optional"}}},
.examples{{{
"Example",
R"(
CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory;
INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantElement(v, 'Array(UInt64)') FROM test;)",
R"(
┌─v─────────────┬─variantElement(v, 'String')─┬─variantElement(v, 'UInt64')─┬─variantElement(v, 'Array(UInt64)')─┐
│ ᴺᵁᴸᴸ          │ ᴺᵁᴸᴸ                        │                        ᴺᵁᴸᴸ │ []                                 │
│ 42            │ ᴺᵁᴸᴸ                        │                          42 │ []                                 │
│ Hello, World! │ Hello, World!               │                        ᴺᵁᴸᴸ │ []                                 │
│ [1,2,3]       │ ᴺᵁᴸᴸ                        │                        ᴺᵁᴸᴸ │ [1,2,3]                            │
└───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘
)"}}},
.categories{"Variant"},
});
}
}

View File

@ -619,13 +619,16 @@ void readQuotedStringInto(Vector & s, ReadBuffer & buf)
readAnyQuotedStringInto<'\'', enable_sql_style_quoting>(s, buf);
}
template <typename Vector>
template <bool enable_sql_style_quoting, typename Vector>
bool tryReadQuotedStringInto(Vector & s, ReadBuffer & buf)
{
return readAnyQuotedStringInto<'\'', false, Vector, bool>(s, buf);
return readAnyQuotedStringInto<'\'', enable_sql_style_quoting, Vector, bool>(s, buf);
}
template bool tryReadQuotedStringInto(String & s, ReadBuffer & buf);
template bool tryReadQuotedStringInto<true, String>(String & s, ReadBuffer & buf);
template bool tryReadQuotedStringInto<false, String>(String & s, ReadBuffer & buf);
template bool tryReadQuotedStringInto<true, PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
template bool tryReadQuotedStringInto<false, PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
template <bool enable_sql_style_quoting, typename Vector>
void readDoubleQuotedStringInto(Vector & s, ReadBuffer & buf)
@ -633,6 +636,16 @@ void readDoubleQuotedStringInto(Vector & s, ReadBuffer & buf)
readAnyQuotedStringInto<'"', enable_sql_style_quoting>(s, buf);
}
/// Tries to read a double-quoted string into `s`.
/// Forwards to readAnyQuotedStringInto instantiated with a bool return type and returns its result.
template <bool enable_sql_style_quoting, typename Vector>
bool tryReadDoubleQuotedStringInto(Vector & s, ReadBuffer & buf)
{
    constexpr char quote = '"';
    return readAnyQuotedStringInto<quote, enable_sql_style_quoting, Vector, bool>(s, buf);
}
template bool tryReadDoubleQuotedStringInto<true, String>(String & s, ReadBuffer & buf);
template bool tryReadDoubleQuotedStringInto<false, String>(String & s, ReadBuffer & buf);
template <bool enable_sql_style_quoting, typename Vector>
void readBackQuotedStringInto(Vector & s, ReadBuffer & buf)
{
@ -652,6 +665,18 @@ void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf)
readQuotedStringInto<true>(s, buf);
}
/// Clears `s` and tries to read a single-quoted string into it with SQL-style quoting disabled.
/// Returns the result of tryReadQuotedStringInto.
bool tryReadQuotedString(String & s, ReadBuffer & buf)
{
    constexpr bool sql_style_quoting = false;
    s.clear();
    return tryReadQuotedStringInto<sql_style_quoting>(s, buf);
}
/// Clears `s` and tries to read a single-quoted string into it with SQL-style quoting enabled.
/// Returns the result of tryReadQuotedStringInto.
bool tryReadQuotedStringWithSQLStyle(String & s, ReadBuffer & buf)
{
    constexpr bool sql_style_quoting = true;
    s.clear();
    return tryReadQuotedStringInto<sql_style_quoting>(s, buf);
}
template void readQuotedStringInto<true>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
template void readQuotedStringInto<true>(String & s, ReadBuffer & buf);
@ -672,6 +697,18 @@ void readDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf)
readDoubleQuotedStringInto<true>(s, buf);
}
/// Clears `s` and tries to read a double-quoted string into it with SQL-style quoting disabled.
/// Returns the result of tryReadDoubleQuotedStringInto.
bool tryReadDoubleQuotedString(String & s, ReadBuffer & buf)
{
    constexpr bool sql_style_quoting = false;
    s.clear();
    return tryReadDoubleQuotedStringInto<sql_style_quoting>(s, buf);
}
/// Clears `s` and tries to read a double-quoted string into it with SQL-style quoting enabled.
/// Returns the result of tryReadDoubleQuotedStringInto.
bool tryReadDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf)
{
    constexpr bool sql_style_quoting = true;
    s.clear();
    return tryReadDoubleQuotedStringInto<sql_style_quoting>(s, buf);
}
void readBackQuotedString(String & s, ReadBuffer & buf)
{
s.clear();
@ -691,7 +728,7 @@ concept WithResize = requires (T value)
{ value.size() } -> std::integral<>;
};
template <typename Vector, bool include_quotes>
template <typename Vector, bool include_quotes, bool allow_throw>
void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings)
{
/// Empty string
@ -754,12 +791,20 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV &
{
PeekableReadBuffer * peekable_buf = dynamic_cast<PeekableReadBuffer *>(&buf);
if (!peekable_buf)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Reading CSV string with custom delimiter is allowed only when using PeekableReadBuffer");
{
if constexpr (allow_throw)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Reading CSV string with custom delimiter is allowed only when using PeekableReadBuffer");
return;
}
while (true)
{
if (peekable_buf->eof())
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF while reading CSV string, expected custom delimiter \"{}\"", custom_delimiter);
{
if constexpr (allow_throw)
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF while reading CSV string, expected custom delimiter \"{}\"", custom_delimiter);
return;
}
char * next_pos = reinterpret_cast<char *>(memchr(peekable_buf->position(), custom_delimiter[0], peekable_buf->available()));
if (!next_pos)
@ -948,6 +993,9 @@ String readCSVFieldWithTwoPossibleDelimiters(PeekableReadBuffer & buf, const For
template void readCSVStringInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
template void readCSVStringInto<NullOutput>(NullOutput & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
template void readCSVStringInto<String, false, false>(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
template void readCSVStringInto<String, true, false>(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
template void readCSVStringInto<PaddedPODArray<UInt8>, false, false>(PaddedPODArray<UInt8> & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
template <typename Vector, typename ReturnType>
@ -1069,15 +1117,18 @@ ReturnType readJSONObjectPossiblyInvalid(Vector & s, ReadBuffer & buf)
}
template void readJSONObjectPossiblyInvalid<String>(String & s, ReadBuffer & buf);
template bool readJSONObjectPossiblyInvalid<String, bool>(String & s, ReadBuffer & buf);
template void readJSONObjectPossiblyInvalid<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
template bool readJSONObjectPossiblyInvalid<PaddedPODArray<UInt8>, bool>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
template <typename Vector>
void readJSONArrayInto(Vector & s, ReadBuffer & buf)
template <typename Vector, typename ReturnType>
ReturnType readJSONArrayInto(Vector & s, ReadBuffer & buf)
{
readJSONObjectOrArrayPossiblyInvalid<Vector, void, '[', ']'>(s, buf);
return readJSONObjectOrArrayPossiblyInvalid<Vector, ReturnType, '[', ']'>(s, buf);
}
template void readJSONArrayInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
template void readJSONArrayInto<PaddedPODArray<UInt8>, void>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
template bool readJSONArrayInto<PaddedPODArray<UInt8>, bool>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
template <typename ReturnType>
ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf)
@ -1217,6 +1268,13 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
return false;
}
if constexpr (!throw_exception)
{
if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[2]) || !isNumericASCII(s[3])
|| !isNumericASCII(s[5]) || !isNumericASCII(s[6]) || !isNumericASCII(s[8]) || !isNumericASCII(s[9]))
return false;
}
UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0');
UInt8 month = (s[5] - '0') * 10 + (s[6] - '0');
UInt8 day = (s[8] - '0') * 10 + (s[9] - '0');
@ -1240,6 +1298,13 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
return false;
}
if constexpr (!throw_exception)
{
if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[3]) || !isNumericASCII(s[4])
|| !isNumericASCII(s[6]) || !isNumericASCII(s[7]))
return false;
}
hour = (s[0] - '0') * 10 + (s[1] - '0');
minute = (s[3] - '0') * 10 + (s[4] - '0');
second = (s[6] - '0') * 10 + (s[7] - '0');
@ -1259,7 +1324,14 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
{
/// Not very efficient.
for (const char * digit_pos = s; digit_pos < s_pos; ++digit_pos)
{
if constexpr (!throw_exception)
{
if (!isNumericASCII(*digit_pos))
return false;
}
datetime = datetime * 10 + *digit_pos - '0';
}
}
datetime *= negative_multiplier;
@ -1282,14 +1354,24 @@ template bool readDateTimeTextFallback<bool, false>(time_t &, ReadBuffer &, cons
template bool readDateTimeTextFallback<bool, true>(time_t &, ReadBuffer &, const DateLUTImpl &);
void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
template <typename ReturnType>
ReturnType skipJSONFieldImpl(ReadBuffer & buf, StringRef name_of_field)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
if (buf.eof())
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF for key '{}'", name_of_field.toString());
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF for key '{}'", name_of_field.toString());
return ReturnType(false);
}
else if (*buf.position() == '"') /// skip double-quoted string
{
NullOutput sink;
readJSONStringInto(sink, buf);
if constexpr (throw_exception)
readJSONStringInto(sink, buf);
else if (!tryReadJSONStringInto(sink, buf))
return ReturnType(false);
}
else if (isNumericASCII(*buf.position()) || *buf.position() == '-' || *buf.position() == '+' || *buf.position() == '.') /// skip number
{
@ -1298,19 +1380,32 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
double v;
if (!tryReadFloatText(v, buf))
throw Exception(ErrorCodes::INCORRECT_DATA, "Expected a number field for key '{}'", name_of_field.toString());
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::INCORRECT_DATA, "Expected a number field for key '{}'", name_of_field.toString());
return ReturnType(false);
}
}
else if (*buf.position() == 'n') /// skip null
{
assertString("null", buf);
if constexpr (throw_exception)
assertString("null", buf);
else if (!checkString("null", buf))
return ReturnType(false);
}
else if (*buf.position() == 't') /// skip true
{
assertString("true", buf);
if constexpr (throw_exception)
assertString("true", buf);
else if (!checkString("true", buf))
return ReturnType(false);
}
else if (*buf.position() == 'f') /// skip false
{
assertString("false", buf);
if constexpr (throw_exception)
assertString("false", buf);
else if (!checkString("false", buf))
return ReturnType(false);
}
else if (*buf.position() == '[')
{
@ -1320,12 +1415,16 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
if (!buf.eof() && *buf.position() == ']') /// skip empty array
{
++buf.position();
return;
return ReturnType(true);
}
while (true)
{
skipJSONField(buf, name_of_field);
if constexpr (throw_exception)
skipJSONFieldImpl<ReturnType>(buf, name_of_field);
else if (!skipJSONFieldImpl<ReturnType>(buf, name_of_field))
return ReturnType(false);
skipWhitespaceIfAny(buf);
if (!buf.eof() && *buf.position() == ',')
@ -1339,7 +1438,11 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
break;
}
else
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString());
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString());
return ReturnType(false);
}
}
}
else if (*buf.position() == '{') /// skip whole object
@ -1353,19 +1456,34 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
if (*buf.position() == '"')
{
NullOutput sink;
readJSONStringInto(sink, buf);
if constexpr (throw_exception)
readJSONStringInto(sink, buf);
else if (!tryReadJSONStringInto(sink, buf))
return ReturnType(false);
}
else
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString());
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString());
return ReturnType(false);
}
// ':'
skipWhitespaceIfAny(buf);
if (buf.eof() || !(*buf.position() == ':'))
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString());
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString());
return ReturnType(false);
}
++buf.position();
skipWhitespaceIfAny(buf);
skipJSONField(buf, name_of_field);
if constexpr (throw_exception)
skipJSONFieldImpl<ReturnType>(buf, name_of_field);
else if (!skipJSONFieldImpl<ReturnType>(buf, name_of_field))
return ReturnType(false);
skipWhitespaceIfAny(buf);
// optional ','
@ -1377,18 +1495,37 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
}
if (buf.eof())
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF for key '{}'", name_of_field.toString());
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF for key '{}'", name_of_field.toString());
return ReturnType(false);
}
++buf.position();
}
else
{
throw Exception(
ErrorCodes::INCORRECT_DATA,
"Cannot read JSON field here: '{}'. Unexpected symbol '{}'{}",
String(buf.position(), std::min(buf.available(), size_t(10))),
std::string(1, *buf.position()),
name_of_field.empty() ? "" : " for key " + name_of_field.toString());
if constexpr (throw_exception)
throw Exception(
ErrorCodes::INCORRECT_DATA,
"Cannot read JSON field here: '{}'. Unexpected symbol '{}'{}",
String(buf.position(), std::min(buf.available(), size_t(10))),
std::string(1, *buf.position()),
name_of_field.empty() ? "" : " for key " + name_of_field.toString());
return ReturnType(false);
}
return ReturnType(true);
}
/// Skips one JSON value in `buf` by delegating to skipJSONFieldImpl instantiated with ReturnType = void.
/// `name_of_field` is forwarded to the implementation, which uses it in its error messages.
/// NOTE(review): the void instantiation is presumably the exception-throwing mode — confirm against skipJSONFieldImpl.
void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
{
skipJSONFieldImpl<void>(buf, name_of_field);
}
/// Counterpart of skipJSONField that returns a status: delegates to skipJSONFieldImpl
/// instantiated with ReturnType = bool and returns its result.
/// `name_of_field` is forwarded unchanged to the implementation.
bool trySkipJSONField(ReadBuffer & buf, StringRef name_of_field)
{
return skipJSONFieldImpl<bool>(buf, name_of_field);
}
@ -1601,23 +1738,31 @@ void skipToNextRowOrEof(PeekableReadBuffer & buf, const String & row_after_delim
}
// Use PeekableReadBuffer to copy field to string after parsing.
template <typename Vector, typename ParseFunc>
static void readParsedValueInto(Vector & s, ReadBuffer & buf, ParseFunc parse_func)
template <typename ReturnType, typename Vector, typename ParseFunc>
static ReturnType readParsedValueInto(Vector & s, ReadBuffer & buf, ParseFunc parse_func)
{
PeekableReadBuffer peekable_buf(buf);
peekable_buf.setCheckpoint();
parse_func(peekable_buf);
if constexpr (std::is_same_v<ReturnType, void>)
parse_func(peekable_buf);
else if (!parse_func(peekable_buf))
return ReturnType(false);
peekable_buf.makeContinuousMemoryFromCheckpointToPos();
auto * end = peekable_buf.position();
peekable_buf.rollbackToCheckpoint();
s.append(peekable_buf.position(), end);
peekable_buf.position() = end;
return ReturnType(true);
}
template <typename Vector>
static void readQuotedStringFieldInto(Vector & s, ReadBuffer & buf)
template <typename ReturnType = void, typename Vector>
static ReturnType readQuotedStringFieldInto(Vector & s, ReadBuffer & buf)
{
assertChar('\'', buf);
if constexpr (std::is_same_v<ReturnType, void>)
assertChar('\'', buf);
else if (!checkChar('\'', buf))
return ReturnType(false);
s.push_back('\'');
while (!buf.eof())
{
@ -1645,16 +1790,23 @@ static void readQuotedStringFieldInto(Vector & s, ReadBuffer & buf)
}
if (buf.eof())
return;
return ReturnType(false);
++buf.position();
s.push_back('\'');
return ReturnType(true);
}
template <char opening_bracket, char closing_bracket, typename Vector>
static void readQuotedFieldInBracketsInto(Vector & s, ReadBuffer & buf)
template <typename ReturnType = void, char opening_bracket, char closing_bracket, typename Vector>
static ReturnType readQuotedFieldInBracketsInto(Vector & s, ReadBuffer & buf)
{
assertChar(opening_bracket, buf);
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
if constexpr (throw_exception)
assertChar(opening_bracket, buf);
else if (!checkChar(opening_bracket, buf))
return ReturnType(false);
s.push_back(opening_bracket);
size_t balance = 1;
@ -1670,7 +1822,10 @@ static void readQuotedFieldInBracketsInto(Vector & s, ReadBuffer & buf)
if (*buf.position() == '\'')
{
readQuotedStringFieldInto(s, buf);
if constexpr (throw_exception)
readQuotedStringFieldInto<void>(s, buf);
else if (!readQuotedStringFieldInto<bool>(s, buf))
return ReturnType(false);
}
else if (*buf.position() == opening_bracket)
{
@ -1685,13 +1840,20 @@ static void readQuotedFieldInBracketsInto(Vector & s, ReadBuffer & buf)
++buf.position();
}
}
if (balance)
return ReturnType(false);
return ReturnType(true);
}
template <typename Vector>
void readQuotedFieldInto(Vector & s, ReadBuffer & buf)
template <typename ReturnType, typename Vector>
ReturnType readQuotedFieldInto(Vector & s, ReadBuffer & buf)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
if (buf.eof())
return;
return ReturnType(false);
/// Possible values in 'Quoted' field:
/// - Strings: '...'
@ -1703,35 +1865,47 @@ void readQuotedFieldInto(Vector & s, ReadBuffer & buf)
/// - Number: integer, float, decimal.
if (*buf.position() == '\'')
readQuotedStringFieldInto(s, buf);
return readQuotedStringFieldInto<ReturnType>(s, buf);
else if (*buf.position() == '[')
readQuotedFieldInBracketsInto<'[', ']'>(s, buf);
return readQuotedFieldInBracketsInto<ReturnType, '[', ']'>(s, buf);
else if (*buf.position() == '(')
readQuotedFieldInBracketsInto<'(', ')'>(s, buf);
return readQuotedFieldInBracketsInto<ReturnType, '(', ')'>(s, buf);
else if (*buf.position() == '{')
readQuotedFieldInBracketsInto<'{', '}'>(s, buf);
return readQuotedFieldInBracketsInto<ReturnType, '{', '}'>(s, buf);
else if (checkCharCaseInsensitive('n', buf))
{
/// NULL or NaN
if (checkCharCaseInsensitive('u', buf))
{
assertStringCaseInsensitive("ll", buf);
if constexpr (throw_exception)
assertStringCaseInsensitive("ll", buf);
else if (!checkStringCaseInsensitive("ll", buf))
return ReturnType(false);
s.append("NULL");
}
else
{
assertStringCaseInsensitive("an", buf);
if constexpr (throw_exception)
assertStringCaseInsensitive("an", buf);
else if (!checkStringCaseInsensitive("an", buf))
return ReturnType(false);
s.append("NaN");
}
}
else if (checkCharCaseInsensitive('t', buf))
{
assertStringCaseInsensitive("rue", buf);
if constexpr (throw_exception)
assertStringCaseInsensitive("rue", buf);
else if (!checkStringCaseInsensitive("rue", buf))
return ReturnType(false);
s.append("true");
}
else if (checkCharCaseInsensitive('f', buf))
{
assertStringCaseInsensitive("alse", buf);
if constexpr (throw_exception)
assertStringCaseInsensitive("alse", buf);
else if (!checkStringCaseInsensitive("alse", buf))
return ReturnType(false);
s.append("false");
}
else
@ -1740,13 +1914,19 @@ void readQuotedFieldInto(Vector & s, ReadBuffer & buf)
auto parse_func = [](ReadBuffer & in)
{
Float64 tmp;
readFloatText(tmp, in);
if constexpr (throw_exception)
readFloatText(tmp, in);
else
return tryReadFloatText(tmp, in);
};
readParsedValueInto(s, buf, parse_func);
return readParsedValueInto<ReturnType>(s, buf, parse_func);
}
return ReturnType(true);
}
template void readQuotedFieldInto<NullOutput>(NullOutput & s, ReadBuffer & buf);
template void readQuotedFieldInto<void, NullOutput>(NullOutput & s, ReadBuffer & buf);
void readQuotedField(String & s, ReadBuffer & buf)
{
@ -1754,11 +1934,24 @@ void readQuotedField(String & s, ReadBuffer & buf)
readQuotedFieldInto(s, buf);
}
/// Status-returning variant of readQuotedField.
/// Clears `s`, then reads one 'Quoted' field from `buf` into it via readQuotedFieldInto<bool>
/// and returns that call's result.
/// On a false result `s` may already hold a partially copied field, because characters are appended while parsing.
bool tryReadQuotedField(String & s, ReadBuffer & buf)
{
s.clear();
return readQuotedFieldInto<bool>(s, buf);
}
void readJSONField(String & s, ReadBuffer & buf)
{
s.clear();
auto parse_func = [](ReadBuffer & in) { skipJSONField(in, ""); };
readParsedValueInto(s, buf, parse_func);
readParsedValueInto<void>(s, buf, parse_func);
}
/// Status-returning variant of readJSONField.
/// Empties `s`, then lets readParsedValueInto<bool> run trySkipJSONField over the input
/// and copy the bytes it consumed into `s`. Returns false when the skip step reports failure.
bool tryReadJSONField(String & s, ReadBuffer & buf)
{
    s.clear();
    return readParsedValueInto<bool>(
        s,
        buf,
        [](ReadBuffer & in) -> bool { return trySkipJSONField(in, ""); });
}
void readTSVField(String & s, ReadBuffer & buf)

View File

@ -258,26 +258,43 @@ inline void readBoolText(bool & x, ReadBuffer & buf)
x = tmp != '0';
}
inline void readBoolTextWord(bool & x, ReadBuffer & buf, bool support_upper_case = false)
template <typename ReturnType = void>
inline ReturnType readBoolTextWord(bool & x, ReadBuffer & buf, bool support_upper_case = false)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
if (buf.eof()) [[unlikely]]
throwReadAfterEOF();
{
if constexpr (throw_exception)
throwReadAfterEOF();
else
return ReturnType(false);
}
switch (*buf.position())
{
case 't':
assertString("true", buf);
if constexpr (throw_exception)
assertString("true", buf);
else if (!checkString("true", buf))
return ReturnType(false);
x = true;
break;
case 'f':
assertString("false", buf);
if constexpr (throw_exception)
assertString("false", buf);
else if (!checkString("false", buf))
return ReturnType(false);
x = false;
break;
case 'T':
{
if (support_upper_case)
{
assertString("TRUE", buf);
if constexpr (throw_exception)
assertString("TRUE", buf);
else if (!checkString("TRUE", buf))
return ReturnType(false);
x = true;
break;
}
@ -288,7 +305,10 @@ inline void readBoolTextWord(bool & x, ReadBuffer & buf, bool support_upper_case
{
if (support_upper_case)
{
assertString("FALSE", buf);
if constexpr (throw_exception)
assertString("FALSE", buf);
else if (!checkString("FALSE", buf))
return ReturnType(false);
x = false;
break;
}
@ -296,8 +316,15 @@ inline void readBoolTextWord(bool & x, ReadBuffer & buf, bool support_upper_case
[[fallthrough]];
}
default:
throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Unexpected Bool value");
{
if constexpr (throw_exception)
throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Unexpected Bool value");
else
return ReturnType(false);
}
}
return ReturnType(true);
}
enum class ReadIntTextCheckOverflow
@ -469,7 +496,10 @@ void readIntText(T & x, ReadBuffer & buf)
template <ReadIntTextCheckOverflow check_overflow = ReadIntTextCheckOverflow::CHECK_OVERFLOW, typename T>
bool tryReadIntText(T & x, ReadBuffer & buf)
{
return readIntTextImpl<T, bool, check_overflow>(x, buf);
if constexpr (is_decimal<T>)
return tryReadIntText<check_overflow>(x.value, buf);
else
return readIntTextImpl<T, bool, check_overflow>(x, buf);
}
@ -478,16 +508,18 @@ bool tryReadIntText(T & x, ReadBuffer & buf)
* - for numbers starting with zero, parsed only zero;
* - symbol '+' before number is not supported;
*/
template <typename T, bool throw_on_error = true>
void readIntTextUnsafe(T & x, ReadBuffer & buf)
template <typename T, typename ReturnType = void>
ReturnType readIntTextUnsafe(T & x, ReadBuffer & buf)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
bool negative = false;
make_unsigned_t<T> res = 0;
auto on_error = []
{
if (throw_on_error)
if constexpr (throw_exception)
throwReadAfterEOF();
return ReturnType(false);
};
if (buf.eof()) [[unlikely]]
@ -505,7 +537,7 @@ void readIntTextUnsafe(T & x, ReadBuffer & buf)
{
++buf.position();
x = 0;
return;
return ReturnType(true);
}
while (!buf.eof())
@ -524,12 +556,13 @@ void readIntTextUnsafe(T & x, ReadBuffer & buf)
/// See note about undefined behaviour above.
x = is_signed_v<T> && negative ? -res : res;
return ReturnType(true);
}
template <typename T>
void tryReadIntTextUnsafe(T & x, ReadBuffer & buf)
bool tryReadIntTextUnsafe(T & x, ReadBuffer & buf)
{
return readIntTextUnsafe<T, false>(x, buf);
return readIntTextUnsafe<T, bool>(x, buf);
}
@ -551,9 +584,15 @@ void readEscapedString(String & s, ReadBuffer & buf);
void readQuotedString(String & s, ReadBuffer & buf);
void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);
bool tryReadQuotedString(String & s, ReadBuffer & buf);
bool tryReadQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);
void readDoubleQuotedString(String & s, ReadBuffer & buf);
void readDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);
bool tryReadDoubleQuotedString(String & s, ReadBuffer & buf);
bool tryReadDoubleQuotedStringWithSQLStyle(String & s, ReadBuffer & buf);
void readJSONString(String & s, ReadBuffer & buf);
void readBackQuotedString(String & s, ReadBuffer & buf);
@ -616,7 +655,7 @@ void readBackQuotedStringInto(Vector & s, ReadBuffer & buf);
template <typename Vector>
void readStringUntilEOFInto(Vector & s, ReadBuffer & buf);
template <typename Vector, bool include_quotes = false>
template <typename Vector, bool include_quotes = false, bool allow_throw = true>
void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
/// ReturnType is either bool or void. If bool, the function will return false instead of throwing an exception.
@ -629,7 +668,7 @@ bool tryReadJSONStringInto(Vector & s, ReadBuffer & buf)
return readJSONStringInto<Vector, bool>(s, buf);
}
template <typename Vector>
template <bool enable_sql_style_quoting, typename Vector>
bool tryReadQuotedStringInto(Vector & s, ReadBuffer & buf);
/// Reads chunk of data between {} in that way,
@ -638,8 +677,8 @@ bool tryReadQuotedStringInto(Vector & s, ReadBuffer & buf);
template <typename Vector, typename ReturnType = void>
ReturnType readJSONObjectPossiblyInvalid(Vector & s, ReadBuffer & buf);
template <typename Vector>
void readJSONArrayInto(Vector & s, ReadBuffer & buf);
template <typename Vector, typename ReturnType = void>
ReturnType readJSONArrayInto(Vector & s, ReadBuffer & buf);
template <typename Vector>
void readStringUntilWhitespaceInto(Vector & s, ReadBuffer & buf);
@ -963,6 +1002,13 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons
{
if (s[4] < '0' || s[4] > '9')
{
if constexpr (!throw_exception)
{
if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[2]) || !isNumericASCII(s[3])
|| !isNumericASCII(s[5]) || !isNumericASCII(s[6]) || !isNumericASCII(s[8]) || !isNumericASCII(s[9]))
return ReturnType(false);
}
UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0');
UInt8 month = (s[5] - '0') * 10 + (s[6] - '0');
UInt8 day = (s[8] - '0') * 10 + (s[9] - '0');
@ -975,6 +1021,13 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons
bool dt_long = (s[10] == ' ' || s[10] == 'T');
if (dt_long)
{
if constexpr (!throw_exception)
{
if (!isNumericASCII(s[11]) || !isNumericASCII(s[12]) || !isNumericASCII(s[14]) || !isNumericASCII(s[15])
|| !isNumericASCII(s[17]) || !isNumericASCII(s[18]))
return ReturnType(false);
}
hour = (s[11] - '0') * 10 + (s[12] - '0');
minute = (s[14] - '0') * 10 + (s[15] - '0');
second = (s[17] - '0') * 10 + (s[18] - '0');
@ -1312,6 +1365,11 @@ inline bool tryReadText(is_integer auto & x, ReadBuffer & buf)
return tryReadIntText(x, buf);
}
/// tryReadText overload for floating point types: forwards to tryReadFloatText and returns its result.
inline bool tryReadText(is_floating_point auto & x, ReadBuffer & buf)
{
return tryReadFloatText(x, buf);
}
/// tryReadText overloads for UUID, IPv4 and IPv6: each forwards to the matching tryRead*Text helper
/// and returns that helper's result.
inline bool tryReadText(UUID & x, ReadBuffer & buf) { return tryReadUUIDText(x, buf); }
inline bool tryReadText(IPv4 & x, ReadBuffer & buf) { return tryReadIPv4Text(x, buf); }
inline bool tryReadText(IPv6 & x, ReadBuffer & buf) { return tryReadIPv6Text(x, buf); }
@ -1321,9 +1379,20 @@ inline void readText(is_floating_point auto & x, ReadBuffer & buf) { readFloatTe
inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); }
inline void readText(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { readDateText(x, buf, time_zone); }
inline bool tryReadText(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) { return tryReadDateText(x, buf, time_zone); }
inline void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); }
inline bool tryReadText(LocalDate & x, ReadBuffer & buf) { return tryReadDateText(x, buf); }
inline void readText(LocalDateTime & x, ReadBuffer & buf) { readDateTimeText(x, buf); }
/// tryReadText overload for LocalDateTime.
/// Parses a timestamp with tryReadDateTimeText; only on success is `x` overwritten with a
/// LocalDateTime built from that timestamp and DateLUT::instance(). Returns the parse status.
inline bool tryReadText(LocalDateTime & x, ReadBuffer & buf)
{
    time_t parsed_timestamp;
    const bool parsed = tryReadDateTimeText(parsed_timestamp, buf);
    if (parsed)
        x = LocalDateTime(parsed_timestamp, DateLUT::instance());
    return parsed;
}
inline void readText(UUID & x, ReadBuffer & buf) { readUUIDText(x, buf); }
inline void readText(IPv4 & x, ReadBuffer & buf) { readIPv4Text(x, buf); }
inline void readText(IPv6 & x, ReadBuffer & buf) { readIPv6Text(x, buf); }
@ -1401,39 +1470,71 @@ inline void readDoubleQuoted(LocalDateTime & x, ReadBuffer & buf)
}
/// CSV for numbers: quotes are optional, no special escaping rules.
template <typename T>
inline void readCSVSimple(T & x, ReadBuffer & buf)
template <typename T, typename ReturnType = void>
inline ReturnType readCSVSimple(T & x, ReadBuffer & buf)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
if (buf.eof()) [[unlikely]]
throwReadAfterEOF();
{
if constexpr (throw_exception)
throwReadAfterEOF();
return ReturnType(false);
}
char maybe_quote = *buf.position();
if (maybe_quote == '\'' || maybe_quote == '\"')
++buf.position();
readText(x, buf);
if constexpr (throw_exception)
readText(x, buf);
else if (!tryReadText(x, buf))
return ReturnType(false);
if (maybe_quote == '\'' || maybe_quote == '\"')
assertChar(maybe_quote, buf);
{
if constexpr (throw_exception)
assertChar(maybe_quote, buf);
else if (!checkChar(maybe_quote, buf))
return ReturnType(false);
}
return ReturnType(true);
}
// standalone overload for dates: to avoid instantiating DateLUTs while parsing other types
template <typename T>
inline void readCSVSimple(T & x, ReadBuffer & buf, const DateLUTImpl & time_zone)
template <typename T, typename ReturnType = void>
inline ReturnType readCSVSimple(T & x, ReadBuffer & buf, const DateLUTImpl & time_zone)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
if (buf.eof()) [[unlikely]]
throwReadAfterEOF();
{
if constexpr (throw_exception)
throwReadAfterEOF();
return ReturnType(false);
}
char maybe_quote = *buf.position();
if (maybe_quote == '\'' || maybe_quote == '\"')
++buf.position();
readText(x, buf, time_zone);
if constexpr (throw_exception)
readText(x, buf, time_zone);
else if (!tryReadText(x, buf, time_zone))
return ReturnType(false);
if (maybe_quote == '\'' || maybe_quote == '\"')
assertChar(maybe_quote, buf);
{
if constexpr (throw_exception)
assertChar(maybe_quote, buf);
else if (!checkChar(maybe_quote, buf))
return ReturnType(false);
}
return ReturnType(true);
}
template <typename T>
@ -1443,18 +1544,52 @@ inline void readCSV(T & x, ReadBuffer & buf)
readCSVSimple(x, buf);
}
/// Status-returning CSV reader for arithmetic types.
/// Delegates to readCSVSimple<T, bool> and returns its result.
template <typename T>
requires is_arithmetic_v<T>
inline bool tryReadCSV(T & x, ReadBuffer & buf)
{
return readCSVSimple<T, bool>(x, buf);
}
inline void readCSV(String & x, ReadBuffer & buf, const FormatSettings::CSV & settings) { readCSVString(x, buf, settings); }
/// tryReadCSV overload for String.
/// Clears `x`, then reads a CSV string into it using readCSVStringInto with
/// include_quotes = false and allow_throw = false.
/// Always returns true: no failure status is propagated from readCSVStringInto.
inline bool tryReadCSV(String & x, ReadBuffer & buf, const FormatSettings::CSV & settings)
{
x.clear();
readCSVStringInto<String, false, false>(x, buf, settings);
return true;
}
/// Per-type CSV readers for dates, UUID, IP addresses and wide integers.
/// Each readCSV overload forwards to readCSVSimple with its default (void) return type.
/// Each tryReadCSV overload forwards to readCSVSimple<Type, bool> and returns its result.
/// The DayNum overloads that take `time_zone` pass it through to the time-zone-aware readCSVSimple.
inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
inline bool tryReadCSV(LocalDate & x, ReadBuffer & buf) { return readCSVSimple<LocalDate, bool>(x, buf); }
inline void readCSV(DayNum & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
inline bool tryReadCSV(DayNum & x, ReadBuffer & buf) { return readCSVSimple<DayNum, bool>(x, buf); }
inline void readCSV(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone) { readCSVSimple(x, buf, time_zone); }
inline bool tryReadCSV(DayNum & x, ReadBuffer & buf, const DateLUTImpl & time_zone) { return readCSVSimple<DayNum, bool>(x, buf, time_zone); }
inline void readCSV(LocalDateTime & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
inline bool tryReadCSV(LocalDateTime & x, ReadBuffer & buf) { return readCSVSimple<LocalDateTime, bool>(x, buf); }
inline void readCSV(UUID & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
inline bool tryReadCSV(UUID & x, ReadBuffer & buf) { return readCSVSimple<UUID, bool>(x, buf); }
inline void readCSV(IPv4 & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
inline bool tryReadCSV(IPv4 & x, ReadBuffer & buf) { return readCSVSimple<IPv4, bool>(x, buf); }
inline void readCSV(IPv6 & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
inline bool tryReadCSV(IPv6 & x, ReadBuffer & buf) { return readCSVSimple<IPv6, bool>(x, buf); }
inline void readCSV(UInt128 & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
inline bool tryReadCSV(UInt128 & x, ReadBuffer & buf) { return readCSVSimple<UInt128, bool>(x, buf); }
inline void readCSV(Int128 & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
inline bool tryReadCSV(Int128 & x, ReadBuffer & buf) { return readCSVSimple<Int128, bool>(x, buf); }
inline void readCSV(UInt256 & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
inline bool tryReadCSV(UInt256 & x, ReadBuffer & buf) { return readCSVSimple<UInt256, bool>(x, buf); }
inline void readCSV(Int256 & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
inline bool tryReadCSV(Int256 & x, ReadBuffer & buf) { return readCSVSimple<Int256, bool>(x, buf); }
template <typename T>
void readBinary(std::vector<T> & x, ReadBuffer & buf)
@ -1536,6 +1671,7 @@ inline void skipWhitespaceIfAny(ReadBuffer & buf, bool one_line = false)
/// Skips json value.
void skipJSONField(ReadBuffer & buf, StringRef name_of_field);
bool trySkipJSONField(ReadBuffer & buf, StringRef name_of_field);
/** Read serialized exception.
@ -1750,12 +1886,14 @@ struct PcgDeserializer
}
};
template <typename Vector>
void readQuotedFieldInto(Vector & s, ReadBuffer & buf);
template <typename ReturnType = void, typename Vector>
ReturnType readQuotedFieldInto(Vector & s, ReadBuffer & buf);
void readQuotedField(String & s, ReadBuffer & buf);
bool tryReadQuotedField(String & s, ReadBuffer & buf);
void readJSONField(String & s, ReadBuffer & buf);
bool tryReadJSONField(String & s, ReadBuffer & buf);
void readTSVField(String & s, ReadBuffer & buf);

View File

@ -224,4 +224,24 @@ inline void readCSVDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint
assertChar(maybe_quote, buf);
}
/// Status-returning reader for a decimal value in a CSV field.
/// The value may be wrapped in single or double quotes; an opening quote must be closed by the same character.
/// Returns false when the buffer is at EOF, when tryReadDecimalText fails, or when the closing quote is missing.
/// `precision` and `scale` are passed through to tryReadDecimalText.
template <typename T>
inline bool tryReadCSVDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale)
{
    if (buf.eof())
        return false;

    const char first_char = *buf.position();
    const bool is_quoted = first_char == '\'' || first_char == '"';
    if (is_quoted)
        ++buf.position();

    if (!tryReadDecimalText(buf, x, precision, scale))
        return false;

    /// The closing quote is checked (and consumed) only when the field opened with one.
    return !is_quoted || checkChar(first_char, buf);
}
}

Some files were not shown because too many files have changed in this diff Show More