Merge branch 'ClickHouse:master' into master

This commit is contained in:
iceFireser 2024-08-27 20:51:47 +08:00 committed by GitHub
commit 55eb2953c7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
148 changed files with 2156 additions and 811 deletions

View File

@ -54,6 +54,7 @@ Other upcoming meetups
* [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5
* [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5
* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10
* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17
* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17
## Recent Recordings

View File

@ -311,6 +311,14 @@ int SecureSocketImpl::sendBytes(const void* buffer, int length, int flags)
while (mustRetry(rc, remaining_time));
if (rc <= 0)
{
// At this stage we can still have a last, not yet received SSL message containing an SSL error,
// so make a read to force SSL to process a possible SSL error
if (SSL_get_error(_pSSL, rc) == SSL_ERROR_SYSCALL && SocketImpl::lastError() == POCO_ECONNRESET)
{
char c = 0;
SSL_read(_pSSL, &c, 1);
}
rc = handleError(rc);
if (rc == 0) throw SSLConnectionUnexpectedlyClosedException();
}

View File

@ -8,4 +8,7 @@ set (CMAKE_CXX_COMPILER_TARGET "x86_64-pc-freebsd11")
set (CMAKE_ASM_COMPILER_TARGET "x86_64-pc-freebsd11")
set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/freebsd-x86_64")
# dprintf is used in a patched version of replxx
add_compile_definitions(_WITH_DPRINTF)
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake

contrib/replxx vendored

@ -1 +1 @@
Subproject commit 5d04501f93a4fb7f0bb8b73b8f614bc986f9e25b
Subproject commit 711c18e7f4d951255aa8b0851e5a55d5a5fb0ddb

View File

@ -112,3 +112,5 @@ wadllib==1.3.6
websocket-client==0.59.0
wheel==0.37.1
zipp==1.0.0
deltalake==0.16.0

View File

@ -6,28 +6,34 @@ sidebar_label: Iceberg
# Iceberg Table Engine
This engine provides a read-only integration with existing Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3.
This engine provides a read-only integration with existing Apache [Iceberg](https://iceberg.apache.org/) tables stored in Amazon S3, in Azure, or locally.
## Create Table
Note that the Iceberg table must already exist in S3; this command does not take DDL parameters to create a new table.
Note that the Iceberg table must already exist in the storage; this command does not take DDL parameters to create a new table.
``` sql
CREATE TABLE iceberg_table
ENGINE = Iceberg(url, [aws_access_key_id, aws_secret_access_key,])
CREATE TABLE iceberg_table_s3
ENGINE = IcebergS3(url, [, NOSIGN | access_key_id, secret_access_key, [session_token]], format, [,compression])
CREATE TABLE iceberg_table_azure
ENGINE = IcebergAzure(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression])
CREATE TABLE iceberg_table_local
ENGINE = IcebergLocal(path_to_table, [,format] [,compression_method])
```
**Engine parameters**
**Engine arguments**
- `url` — url with the path to an existing Iceberg table.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file.
The description of the arguments coincides with the description of the arguments of the `S3`, `AzureBlobStorage`, and `File` engines respectively.
`format` stands for the format of data files in the Iceberg table.
Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md)
**Example**
```sql
CREATE TABLE iceberg_table ENGINE=Iceberg('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', 'test')
CREATE TABLE iceberg_table ENGINE=IcebergS3('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', 'test')
```
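The Azure and local variants follow the same call shape. A minimal sketch for the local engine (the path and format below are illustrative assumptions, not values taken from the examples above):
```sql
-- Illustrative only: the path and format are assumed for the example.
CREATE TABLE iceberg_table_local
ENGINE = IcebergLocal('/var/lib/clickhouse/user_files/iceberg/my_table/', 'Parquet')
```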
Using named collections:
@ -45,9 +51,15 @@ Using named collections:
```
```sql
CREATE TABLE iceberg_table ENGINE=Iceberg(iceberg_conf, filename = 'test_table')
CREATE TABLE iceberg_table ENGINE=IcebergS3(iceberg_conf, filename = 'test_table')
```
**Aliases**
The table engine `Iceberg` is now an alias for `IcebergS3`.
## See also
- [iceberg table function](/docs/en/sql-reference/table-functions/iceberg.md)

View File

@ -1389,7 +1389,7 @@ DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : nul
#### schema_inference_make_columns_nullable
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred type will be `Nullable` only if `input_format_null_as_default` is disabled and the column contains `NULL` in a sample that is parsed during schema inference.
If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred type will never be `Nullable`; if set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or if the file metadata contains information about column nullability.
Enabled by default.
@ -1412,15 +1412,13 @@ DESC format(JSONEachRow, $$
└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
```sql
SET schema_inference_make_columns_nullable = 0;
SET input_format_null_as_default = 0;
SET schema_inference_make_columns_nullable = 'auto';
DESC format(JSONEachRow, $$
{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}
{"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]}
$$)
```
```response
┌─name────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ id │ Int64 │ │ │ │ │ │
│ age │ Int64 │ │ │ │ │ │
@ -1432,7 +1430,6 @@ DESC format(JSONEachRow, $$
```sql
SET schema_inference_make_columns_nullable = 0;
SET input_format_null_as_default = 1;
DESC format(JSONEachRow, $$
{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}
{"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]}

View File

@ -171,8 +171,8 @@ If the `schema_inference_hints` is not formatted properly, or if there is a typo
## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable}
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
If the setting is enabled, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference.
Controls making inferred types `Nullable` in schema inference.
If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred type will never be `Nullable`; if set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or if the file metadata contains information about column nullability.
Default value: `true`.
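As a rough sketch of how the three values behave (the inline data is illustrative, not taken from the documentation above):
```sql
-- 'auto': a column is inferred as Nullable only when the sample (or file metadata) actually shows NULLs.
SET schema_inference_make_columns_nullable = 'auto';
DESC format(JSONEachRow, '{"id" : 1, "status" : null}');
-- With 0, no inferred type is Nullable; with 1 (the default), every inferred type is Nullable.
```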

View File

@ -6,7 +6,7 @@ title: "Functions for Working with Geohash"
## Geohash
[Geohash](https://en.wikipedia.org/wiki/Geohash) is the geocode system, which subdivides the Earth's surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer is the geohash string, the more precise is the geographic location.
[Geohash](https://en.wikipedia.org/wiki/Geohash) is the geocode system, which subdivides the Earth's surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer the geohash string is, the more precise the geographic location will be.
If you need to manually convert geographic coordinates to geohash strings, you can use [geohash.org](http://geohash.org/).
@ -14,26 +14,37 @@ If you need to manually convert geographic coordinates to geohash strings, you c
Encodes latitude and longitude as a [geohash](#geohash)-string.
**Syntax**
``` sql
geohashEncode(longitude, latitude, [precision])
```
**Input values**
- longitude - longitude part of the coordinate you want to encode. Floating in range `[-180°, 180°]`
- latitude - latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]`
- precision - Optional, length of the resulting encoded string, defaults to `12`. Integer in range `[1, 12]`. Any value less than `1` or greater than `12` is silently converted to `12`.
- `longitude` — Longitude part of the coordinate you want to encode. Floating in range `[-180°, 180°]`. [Float](../../data-types/float.md).
- `latitude` — Latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]`. [Float](../../data-types/float.md).
- `precision` (optional) — Length of the resulting encoded string. Defaults to `12`. Integer in the range `[1, 12]`. [Int8](../../data-types/int-uint.md).
:::note
- All coordinate parameters must be of the same type: either `Float32` or `Float64`.
- For the `precision` parameter, any value less than `1` or greater than `12` is silently converted to `12`.
:::
**Returned values**
- alphanumeric `String` of encoded coordinate (modified version of the base32-encoding alphabet is used).
- Alphanumeric string of the encoded coordinate (modified version of the base32-encoding alphabet is used). [String](../../data-types/string.md).
**Example**
Query:
``` sql
SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res;
```
Result:
``` text
┌─res──────────┐
│ ezs42d000000 │
@ -44,13 +55,19 @@ SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res;
Decodes any [geohash](#geohash)-encoded string into longitude and latitude.
**Syntax**
```sql
geohashDecode(hash_str)
```
**Input values**
- encoded string - geohash-encoded string.
- `hash_str` — Geohash-encoded string.
**Returned values**
- (longitude, latitude) - 2-tuple of `Float64` values of longitude and latitude.
- Tuple `(longitude, latitude)` of `Float64` values of longitude and latitude. [Tuple](../../data-types/tuple.md)([Float64](../../data-types/float.md))
**Example**
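A minimal usage sketch; the decoded pair is the centre of the geohash cell, so the values in the comment should be read as approximate:
```sql
SELECT geohashDecode('ezs42') AS res;
-- res is a (longitude, latitude) tuple of Float64, roughly (-5.60, 42.60) for this hash.
```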

View File

@ -688,6 +688,40 @@ SELECT kostikConsistentHash(16045690984833335023, 2);
└───────────────────────────────────────────────┘
```
## ripeMD160
Produces [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) hash value.
**Syntax**
```sql
ripeMD160(input)
```
**Parameters**
- `input`: Input string. [String](../data-types/string.md)
**Returned value**
- A [UInt256](../data-types/int-uint.md) hash value where the 160-bit RIPEMD-160 hash is stored in the first 20 bytes. The remaining 12 bytes are zero-padded.
**Example**
Use the [hex](../functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string.
Query:
```sql
SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));
```
```response
┌─hex(ripeMD160('The quick brown fox jumps over the lazy dog'))─┐
│ 37F332F68DB77BD9D7EDD4969571AD671CF9DD3B │
└───────────────────────────────────────────────────────────────┘
```
## murmurHash2_32, murmurHash2_64
Produces a [MurmurHash2](https://github.com/aappleby/smhasher) hash value.

View File

@ -6,35 +6,37 @@ sidebar_label: iceberg
# iceberg Table Function
Provides a read-only table-like interface to Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3.
Provides a read-only table-like interface to Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3, in Azure, or stored locally.
## Syntax
``` sql
iceberg(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure])
icebergS3(url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,compression_method])
icebergS3(named_collection[, option=value [,..]])
icebergAzure(connection_string|storage_account_url, container_name, blobpath, [,account_name], [,account_key] [,format] [,compression_method])
icebergAzure(named_collection[, option=value [,..]])
icebergLocal(path_to_table, [,format] [,compression_method])
icebergLocal(named_collection[, option=value [,..]])
```
## Arguments
- `url` — Bucket url with the path to an existing Iceberg table in S3.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are used from the ClickHouse configuration. For more information see [Using S3 for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3).
- `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. By default `Parquet` is used.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md).
The description of the arguments coincides with the description of the arguments of the `s3`, `azureBlobStorage`, and `file` table functions respectively.
`format` stands for the format of data files in the Iceberg table.
**Returned value**
A table with the specified structure for reading data in the specified Iceberg table in S3.
A table with the specified structure for reading data in the specified Iceberg table.
**Example**
```sql
SELECT * FROM iceberg('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', 'test')
SELECT * FROM icebergS3('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', 'test')
```
:::important
ClickHouse currently supports reading v1 (v2 support is coming soon!) of the Iceberg format via the `iceberg` table function and `Iceberg` table engine.
ClickHouse currently supports reading v1 and v2 of the Iceberg format via the `icebergS3`, `icebergAzure` and `icebergLocal` table functions and the `IcebergS3`, `IcebergAzure` and `IcebergLocal` table engines.
:::
## Defining a named collection
@ -56,10 +58,14 @@ Here is an example of configuring a named collection for storing the URL and cre
```
```sql
SELECT * FROM iceberg(iceberg_conf, filename = 'test_table')
DESCRIBE iceberg(iceberg_conf, filename = 'test_table')
SELECT * FROM icebergS3(iceberg_conf, filename = 'test_table')
DESCRIBE icebergS3(iceberg_conf, filename = 'test_table')
```
**Aliases**
The table function `iceberg` is now an alias for `icebergS3`.
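Because of the alias, existing queries keep working unchanged; reusing the URL and credentials from the example above, both of the following are equivalent:
```sql
SELECT * FROM iceberg('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', 'test')
SELECT * FROM icebergS3('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', 'test')
```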
**See Also**
- [Iceberg engine](/docs/en/engines/table-engines/integrations/iceberg.md)

View File

@ -124,6 +124,40 @@ SELECT hex(sipHash128('foo', '\x01', 3));
└──────────────────────────────────┘
```
## ripeMD160
Produces a [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) hash of a string.
**Syntax**
```sql
ripeMD160(input)
```
**Arguments**
- `input`: Input string. [String](../data-types/string.md)
**Returned value**
- A [UInt256](../data-types/int-uint.md) value in which the 160-bit RIPEMD-160 hash is stored in the first 20 bytes. The remaining 12 bytes are zero-padded.
**Example**
Use the [hex](../functions/encoding-functions.md#hex) function to represent the result as a hex-encoded string.
Query:
```sql
SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));
```
Result:
```response
┌─hex(ripeMD160('The quick brown fox jumps over the lazy dog'))─┐
│ 37F332F68DB77BD9D7EDD4969571AD671CF9DD3B │
└───────────────────────────────────────────────────────────────┘
```
## cityHash64 {#cityhash64}
Produces a 64-bit [CityHash](https://github.com/google/cityhash) value.

View File

@ -978,6 +978,7 @@ try
/** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available.
* At this moment, no one could own shared part of Context.
*/
global_context->resetSharedContext();
global_context.reset();
shared_context.reset();
LOG_DEBUG(log, "Destroyed global context.");

View File

@ -692,7 +692,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromStorage(
result_column_node = it->second;
}
/// Check if it's a dynamic subcolumn
else
else if (table_expression_data.supports_subcolumns)
{
auto [column_name, dynamic_subcolumn_name] = Nested::splitName(identifier_full_name);
auto jt = table_expression_data.column_name_to_column_node.find(column_name);

View File

@ -4379,7 +4379,10 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table
auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals();
if (storage_snapshot->storage.supportsSubcolumns())
{
get_column_options.withSubcolumns();
table_expression_data.supports_subcolumns = true;
}
auto column_names_and_types = storage_snapshot->getColumns(get_column_options);
table_expression_data.column_names_and_types = NamesAndTypes(column_names_and_types.begin(), column_names_and_types.end());

View File

@ -36,6 +36,7 @@ struct AnalysisTableExpressionData
std::string database_name;
std::string table_name;
bool should_qualify_columns = true;
bool supports_subcolumns = false;
NamesAndTypes column_names_and_types;
ColumnNameToColumnNodeMap column_name_to_column_node;
std::unordered_set<std::string> subcolumn_names; /// Subset columns that are subcolumns of other columns

View File

@ -100,6 +100,7 @@ protected:
auto buf = BuilderRWBufferFromHTTP(getPingURI())
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withTimeouts(getHTTPTimeouts())
.withSettings(getContext()->getReadSettings())
.create(credentials);
return checkString(PING_OK_ANSWER, *buf);
@ -206,6 +207,7 @@ protected:
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
.withTimeouts(getHTTPTimeouts())
.withSettings(getContext()->getReadSettings())
.create(credentials);
bool res = false;
@ -232,6 +234,7 @@ protected:
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
.withTimeouts(getHTTPTimeouts())
.withSettings(getContext()->getReadSettings())
.create(credentials);
std::string character;

View File

@ -111,6 +111,7 @@ add_headers_and_sources(dbms Storages/ObjectStorage)
add_headers_and_sources(dbms Storages/ObjectStorage/Azure)
add_headers_and_sources(dbms Storages/ObjectStorage/S3)
add_headers_and_sources(dbms Storages/ObjectStorage/HDFS)
add_headers_and_sources(dbms Storages/ObjectStorage/Local)
add_headers_and_sources(dbms Storages/ObjectStorage/DataLakes)
add_headers_and_sources(dbms Common/NamedCollections)

View File

@ -145,6 +145,9 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
/// work we need to pass the host name separately. It will be sent in the TLS Hello packet to let
/// the server know which host we want to talk with (single IP can process requests for multiple hosts using SNI).
static_cast<Poco::Net::SecureStreamSocket*>(socket.get())->setPeerHostName(host);
/// We want to postpone the SSL handshake until the first read or write operation,
/// so that any errors during negotiation are processed properly.
static_cast<Poco::Net::SecureStreamSocket*>(socket.get())->setLazyHandshake(true);
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "tcp_secure protocol is disabled because poco library was built without NetSSL support.");
#endif

View File

@ -299,13 +299,14 @@ ReplxxLineReader::ReplxxLineReader(
Patterns delimiters_,
const char word_break_characters_[],
replxx::Replxx::highlighter_callback_t highlighter_,
[[ maybe_unused ]] std::istream & input_stream_,
[[ maybe_unused ]] std::ostream & output_stream_,
[[ maybe_unused ]] int in_fd_,
[[ maybe_unused ]] int out_fd_,
[[ maybe_unused ]] int err_fd_
std::istream & input_stream_,
std::ostream & output_stream_,
int in_fd_,
int out_fd_,
int err_fd_
)
: LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_), input_stream_, output_stream_, in_fd_)
, rx(input_stream_, output_stream_, in_fd_, out_fd_, err_fd_)
, highlighter(std::move(highlighter_))
, word_break_characters(word_break_characters_)
, editor(getEditor())
@ -516,7 +517,7 @@ void ReplxxLineReader::addToHistory(const String & line)
rx.history_add(line);
// flush changes to the disk
if (!rx.history_save(history_file_path))
if (history_file_fd >= 0 && !rx.history_save(history_file_path))
rx.print("Saving history failed: %s\n", errnoToString().c_str());
if (history_file_fd >= 0 && locked && 0 != flock(history_file_fd, LOCK_UN))

View File

@ -1181,13 +1181,14 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source
/// Check if the number of all dynamic types exceeds the limit.
if (!canAddNewVariants(0, all_variants.size()))
{
/// Create list of variants with their sizes and sort it.
std::vector<std::pair<size_t, DataTypePtr>> variants_with_sizes;
/// Create a list of variants with their sizes and names and then sort it.
std::vector<std::tuple<size_t, String, DataTypePtr>> variants_with_sizes;
variants_with_sizes.reserve(all_variants.size());
for (const auto & variant : all_variants)
{
if (variant->getName() != getSharedVariantTypeName())
variants_with_sizes.emplace_back(total_sizes[variant->getName()], variant);
auto variant_name = variant->getName();
if (variant_name != getSharedVariantTypeName())
variants_with_sizes.emplace_back(total_sizes[variant_name], variant_name, variant);
}
std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater());
@ -1196,14 +1197,14 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source
result_variants.reserve(max_dynamic_types + 1); /// +1 for shared variant.
/// Add shared variant.
result_variants.push_back(getSharedVariantDataType());
for (const auto & [size, variant] : variants_with_sizes)
for (const auto & [size, variant_name, variant_type] : variants_with_sizes)
{
/// Add variant to the resulting variants list until we reach max_dynamic_types.
if (canAddNewVariant(result_variants.size()))
result_variants.push_back(variant);
result_variants.push_back(variant_type);
/// Add all remaining variants into shared_variants_statistics until we reach its max size.
else if (new_statistics.shared_variants_statistics.size() < Statistics::MAX_SHARED_VARIANT_STATISTICS_SIZE)
new_statistics.shared_variants_statistics[variant->getName()] = size;
new_statistics.shared_variants_statistics[variant_name] = size;
else
break;
}

View File

@ -127,7 +127,7 @@ std::string ColumnObject::getName() const
{
WriteBufferFromOwnString ss;
ss << "Object(";
ss << "max_dynamic_paths=" << max_dynamic_paths;
ss << "max_dynamic_paths=" << global_max_dynamic_paths;
ss << ", max_dynamic_types=" << max_dynamic_types;
std::vector<String> sorted_typed_paths;
sorted_typed_paths.reserve(typed_paths.size());
@ -1045,9 +1045,9 @@ void ColumnObject::forEachSubcolumnRecursively(DB::IColumn::RecursiveMutableColu
bool ColumnObject::structureEquals(const IColumn & rhs) const
{
/// 2 Object columns have equal structure if they have the same typed paths and max_dynamic_paths/max_dynamic_types.
/// 2 Object columns have equal structure if they have the same typed paths and global_max_dynamic_paths/max_dynamic_types.
const auto * rhs_object = typeid_cast<const ColumnObject *>(&rhs);
if (!rhs_object || typed_paths.size() != rhs_object->typed_paths.size() || max_dynamic_paths != rhs_object->max_dynamic_paths || max_dynamic_types != rhs_object->max_dynamic_types)
if (!rhs_object || typed_paths.size() != rhs_object->typed_paths.size() || global_max_dynamic_paths != rhs_object->global_max_dynamic_paths || max_dynamic_types != rhs_object->max_dynamic_types)
return false;
for (const auto & [path, column] : typed_paths)

View File

@ -953,7 +953,7 @@ ColumnPtr ColumnVariant::index(const IColumn & indexes, size_t limit) const
{
/// If we have only NULLs, index will take no effect, just return resized column.
if (hasOnlyNulls())
return cloneResized(limit);
return cloneResized(limit == 0 ? indexes.size() : limit);
/// Optimization when we have only one non empty variant and no NULLs.
/// In this case local_discriminators column is filled with identical values and offsets column
@ -1009,8 +1009,16 @@ ColumnPtr ColumnVariant::indexImpl(const PaddedPODArray<Type> & indexes, size_t
new_variants.reserve(num_variants);
for (size_t i = 0; i != num_variants; ++i)
{
size_t nested_limit = nested_perms[i].size() == variants[i]->size() ? 0 : nested_perms[i].size();
new_variants.emplace_back(variants[i]->permute(nested_perms[i], nested_limit));
/// Check if no values from this variant were selected.
if (nested_perms[i].empty())
{
new_variants.emplace_back(variants[i]->cloneEmpty());
}
else
{
size_t nested_limit = nested_perms[i].size() == variants[i]->size() ? 0 : nested_perms[i].size();
new_variants.emplace_back(variants[i]->permute(nested_perms[i], nested_limit));
}
}
/// We cannot use new_offsets column as an offset column, because it became invalid after variants permutation.

View File

@ -1120,7 +1120,7 @@ class IColumn;
M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \
M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \
M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files", 0) \
M(Bool, schema_inference_make_columns_nullable, true, "If set to true, all inferred types will be Nullable in schema inference for formats without information about nullability.", 0) \
M(UInt64Auto, schema_inference_make_columns_nullable, 1, "If set to true, all inferred types will be Nullable in schema inference. When set to false, no columns will be converted to Nullable. When set to 'auto', ClickHouse will use information about nullability from the data.", 0) \
M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
M(Bool, input_format_json_read_bools_as_strings, true, "Allow to parse bools as strings in JSON input formats", 0) \
M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \

View File

@ -22,7 +22,6 @@
#include <cstring>
#include <unistd.h>
#include <algorithm>
#include <typeinfo>
#include <iostream>
#include <memory>

View File

@ -43,39 +43,21 @@ bool LocalObjectStorage::exists(const StoredObject & object) const
std::unique_ptr<ReadBufferFromFileBase> LocalObjectStorage::readObjects( /// NOLINT
const StoredObjects & objects,
const ReadSettings & read_settings,
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const
std::optional<size_t>,
std::optional<size_t>) const
{
auto modified_settings = patchSettings(read_settings);
auto global_context = Context::getGlobalContextInstance();
auto read_buffer_creator =
[=] (bool /* restricted_seek */, const StoredObject & object)
-> std::unique_ptr<ReadBufferFromFileBase>
{
return createReadBufferFromFileBase(object.remote_path, modified_settings, read_hint, file_size);
};
auto read_buffer_creator = [=](bool /* restricted_seek */, const StoredObject & object) -> std::unique_ptr<ReadBufferFromFileBase>
{ return std::make_unique<ReadBufferFromFile>(object.remote_path); };
switch (read_settings.remote_fs_method)
{
case RemoteFSReadMethod::read:
{
return std::make_unique<ReadBufferFromRemoteFSGather>(
std::move(read_buffer_creator), objects, "file:", modified_settings,
global_context->getFilesystemCacheLog(), /* use_external_buffer */false);
}
case RemoteFSReadMethod::threadpool:
{
auto impl = std::make_unique<ReadBufferFromRemoteFSGather>(
std::move(read_buffer_creator), objects, "file:", modified_settings,
global_context->getFilesystemCacheLog(), /* use_external_buffer */true);
auto & reader = global_context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER);
return std::make_unique<AsynchronousBoundedReadBuffer>(
std::move(impl), reader, read_settings,
global_context->getAsyncReadCounters(),
global_context->getFilesystemReadPrefetchesLog());
}
}
return std::make_unique<ReadBufferFromRemoteFSGather>(
std::move(read_buffer_creator),
objects,
"file:",
modified_settings,
global_context->getFilesystemCacheLog(),
/* use_external_buffer */ false);
}
ReadSettings LocalObjectStorage::patchSettings(const ReadSettings & read_settings) const

View File

@ -257,7 +257,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.max_bytes_to_read_for_schema_inference = settings.input_format_max_bytes_to_read_for_schema_inference;
format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference;
format_settings.schema_inference_hints = settings.schema_inference_hints;
format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable;
format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable.valueOr(2);
format_settings.mysql_dump.table_name = settings.input_format_mysql_dump_table_name;
format_settings.mysql_dump.map_column_names = settings.input_format_mysql_dump_map_column_names;
format_settings.sql_insert.max_batch_size = settings.output_format_sql_insert_max_batch_size;

View File

@ -77,7 +77,7 @@ struct FormatSettings
Raw
};
bool schema_inference_make_columns_nullable = true;
UInt64 schema_inference_make_columns_nullable = 1;
DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple;

View File

@ -1344,7 +1344,11 @@ namespace
if (checkCharCaseInsensitive('n', buf))
{
if (checkStringCaseInsensitive("ull", buf))
return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>());
{
if (settings.schema_inference_make_columns_nullable == 0)
return std::make_shared<DataTypeNothing>();
return makeNullable(std::make_shared<DataTypeNothing>());
}
else if (checkStringCaseInsensitive("an", buf))
return std::make_shared<DataTypeFloat64>();
}

View File

@ -19,7 +19,9 @@
#include <Common/HashTable/Hash.h>
#if USE_SSL
# include <openssl/evp.h>
# include <openssl/md5.h>
# include <openssl/ripemd.h>
#endif
#include <bit>
@ -196,6 +198,34 @@ T combineHashesFunc(T t1, T t2)
return HashFunction::apply(reinterpret_cast<const char *>(hashes), sizeof(hashes));
}
#if USE_SSL
struct RipeMD160Impl
{
static constexpr auto name = "ripeMD160";
using ReturnType = UInt256;
static UInt256 apply(const char * begin, size_t size)
{
UInt8 digest[RIPEMD160_DIGEST_LENGTH];
RIPEMD160(reinterpret_cast<const unsigned char *>(begin), size, reinterpret_cast<unsigned char *>(digest));
std::reverse(digest, digest + RIPEMD160_DIGEST_LENGTH);
UInt256 res = 0;
std::memcpy(&res, digest, RIPEMD160_DIGEST_LENGTH);
return res;
}
static UInt256 combineHashes(UInt256 h1, UInt256 h2)
{
return combineHashesFunc<UInt256, RipeMD160Impl>(h1, h2);
}
static constexpr bool use_int_hash_for_pods = false;
};
#endif
struct SipHash64Impl
{
@ -1624,6 +1654,7 @@ using FunctionIntHash32 = FunctionIntHash<IntHash32Impl, NameIntHash32>;
using FunctionIntHash64 = FunctionIntHash<IntHash64Impl, NameIntHash64>;
#if USE_SSL
using FunctionHalfMD5 = FunctionAnyHash<HalfMD5Impl>;
using FunctionRipeMD160Hash = FunctionAnyHash<RipeMD160Impl>;
#endif
using FunctionSipHash128 = FunctionAnyHash<SipHash128Impl>;
using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key, SipHash128KeyedImpl::KeyColumns>;
@ -1652,6 +1683,7 @@ using FunctionXxHash64 = FunctionAnyHash<ImplXxHash64>;
using FunctionXXH3 = FunctionAnyHash<ImplXXH3>;
using FunctionWyHash64 = FunctionAnyHash<ImplWyHash64>;
}
#pragma clang diagnostic pop

View File

@ -0,0 +1,23 @@
#include "FunctionsHashing.h"
#include <Functions/FunctionFactory.h>
/// FunctionsHashing instantiations are separated into files FunctionsHashing*.cpp
/// to better parallelize the build procedure and avoid MSan build failure
/// due to excessive resource consumption.
namespace DB
{
#if USE_SSL
REGISTER_FUNCTION(HashingRipe)
{
factory.registerFunction<FunctionRipeMD160Hash>(FunctionDocumentation{
.description = "RIPEMD-160 hash function, primarily used in Bitcoin address generation.",
.examples{{"", "SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));", R"(
hex(ripeMD160('The quick brown fox jumps over the lazy dog'))
37F332F68DB77BD9D7EDD4969571AD671CF9DD3B
)"}},
.categories{"Hash"}});
}
#endif
}

View File

@ -1598,6 +1598,9 @@ ColumnPtr FunctionArrayElement::executeTuple(const ColumnsWithTypeAndName & argu
const auto & tuple_columns = col_nested->getColumns();
size_t tuple_size = tuple_columns.size();
if (tuple_size == 0)
return ColumnTuple::create(input_rows_count);
const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(
*typeid_cast<const DataTypeArray &>(*arguments[0].type).getNestedType()).getElements();

View File

@ -787,7 +787,7 @@ S3CredentialsProviderChain::S3CredentialsProviderChain(
/// EC2MetadataService delay is in order of seconds so it only make sense to retry after a couple of seconds.
/// But the connection timeout should be small because there is the case when there is no IMDS at all,
/// like outside of the cloud, on your own machines.
aws_client_configuration.connectTimeoutMs = 10;
aws_client_configuration.connectTimeoutMs = 50;
aws_client_configuration.requestTimeoutMs = 1000;
aws_client_configuration.retryStrategy = std::make_shared<Aws::Client::DefaultRetryStrategy>(1, 1000);

View File

@ -893,6 +893,12 @@ ContextData::ContextData(const ContextData &o) :
{
}
void ContextData::resetSharedContext()
{
std::lock_guard<std::mutex> lock(mutex_shared_context);
shared = nullptr;
}
Context::Context() = default;
Context::Context(const Context & rhs) : ContextData(rhs), std::enable_shared_from_this<Context>(rhs) {}
@ -914,14 +920,6 @@ ContextMutablePtr Context::createGlobal(ContextSharedPart * shared_part)
return res;
}
void Context::initGlobal()
{
assert(!global_context_instance);
global_context_instance = shared_from_this();
DatabaseCatalog::init(shared_from_this());
EventNotifier::init();
}
SharedContextHolder Context::createShared()
{
return SharedContextHolder(std::make_unique<ContextSharedPart>());
@ -2692,7 +2690,11 @@ void Context::makeSessionContext()
void Context::makeGlobalContext()
{
initGlobal();
assert(!global_context_instance);
global_context_instance = shared_from_this();
DatabaseCatalog::init(shared_from_this());
EventNotifier::init();
global_context = shared_from_this();
}
@ -4088,8 +4090,13 @@ void Context::initializeTraceCollector()
}
/// Call after unexpected crash happen.
void Context::handleCrash() const TSA_NO_THREAD_SAFETY_ANALYSIS
void Context::handleCrash() const
{
std::lock_guard<std::mutex> lock(mutex_shared_context);
if (!shared)
return;
SharedLockGuard lock2(shared->mutex);
if (shared->system_logs)
shared->system_logs->handleCrash();
}

View File

@ -492,6 +492,8 @@ public:
KitchenSink kitchen_sink;
void resetSharedContext();
protected:
using SampleBlockCache = std::unordered_map<std::string, Block>;
mutable SampleBlockCache sample_block_cache;
@ -529,6 +531,10 @@ protected:
mutable ThrottlerPtr local_write_query_throttler; /// A query-wide throttler for local IO writes
mutable ThrottlerPtr backups_query_throttler; /// A query-wide throttler for BACKUPs
mutable std::mutex mutex_shared_context; /// mutex to avoid accessing destroyed shared context pointer
/// some Context methods can be called after the shared context is destroyed
/// example, Context::handleCrash() method - called from signal handler
};
/** A set of known objects that can be used in the query.
@ -1387,8 +1393,6 @@ private:
ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoaderWithLock(const std::lock_guard<std::mutex> & lock);
void initGlobal();
void setUserID(const UUID & user_id_);
void setCurrentRolesImpl(const std::vector<UUID> & new_current_roles, bool throw_if_not_granted, bool skip_if_not_granted, const std::shared_ptr<const User> & user);

View File

@ -701,7 +701,6 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec);
}
column.statistics.column_name = column.name; /// We assign column name here for better exception error message.
if (col_decl.statistics_desc)
{
if (!skip_checks && !context_->getSettingsRef().allow_experimental_statistics)

View File

@ -54,13 +54,8 @@ void checkFinalInferredType(
type = default_type;
}
if (settings.schema_inference_make_columns_nullable)
if (settings.schema_inference_make_columns_nullable == 1)
type = makeNullableRecursively(type);
/// In case when data for some column could contain nulls and regular values,
/// resulting inferred type is Nullable.
/// If input_format_null_as_default is enabled, we should remove Nullable type.
else if (settings.null_as_default)
type = removeNullable(type);
}
void ISchemaReader::transformTypesIfNeeded(DB::DataTypePtr & type, DB::DataTypePtr & new_type)

View File

@ -204,8 +204,11 @@ NamesAndTypesList ArrowSchemaReader::readSchema()
schema = file_reader->schema();
auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
*schema, stream ? "ArrowStream" : "Arrow", format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference);
if (format_settings.schema_inference_make_columns_nullable)
*schema,
stream ? "ArrowStream" : "Arrow",
format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference,
format_settings.schema_inference_make_columns_nullable != 0);
if (format_settings.schema_inference_make_columns_nullable == 1)
return getNamesAndRecursivelyNullableTypes(header);
return header.getNamesAndTypesList();
}

View File

@ -727,6 +727,7 @@ struct ReadColumnFromArrowColumnSettings
FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior;
bool allow_arrow_null_type;
bool skip_columns_with_unsupported_types;
bool allow_inferring_nullable_columns;
};
static ColumnWithTypeAndName readColumnFromArrowColumn(
@ -1109,7 +1110,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
bool is_map_nested_column,
const ReadColumnFromArrowColumnSettings & settings)
{
bool read_as_nullable_column = arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable());
bool read_as_nullable_column = (arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable())) && settings.allow_inferring_nullable_columns;
if (read_as_nullable_column &&
arrow_column->type()->id() != arrow::Type::LIST &&
arrow_column->type()->id() != arrow::Type::LARGE_LIST &&
@ -1173,14 +1174,16 @@ static std::shared_ptr<arrow::ChunkedArray> createArrowColumn(const std::shared_
Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(
const arrow::Schema & schema,
const std::string & format_name,
bool skip_columns_with_unsupported_types)
bool skip_columns_with_unsupported_types,
bool allow_inferring_nullable_columns)
{
ReadColumnFromArrowColumnSettings settings
{
.format_name = format_name,
.date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore,
.allow_arrow_null_type = false,
.skip_columns_with_unsupported_types = skip_columns_with_unsupported_types
.skip_columns_with_unsupported_types = skip_columns_with_unsupported_types,
.allow_inferring_nullable_columns = allow_inferring_nullable_columns,
};
ColumnsWithTypeAndName sample_columns;
@ -1254,7 +1257,8 @@ Chunk ArrowColumnToCHColumn::arrowColumnsToCHChunk(const NameToArrowColumn & nam
.format_name = format_name,
.date_time_overflow_behavior = date_time_overflow_behavior,
.allow_arrow_null_type = true,
.skip_columns_with_unsupported_types = false
.skip_columns_with_unsupported_types = false,
.allow_inferring_nullable_columns = true
};
Columns columns;

View File

@ -34,7 +34,8 @@ public:
static Block arrowSchemaToCHHeader(
const arrow::Schema & schema,
const std::string & format_name,
bool skip_columns_with_unsupported_types = false);
bool skip_columns_with_unsupported_types = false,
bool allow_inferring_nullable_columns = true);
struct DictionaryInfo
{

View File

@ -15,8 +15,8 @@ namespace ErrorCodes
}
template <bool with_defaults>
BinaryRowInputFormat<with_defaults>::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_)
: RowInputFormatWithNamesAndTypes(
BinaryRowInputFormat<with_defaults>::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, IRowInputFormat::Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_)
: RowInputFormatWithNamesAndTypes<BinaryFormatReader<with_defaults>>(
header,
in_,
params_,

View File

@ -10,13 +10,16 @@ namespace DB
class ReadBuffer;
template <bool>
class BinaryFormatReader;
/** A stream for inputting data in a binary line-by-line format.
*/
template <bool with_defaults = false>
class BinaryRowInputFormat final : public RowInputFormatWithNamesAndTypes
class BinaryRowInputFormat final : public RowInputFormatWithNamesAndTypes<BinaryFormatReader<with_defaults>>
{
public:
BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_);
BinaryRowInputFormat(ReadBuffer & in_, const Block & header, IRowInputFormat::Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_);
String getName() const override { return "BinaryRowInputFormat"; }

View File

@ -61,7 +61,7 @@ CSVRowInputFormat::CSVRowInputFormat(
bool with_names_,
bool with_types_,
const FormatSettings & format_settings_,
std::unique_ptr<FormatWithNamesAndTypesReader> format_reader_)
std::unique_ptr<CSVFormatReader> format_reader_)
: RowInputFormatWithNamesAndTypes(
header_,
*in_,

View File

@ -1,7 +1,6 @@
#pragma once
#include <optional>
#include <unordered_map>
#include <Core/Block.h>
#include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
@ -13,10 +12,12 @@
namespace DB
{
class CSVFormatReader;
/** A stream for inputting data in csv format.
* Does not conform with https://tools.ietf.org/html/rfc4180 because it skips spaces and tabs between values.
*/
class CSVRowInputFormat : public RowInputFormatWithNamesAndTypes
class CSVRowInputFormat : public RowInputFormatWithNamesAndTypes<CSVFormatReader>
{
public:
/** with_names - in the first line the header with column names
@ -32,7 +33,7 @@ public:
protected:
CSVRowInputFormat(const Block & header_, std::shared_ptr<PeekableReadBuffer> in_, const Params & params_,
bool with_names_, bool with_types_, const FormatSettings & format_settings_, std::unique_ptr<FormatWithNamesAndTypesReader> format_reader_);
bool with_names_, bool with_types_, const FormatSettings & format_settings_, std::unique_ptr<CSVFormatReader> format_reader_);
CSVRowInputFormat(const Block & header_, std::shared_ptr<PeekableReadBuffer> in_buf_, const Params & params_,
bool with_names_, bool with_types_, const FormatSettings & format_settings_);

View File

@ -9,7 +9,8 @@
namespace DB
{
class CustomSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes
class CustomSeparatedFormatReader;
class CustomSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes<CustomSeparatedFormatReader>
{
public:
CustomSeparatedRowInputFormat(

View File

@ -11,7 +11,7 @@ namespace DB
{
class ReadBuffer;
class JSONCompactEachRowFormatReader;
/** A stream for reading data in a bunch of formats:
* - JSONCompactEachRow
@ -20,7 +20,7 @@ class ReadBuffer;
* - JSONCompactStringsEachRowWithNamesAndTypes
*
*/
class JSONCompactEachRowRowInputFormat final : public RowInputFormatWithNamesAndTypes
class JSONCompactEachRowRowInputFormat final : public RowInputFormatWithNamesAndTypes<JSONCompactEachRowFormatReader>
{
public:
JSONCompactEachRowRowInputFormat(

View File

@ -14,7 +14,7 @@ namespace ErrorCodes
JSONCompactRowInputFormat::JSONCompactRowInputFormat(
const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_)
: RowInputFormatWithNamesAndTypes(
: RowInputFormatWithNamesAndTypes<JSONCompactFormatReader>(
header_, in_, params_, false, false, false, format_settings_, std::make_unique<JSONCompactFormatReader>(in_, format_settings_))
{
}

View File

@ -5,8 +5,8 @@
namespace DB
{
class JSONCompactRowInputFormat final : public RowInputFormatWithNamesAndTypes
class JSONCompactFormatReader;
class JSONCompactRowInputFormat final : public RowInputFormatWithNamesAndTypes<JSONCompactFormatReader>
{
public:
JSONCompactRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_);

View File

@ -1002,7 +1002,7 @@ NamesAndTypesList NativeORCSchemaReader::readSchema()
header.insert(ColumnWithTypeAndName{type, name});
}
if (format_settings.schema_inference_make_columns_nullable)
if (format_settings.schema_inference_make_columns_nullable == 1)
return getNamesAndRecursivelyNullableTypes(header);
return header.getNamesAndTypesList();
}

View File

@ -160,8 +160,11 @@ NamesAndTypesList ORCSchemaReader::readSchema()
{
initializeIfNeeded();
auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
*schema, "ORC", format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference);
if (format_settings.schema_inference_make_columns_nullable)
*schema,
"ORC",
format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference,
format_settings.schema_inference_make_columns_nullable != 0);
if (format_settings.schema_inference_make_columns_nullable == 1)
return getNamesAndRecursivelyNullableTypes(header);
return header.getNamesAndTypesList();
}

View File

@ -869,8 +869,11 @@ NamesAndTypesList ParquetSchemaReader::readSchema()
THROW_ARROW_NOT_OK(parquet::arrow::FromParquetSchema(metadata->schema(), &schema));
auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
*schema, "Parquet", format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference);
if (format_settings.schema_inference_make_columns_nullable)
*schema,
"Parquet",
format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference,
format_settings.schema_inference_make_columns_nullable != 0);
if (format_settings.schema_inference_make_columns_nullable == 1)
return getNamesAndRecursivelyNullableTypes(header);
return header.getNamesAndTypesList();
}

View File

@ -10,9 +10,11 @@
namespace DB
{
class TabSeparatedFormatReader;
/** A stream to input data in tsv format.
*/
class TabSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes
class TabSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes<TabSeparatedFormatReader>
{
public:
/** with_names - the first line is the header with the names of the columns

View File

@ -1,14 +1,20 @@
#include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
#include <Processors/Formats/ISchemaReader.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <IO/ReadHelpers.h>
#include <IO/Operators.h>
#include <IO/ReadBufferFromString.h>
#include <IO/PeekableReadBuffer.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeNullable.h>
#include <Formats/EscapingRuleUtils.h>
#include <IO/Operators.h>
#include <IO/PeekableReadBuffer.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <Processors/Formats/ISchemaReader.h>
#include <Processors/Formats/Impl/BinaryRowInputFormat.h>
#include <Processors/Formats/Impl/CSVRowInputFormat.h>
#include <Processors/Formats/Impl/CustomSeparatedRowInputFormat.h>
#include <Processors/Formats/Impl/HiveTextRowInputFormat.h>
#include <Processors/Formats/Impl/JSONCompactRowInputFormat.h>
#include <Processors/Formats/Impl/TabSeparatedRowInputFormat.h>
#include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
namespace DB
@ -44,7 +50,8 @@ namespace
}
}
RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes(
template <typename FormatReaderImpl>
RowInputFormatWithNamesAndTypes<FormatReaderImpl>::RowInputFormatWithNamesAndTypes(
const Block & header_,
ReadBuffer & in_,
const Params & params_,
@ -52,7 +59,7 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes(
bool with_names_,
bool with_types_,
const FormatSettings & format_settings_,
std::unique_ptr<FormatWithNamesAndTypesReader> format_reader_,
std::unique_ptr<FormatReaderImpl> format_reader_,
bool try_detect_header_)
: RowInputFormatWithDiagnosticInfo(header_, in_, params_)
, format_settings(format_settings_)
@ -66,7 +73,8 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes(
column_indexes_by_names = getPort().getHeader().getNamesToIndexesMap();
}
void RowInputFormatWithNamesAndTypes::readPrefix()
template <typename FormatReaderImpl>
void RowInputFormatWithNamesAndTypes<FormatReaderImpl>::readPrefix()
{
/// Search and remove BOM only in textual formats (CSV, TSV etc), not in binary ones (RowBinary*).
/// Also, we assume that column name or type cannot contain BOM, so, if format has header,
@ -138,7 +146,8 @@ void RowInputFormatWithNamesAndTypes::readPrefix()
}
}
void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector<String> & column_names_out, std::vector<String> & type_names_out)
template <typename FormatReaderImpl>
void RowInputFormatWithNamesAndTypes<FormatReaderImpl>::tryDetectHeader(std::vector<String> & column_names_out, std::vector<String> & type_names_out)
{
auto & read_buf = getReadBuffer();
PeekableReadBuffer * peekable_buf = dynamic_cast<PeekableReadBuffer *>(&read_buf);
@ -201,7 +210,8 @@ void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector<String> & colu
peekable_buf->dropCheckpoint();
}
bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadExtension & ext)
template <typename FormatReaderImpl>
bool RowInputFormatWithNamesAndTypes<FormatReaderImpl>::readRow(MutableColumns & columns, RowReadExtension & ext)
{
if (unlikely(end_of_stream))
return false;
@ -280,7 +290,8 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE
return true;
}
size_t RowInputFormatWithNamesAndTypes::countRows(size_t max_block_size)
template <typename FormatReaderImpl>
size_t RowInputFormatWithNamesAndTypes<FormatReaderImpl>::countRows(size_t max_block_size)
{
if (unlikely(end_of_stream))
return 0;
@ -304,7 +315,8 @@ size_t RowInputFormatWithNamesAndTypes::countRows(size_t max_block_size)
return num_rows;
}
void RowInputFormatWithNamesAndTypes::resetParser()
template <typename FormatReaderImpl>
void RowInputFormatWithNamesAndTypes<FormatReaderImpl>::resetParser()
{
RowInputFormatWithDiagnosticInfo::resetParser();
column_mapping->column_indexes_for_input_fields.clear();
@ -313,7 +325,8 @@ void RowInputFormatWithNamesAndTypes::resetParser()
end_of_stream = false;
}
void RowInputFormatWithNamesAndTypes::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column)
template <typename FormatReaderImpl>
void RowInputFormatWithNamesAndTypes<FormatReaderImpl>::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column)
{
const auto & index = column_mapping->column_indexes_for_input_fields[file_column];
if (index)
@ -328,7 +341,8 @@ void RowInputFormatWithNamesAndTypes::tryDeserializeField(const DataTypePtr & ty
}
}
bool RowInputFormatWithNamesAndTypes::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out)
template <typename FormatReaderImpl>
bool RowInputFormatWithNamesAndTypes<FormatReaderImpl>::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out)
{
if (in->eof())
{
@ -374,12 +388,14 @@ bool RowInputFormatWithNamesAndTypes::parseRowAndPrintDiagnosticInfo(MutableColu
return format_reader->parseRowEndWithDiagnosticInfo(out);
}
bool RowInputFormatWithNamesAndTypes::isGarbageAfterField(size_t index, ReadBuffer::Position pos)
template <typename FormatReaderImpl>
bool RowInputFormatWithNamesAndTypes<FormatReaderImpl>::isGarbageAfterField(size_t index, ReadBuffer::Position pos)
{
return format_reader->isGarbageAfterField(index, pos);
}
void RowInputFormatWithNamesAndTypes::setReadBuffer(ReadBuffer & in_)
template <typename FormatReaderImpl>
void RowInputFormatWithNamesAndTypes<FormatReaderImpl>::setReadBuffer(ReadBuffer & in_)
{
format_reader->setReadBuffer(in_);
IInputFormat::setReadBuffer(in_);
@ -582,5 +598,12 @@ void FormatWithNamesAndTypesSchemaReader::transformTypesIfNeeded(DB::DataTypePtr
transformInferredTypesIfNeeded(type, new_type, format_settings);
}
template class RowInputFormatWithNamesAndTypes<JSONCompactFormatReader>;
template class RowInputFormatWithNamesAndTypes<JSONCompactEachRowFormatReader>;
template class RowInputFormatWithNamesAndTypes<TabSeparatedFormatReader>;
template class RowInputFormatWithNamesAndTypes<CSVFormatReader>;
template class RowInputFormatWithNamesAndTypes<CustomSeparatedFormatReader>;
template class RowInputFormatWithNamesAndTypes<BinaryFormatReader<true>>;
template class RowInputFormatWithNamesAndTypes<BinaryFormatReader<false>>;
}

View File

@ -26,6 +26,7 @@ class FormatWithNamesAndTypesReader;
/// will be compared types from header.
/// It's important that firstly this class reads/skips names and only
/// then reads/skips types. So you can rely on this invariant.
template <typename FormatReaderImpl>
class RowInputFormatWithNamesAndTypes : public RowInputFormatWithDiagnosticInfo
{
protected:
@ -41,7 +42,7 @@ protected:
bool with_names_,
bool with_types_,
const FormatSettings & format_settings_,
std::unique_ptr<FormatWithNamesAndTypesReader> format_reader_,
std::unique_ptr<FormatReaderImpl> format_reader_,
bool try_detect_header_ = false);
void resetParser() override;
@ -70,7 +71,7 @@ private:
bool is_header_detected = false;
protected:
std::unique_ptr<FormatWithNamesAndTypesReader> format_reader;
std::unique_ptr<FormatReaderImpl> format_reader;
Block::NameMap column_indexes_by_names;
};

View File

@ -255,7 +255,7 @@ void buildSortingDAG(QueryPlan::Node & node, std::optional<ActionsDAG> & dag, Fi
/// Add more functions to fixed columns.
/// A function's result is fixed if all its arguments are fixed or constants.
void enreachFixedColumns(const ActionsDAG & dag, FixedColumns & fixed_columns)
void enrichFixedColumns(const ActionsDAG & dag, FixedColumns & fixed_columns)
{
struct Frame
{
@ -300,20 +300,20 @@ void enreachFixedColumns(const ActionsDAG & dag, FixedColumns & fixed_columns)
{
if (frame.node->function_base->isDeterministicInScopeOfQuery())
{
//std::cerr << "*** enreachFixedColumns check " << frame.node->result_name << std::endl;
//std::cerr << "*** enrichFixedColumns check " << frame.node->result_name << std::endl;
bool all_args_fixed_or_const = true;
for (const auto * child : frame.node->children)
{
if (!child->column && !fixed_columns.contains(child))
{
//std::cerr << "*** enreachFixedColumns fail " << child->result_name << ' ' << static_cast<const void *>(child) << std::endl;
//std::cerr << "*** enrichFixedColumns fail " << child->result_name << ' ' << static_cast<const void *>(child) << std::endl;
all_args_fixed_or_const = false;
}
}
if (all_args_fixed_or_const)
{
//std::cerr << "*** enreachFixedColumns add " << frame.node->result_name << ' ' << static_cast<const void *>(frame.node) << std::endl;
//std::cerr << "*** enrichFixedColumns add " << frame.node->result_name << ' ' << static_cast<const void *>(frame.node) << std::endl;
fixed_columns.insert(frame.node);
}
}
@ -357,7 +357,7 @@ InputOrderInfoPtr buildInputOrderInfo(
}
}
enreachFixedColumns(sorting_key_dag, fixed_key_columns);
enrichFixedColumns(sorting_key_dag, fixed_key_columns);
}
/// This is a result direction we will read from MergeTree
@ -530,7 +530,7 @@ AggregationInputOrder buildInputOrderInfo(
}
}
enreachFixedColumns(sorting_key_dag, fixed_key_columns);
enrichFixedColumns(sorting_key_dag, fixed_key_columns);
for (const auto * output : dag->getOutputs())
{
@ -804,7 +804,7 @@ InputOrderInfoPtr buildInputOrderInfo(SortingStep & sorting, QueryPlan::Node & n
buildSortingDAG(node, dag, fixed_columns, limit);
if (dag && !fixed_columns.empty())
enreachFixedColumns(*dag, fixed_columns);
enrichFixedColumns(*dag, fixed_columns);
if (auto * reading = typeid_cast<ReadFromMergeTree *>(reading_node->step.get()))
{
@ -858,7 +858,7 @@ AggregationInputOrder buildInputOrderInfo(AggregatingStep & aggregating, QueryPl
buildSortingDAG(node, dag, fixed_columns, limit);
if (dag && !fixed_columns.empty())
enreachFixedColumns(*dag, fixed_columns);
enrichFixedColumns(*dag, fixed_columns);
if (auto * reading = typeid_cast<ReadFromMergeTree *>(reading_node->step.get()))
{

View File

@ -706,9 +706,9 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
}
auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns);
for (const auto & stats : stats_vec)
for (const auto & [stats_column_name, stats] : stats_vec)
{
metadata.columns.modify(stats.column_name,
metadata.columns.modify(stats_column_name,
[&](ColumnDescription & column) { column.statistics.merge(stats, column.name, column.type, if_not_exists); });
}
}
@ -735,14 +735,14 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
{
if (!metadata.columns.has(statistics_column_name))
{
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot add statistics for column {}: this column is not found", statistics_column_name);
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot modify statistics for column {}: this column is not found", statistics_column_name);
}
}
auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns);
for (const auto & stats : stats_vec)
for (const auto & [stats_column_name, stats] : stats_vec)
{
metadata.columns.modify(stats.column_name,
metadata.columns.modify(stats_column_name,
[&](ColumnDescription & column) { column.statistics.assign(stats); });
}
}
@ -867,8 +867,6 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
rename_visitor.visit(column_to_modify.default_desc.expression);
if (column_to_modify.ttl)
rename_visitor.visit(column_to_modify.ttl);
if (column_to_modify.name == column_name && !column_to_modify.statistics.empty())
column_to_modify.statistics.column_name = rename_to;
});
}
if (metadata.table_ttl.definition_ast)

View File

@ -218,11 +218,7 @@ void ColumnDescription::readText(ReadBuffer & buf)
settings = col_ast->settings->as<ASTSetQuery &>().changes;
if (col_ast->statistics_desc)
{
statistics = ColumnStatisticsDescription::fromColumnDeclaration(*col_ast, type);
/// every column has name `x` here, so we have to set the name manually.
statistics.column_name = name;
}
}
else
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse column description");

View File

@ -807,7 +807,7 @@ MergeTreeDataPartBuilder IMergeTreeDataPart::getProjectionPartBuilder(const Stri
const char * projection_extension = is_temp_projection ? ".tmp_proj" : ".proj";
auto projection_storage = getDataPartStorage().getProjection(projection_name + projection_extension, !is_temp_projection);
MergeTreeDataPartBuilder builder(storage, projection_name, projection_storage);
return builder.withPartInfo({"all", 0, 0, 0}).withParentPart(this);
return builder.withPartInfo(MergeListElement::FAKE_RESULT_PART_FOR_PROJECTION).withParentPart(this);
}
void IMergeTreeDataPart::addProjectionPart(
@ -1334,17 +1334,6 @@ void IMergeTreeDataPart::loadRowsCount()
auto buf = metadata_manager->read("count.txt");
readIntText(rows_count, *buf);
assertEOF(*buf);
if (!index_granularity.empty() && rows_count < index_granularity.getTotalRows() && index_granularity_info.fixed_index_granularity)
{
/// Adjust last granule size to match the number of rows in the part in case of fixed index_granularity.
index_granularity.popMark();
index_granularity.appendMark(rows_count % index_granularity_info.fixed_index_granularity);
if (rows_count != index_granularity.getTotalRows())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Index granularity total rows in part {} does not match rows_count: {}, instead of {}",
name, index_granularity.getTotalRows(), rows_count);
}
};
if (index_granularity.empty())

View File

@ -6,10 +6,18 @@
#include <Common/CurrentThread.h>
#include <Common/MemoryTracker.h>
#include <Common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
const MergeTreePartInfo MergeListElement::FAKE_RESULT_PART_FOR_PROJECTION = {"all", 0, 0, 0};
MergeListElement::MergeListElement(const StorageID & table_id_, FutureMergedMutatedPartPtr future_part, const ContextPtr & context)
: table_id{table_id_}
, partition_id{future_part->part_info.partition_id}
@ -21,8 +29,23 @@ MergeListElement::MergeListElement(const StorageID & table_id_, FutureMergedMuta
, merge_type{future_part->merge_type}
, merge_algorithm{MergeAlgorithm::Undecided}
{
auto format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING;
if (result_part_name != result_part_info.getPartNameV1())
format_version = MERGE_TREE_DATA_OLD_FORMAT_VERSION;
/// FIXME why do we need a merge list element for projection parts at all?
bool is_fake_projection_part = future_part->part_info == FAKE_RESULT_PART_FOR_PROJECTION;
size_t normal_parts_count = 0;
for (const auto & source_part : future_part->parts)
{
if (!is_fake_projection_part && !source_part->getParentPart())
{
++normal_parts_count;
if (!result_part_info.contains(MergeTreePartInfo::fromPartName(source_part->name, format_version)))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Source part {} is not covered by result part {}", source_part->name, result_part_info.getPartNameV1());
}
source_part_names.emplace_back(source_part->name);
source_part_paths.emplace_back(source_part->getDataPartStorage().getFullPath());
@ -35,13 +58,17 @@ MergeListElement::MergeListElement(const StorageID & table_id_, FutureMergedMuta
if (!future_part->parts.empty())
{
source_data_version = future_part->parts[0]->info.getDataVersion();
is_mutation = (result_part_info.getDataVersion() != source_data_version);
is_mutation = (result_part_info.level == future_part->parts[0]->info.level) && !is_fake_projection_part;
WriteBufferFromString out(partition);
const auto & part = future_part->parts[0];
part->partition.serializeText(part->storage, out, {});
}
if (!is_fake_projection_part && is_mutation && normal_parts_count != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got {} source parts for mutation {}: {}", future_part->parts.size(),
result_part_info.getPartNameV1(), fmt::join(source_part_names, ", "));
thread_group = ThreadGroup::createForBackgroundProcess(context);
}
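
A minimal sketch of the two invariants checked in this constructor, using a simplified `PartInfo` struct instead of the real `MergeTreePartInfo`: every regular source part must fall inside the block range of the result part, and a mutation keeps the level of its source part (only the mutation version grows), which is how `is_mutation` is now detected:

```cpp
#include <cassert>

// Simplified stand-in for MergeTreePartInfo: a part name like all_1_2_1 encodes
// partition, min block, max block and level (plus an optional mutation version).
struct PartInfo
{
    int min_block;
    int max_block;
    int level;

    bool contains(const PartInfo & other) const
    {
        return min_block <= other.min_block && other.max_block <= max_block;
    }
};

int main()
{
    // Merging all_1_1_0 and all_2_2_0 produces all_1_2_1: a wider block range and a higher level.
    PartInfo source_a{1, 1, 0}, source_b{2, 2, 0}, merged{1, 2, 1};
    assert(merged.contains(source_a) && merged.contains(source_b));
    assert(merged.level > source_a.level);

    // Mutating all_1_2_1 produces all_1_2_1_5: same block range, same level, new mutation version,
    // so "result level equals source level" distinguishes a mutation from a merge.
    PartInfo mutated{1, 2, 1};
    assert(mutated.level == merged.level && merged.contains(mutated));
}
```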

View File

@ -66,6 +66,8 @@ struct Settings;
struct MergeListElement : boost::noncopyable
{
static const MergeTreePartInfo FAKE_RESULT_PART_FOR_PROJECTION;
const StorageID table_id;
std::string partition_id;
std::string partition;

View File

@ -0,0 +1,95 @@
#include <Storages/MergeTree/MergeProjectionPartsTask.h>
#include <Common/TransactionID.h>
#include <Storages/MergeTree/MergeList.h>
namespace DB
{
bool MergeProjectionPartsTask::executeStep()
{
auto & current_level_parts = level_parts[current_level];
auto & next_level_parts = level_parts[next_level];
MergeTreeData::MutableDataPartsVector selected_parts;
while (selected_parts.size() < max_parts_to_merge_in_one_level && !current_level_parts.empty())
{
selected_parts.push_back(std::move(current_level_parts.back()));
current_level_parts.pop_back();
}
if (selected_parts.empty())
{
if (next_level_parts.empty())
{
LOG_WARNING(log, "There are no projection parts merged");
/// Task is finished
return false;
}
current_level = next_level;
++next_level;
}
else if (selected_parts.size() == 1)
{
if (next_level_parts.empty())
{
LOG_DEBUG(log, "Merged a projection part in level {}", current_level);
selected_parts[0]->renameTo(projection.name + ".proj", true);
selected_parts[0]->setName(projection.name);
selected_parts[0]->is_temp = false;
new_data_part->addProjectionPart(name, std::move(selected_parts[0]));
/// Task is finished
return false;
}
else
{
LOG_DEBUG(log, "Forwarded part {} in level {} to next level", selected_parts[0]->name, current_level);
next_level_parts.push_back(std::move(selected_parts[0]));
}
}
else if (selected_parts.size() > 1)
{
// Generate a unique part name
++block_num;
auto projection_future_part = std::make_shared<FutureMergedMutatedPart>();
MergeTreeData::DataPartsVector const_selected_parts(
std::make_move_iterator(selected_parts.begin()), std::make_move_iterator(selected_parts.end()));
projection_future_part->assign(std::move(const_selected_parts));
projection_future_part->name = fmt::format("{}_{}", projection.name, ++block_num);
projection_future_part->part_info = {"all", 0, 0, 0};
MergeTreeData::MergingParams projection_merging_params;
projection_merging_params.mode = MergeTreeData::MergingParams::Ordinary;
if (projection.type == ProjectionDescription::Type::Aggregate)
projection_merging_params.mode = MergeTreeData::MergingParams::Aggregating;
LOG_DEBUG(log, "Merged {} parts in level {} to {}", selected_parts.size(), current_level, projection_future_part->name);
auto tmp_part_merge_task = mutator->mergePartsToTemporaryPart(
projection_future_part,
projection.metadata,
merge_entry,
std::make_unique<MergeListElement>((*merge_entry)->table_id, projection_future_part, context),
*table_lock_holder,
time_of_merge,
context,
space_reservation,
false, // TODO Do we need deduplicate for projections
{},
false, // no cleanup
projection_merging_params,
NO_TRANSACTION_PTR,
/* need_prefix */ true,
new_data_part.get(),
".tmp_proj");
next_level_parts.push_back(executeHere(tmp_part_merge_task));
next_level_parts.back()->is_temp = true;
}
/// Need execute again
return true;
}
}
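
The step above reduces projection parts level by level: it pops at most `max_parts_to_merge_in_one_level` parts from the current level, merges them into a new part on the next level, and finishes only when a single part survives. A standalone sketch of that reduction, with plain strings standing in for data parts and hypothetical names throughout:

```cpp
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main()
{
    constexpr size_t max_parts_to_merge_in_one_level = 10;  // mirrors the constant in the task
    std::map<size_t, std::vector<std::string>> level_parts;
    size_t current_level = 0;
    size_t next_level = 1;
    size_t block_num = 0;

    // Pretend the mutation produced 23 temporary projection parts on level 0.
    for (size_t i = 0; i < 23; ++i)
        level_parts[current_level].push_back("proj_tmp_" + std::to_string(i));

    while (true)
    {
        auto & current = level_parts[current_level];
        auto & next = level_parts[next_level];

        std::vector<std::string> selected;
        while (selected.size() < max_parts_to_merge_in_one_level && !current.empty())
        {
            selected.push_back(std::move(current.back()));
            current.pop_back();
        }

        if (selected.empty())
        {
            // Nothing left on this level: continue with the parts produced by the merges above.
            current_level = next_level;
            ++next_level;
        }
        else if (selected.size() == 1 && next.empty())
        {
            // The single survivor becomes the projection part of the new data part.
            std::cout << "final projection part: " << selected[0] << "\n";
            break;
        }
        else if (selected.size() == 1)
        {
            next.push_back(std::move(selected[0]));  // forward as-is to the next level
        }
        else
        {
            // "Merge" the selected parts into one new part on the next level.
            next.push_back("proj_" + std::to_string(++block_num));
        }
    }
}
```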

View File

@ -0,0 +1,84 @@
#pragma once
#include <Interpreters/StorageID.h>
#include <Storages/MergeTree/IExecutableTask.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreeDataMergerMutator.h>
#include <Storages/MergeTree/MergeProgress.h>
#include <Storages/MergeTree/FutureMergedMutatedPart.h>
#include <Storages/ProjectionsDescription.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
class MergeProjectionPartsTask : public IExecutableTask
{
public:
MergeProjectionPartsTask(
String name_,
MergeTreeData::MutableDataPartsVector && parts_,
const ProjectionDescription & projection_,
size_t & block_num_,
ContextPtr context_,
TableLockHolder * table_lock_holder_,
MergeTreeDataMergerMutator * mutator_,
MergeListEntry * merge_entry_,
time_t time_of_merge_,
MergeTreeData::MutableDataPartPtr new_data_part_,
ReservationSharedPtr space_reservation_)
: name(std::move(name_))
, parts(std::move(parts_))
, projection(projection_)
, block_num(block_num_)
, context(context_)
, table_lock_holder(table_lock_holder_)
, mutator(mutator_)
, merge_entry(merge_entry_)
, time_of_merge(time_of_merge_)
, new_data_part(new_data_part_)
, space_reservation(space_reservation_)
, log(getLogger("MergeProjectionPartsTask"))
{
LOG_DEBUG(log, "Selected {} projection_parts from {} to {}", parts.size(), parts.front()->name, parts.back()->name);
level_parts[current_level] = std::move(parts);
}
void onCompleted() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); }
StorageID getStorageID() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); }
Priority getPriority() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); }
String getQueryId() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); }
bool executeStep() override;
private:
String name;
MergeTreeData::MutableDataPartsVector parts;
const ProjectionDescription & projection;
size_t & block_num;
ContextPtr context;
TableLockHolder * table_lock_holder;
MergeTreeDataMergerMutator * mutator;
MergeListEntry * merge_entry;
time_t time_of_merge;
MergeTreeData::MutableDataPartPtr new_data_part;
ReservationSharedPtr space_reservation;
LoggerPtr log;
std::map<size_t, MergeTreeData::MutableDataPartsVector> level_parts;
size_t current_level = 0;
size_t next_level = 1;
/// TODO(nikitamikhaylov): make this constant a setting
static constexpr size_t max_parts_to_merge_in_one_level = 10;
};
}

View File

@ -21,6 +21,8 @@
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <Storages/MergeTree/FutureMergedMutatedPart.h>
#include <Storages/MergeTree/MergeTreeDataMergerMutator.h>
#include <Storages/MergeTree/MergeTreeDataWriter.h>
#include <Storages/MergeTree/MergeProjectionPartsTask.h>
#include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/Transforms/MaterializingTransform.h>
#include <Processors/Transforms/FilterTransform.h>
@ -63,6 +65,7 @@ namespace ErrorCodes
extern const int SUPPORT_IS_DISABLED;
}
static ColumnsStatistics getStatisticsForColumns(
const NamesAndTypesList & columns_to_read,
const StorageMetadataPtr & metadata_snapshot)
@ -75,7 +78,7 @@ static ColumnsStatistics getStatisticsForColumns(
const auto * desc = all_columns.tryGet(column.name);
if (desc && !desc->statistics.empty())
{
auto statistics = MergeTreeStatisticsFactory::instance().get(desc->statistics);
auto statistics = MergeTreeStatisticsFactory::instance().get(*desc);
all_statistics.push_back(std::move(statistics));
}
}
@ -155,6 +158,13 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColu
}
}
for (const auto * projection : global_ctx->projections_to_rebuild)
{
Names projection_columns_vec = projection->getRequiredColumns();
std::copy(projection_columns_vec.cbegin(), projection_columns_vec.cend(),
std::inserter(key_columns, key_columns.end()));
}
/// TODO: also force "summing" and "aggregating" columns to make Horizontal merge only for such columns
for (const auto & column : global_ctx->storage_columns)
@ -254,6 +264,8 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
extendObjectColumns(global_ctx->storage_columns, object_columns, false);
global_ctx->storage_snapshot = std::make_shared<StorageSnapshot>(*global_ctx->data, global_ctx->metadata_snapshot, std::move(object_columns));
prepareProjectionsToMergeAndRebuild();
extractMergingAndGatheringColumns();
global_ctx->new_data_part->uuid = global_ctx->future_part->uuid;
@ -517,6 +529,148 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::execute()
}
void MergeTask::ExecuteAndFinalizeHorizontalPart::prepareProjectionsToMergeAndRebuild() const
{
const auto mode = global_ctx->data->getSettings()->deduplicate_merge_projection_mode;
/// Under throw mode, we still choose to drop projections due to backward compatibility since some
/// users might have projections before this change.
if (global_ctx->data->merging_params.mode != MergeTreeData::MergingParams::Ordinary
&& (mode == DeduplicateMergeProjectionMode::THROW || mode == DeduplicateMergeProjectionMode::DROP))
return;
/// These merging modes may or may not reduce number of rows. It's not known until the horizontal stage is finished.
const bool merge_may_reduce_rows =
global_ctx->cleanup ||
global_ctx->deduplicate ||
ctx->merging_params.mode == MergeTreeData::MergingParams::Collapsing ||
ctx->merging_params.mode == MergeTreeData::MergingParams::Replacing ||
ctx->merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing;
const auto & projections = global_ctx->metadata_snapshot->getProjections();
for (const auto & projection : projections)
{
if (merge_may_reduce_rows)
{
global_ctx->projections_to_rebuild.push_back(&projection);
continue;
}
MergeTreeData::DataPartsVector projection_parts;
for (const auto & part : global_ctx->future_part->parts)
{
auto it = part->getProjectionParts().find(projection.name);
if (it != part->getProjectionParts().end() && !it->second->is_broken)
projection_parts.push_back(it->second);
}
if (projection_parts.size() == global_ctx->future_part->parts.size())
{
global_ctx->projections_to_merge.push_back(&projection);
global_ctx->projections_to_merge_parts[projection.name].assign(projection_parts.begin(), projection_parts.end());
}
else
{
chassert(projection_parts.size() < global_ctx->future_part->parts.size());
LOG_DEBUG(ctx->log, "Projection {} is not merged because some parts don't have it", projection.name);
continue;
}
}
const auto & settings = global_ctx->context->getSettingsRef();
for (const auto * projection : global_ctx->projections_to_rebuild)
ctx->projection_squashes.emplace_back(projection->sample_block.cloneEmpty(),
settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes);
}
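
A condensed sketch of the decision made in this function, with hypothetical names: a projection is rebuilt whenever the merge itself may drop rows, merged part-by-part when every source part carries a non-broken projection part, and otherwise left out of this merge:

```cpp
#include <cstddef>

enum class ProjectionAction { Rebuild, Merge, Skip };

// merge_may_reduce_rows: cleanup, deduplication, or a Collapsing/Replacing/VersionedCollapsing merge.
// parts_with_projection / total_parts: source parts that carry a non-broken part for this projection.
ProjectionAction chooseProjectionAction(bool merge_may_reduce_rows, std::size_t parts_with_projection, std::size_t total_parts)
{
    if (merge_may_reduce_rows)
        return ProjectionAction::Rebuild;   // existing projection parts may no longer match the merged rows
    if (parts_with_projection == total_parts)
        return ProjectionAction::Merge;     // every source part has the projection, merge those parts directly
    return ProjectionAction::Skip;          // some source parts lack it, so it is not merged this time
}
```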
void MergeTask::ExecuteAndFinalizeHorizontalPart::calculateProjections(const Block & block) const
{
for (size_t i = 0, size = global_ctx->projections_to_rebuild.size(); i < size; ++i)
{
const auto & projection = *global_ctx->projections_to_rebuild[i];
Block block_to_squash = projection.calculate(block, global_ctx->context);
auto & projection_squash_plan = ctx->projection_squashes[i];
projection_squash_plan.setHeader(block_to_squash.cloneEmpty());
Chunk squashed_chunk = Squashing::squash(projection_squash_plan.add({block_to_squash.getColumns(), block_to_squash.rows()}));
if (squashed_chunk)
{
auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns());
auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart(
*global_ctx->data, ctx->log, result, projection, global_ctx->new_data_part.get(), ++ctx->projection_block_num);
tmp_part.finalize();
tmp_part.part->getDataPartStorage().commitTransaction();
ctx->projection_parts[projection.name].emplace_back(std::move(tmp_part.part));
}
}
}
void MergeTask::ExecuteAndFinalizeHorizontalPart::finalizeProjections() const
{
for (size_t i = 0, size = global_ctx->projections_to_rebuild.size(); i < size; ++i)
{
const auto & projection = *global_ctx->projections_to_rebuild[i];
auto & projection_squash_plan = ctx->projection_squashes[i];
auto squashed_chunk = Squashing::squash(projection_squash_plan.flush());
if (squashed_chunk)
{
auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns());
auto temp_part = MergeTreeDataWriter::writeTempProjectionPart(
*global_ctx->data, ctx->log, result, projection, global_ctx->new_data_part.get(), ++ctx->projection_block_num);
temp_part.finalize();
temp_part.part->getDataPartStorage().commitTransaction();
ctx->projection_parts[projection.name].emplace_back(std::move(temp_part.part));
}
}
ctx->projection_parts_iterator = std::make_move_iterator(ctx->projection_parts.begin());
if (ctx->projection_parts_iterator != std::make_move_iterator(ctx->projection_parts.end()))
constructTaskForProjectionPartsMerge();
}
void MergeTask::ExecuteAndFinalizeHorizontalPart::constructTaskForProjectionPartsMerge() const
{
auto && [name, parts] = *ctx->projection_parts_iterator;
const auto & projection = global_ctx->metadata_snapshot->projections.get(name);
ctx->merge_projection_parts_task_ptr = std::make_unique<MergeProjectionPartsTask>
(
name,
std::move(parts),
projection,
ctx->projection_block_num,
global_ctx->context,
global_ctx->holder,
global_ctx->mutator,
global_ctx->merge_entry,
global_ctx->time_of_merge,
global_ctx->new_data_part,
global_ctx->space_reservation
);
}
bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeMergeProjections() // NOLINT
{
/// If there are no projections, we didn't construct a task
if (!ctx->merge_projection_parts_task_ptr)
return false;
if (ctx->merge_projection_parts_task_ptr->executeStep())
return true;
++ctx->projection_parts_iterator;
if (ctx->projection_parts_iterator == std::make_move_iterator(ctx->projection_parts.end()))
return false;
constructTaskForProjectionPartsMerge();
return true;
}
bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeImpl()
{
Stopwatch watch(CLOCK_MONOTONIC_COARSE);
@ -535,6 +689,8 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeImpl()
global_ctx->rows_written += block.rows();
const_cast<MergedBlockOutputStream &>(*global_ctx->to).write(block);
calculateProjections(block);
UInt64 result_rows = 0;
UInt64 result_bytes = 0;
global_ctx->merged_pipeline.tryGetResultRowsAndBytes(result_rows, result_bytes);
@ -558,8 +714,10 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeImpl()
return true;
}
void MergeTask::ExecuteAndFinalizeHorizontalPart::finalize() const
{
finalizeProjections();
global_ctx->merging_executor.reset();
global_ctx->merged_pipeline.reset();
@ -847,35 +1005,9 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c
ReadableSize(global_ctx->merge_list_element_ptr->bytes_read_uncompressed / elapsed_seconds));
}
const auto mode = global_ctx->data->getSettings()->deduplicate_merge_projection_mode;
/// Under throw mode, we still choose to drop projections due to backward compatibility since some
/// users might have projections before this change.
if (global_ctx->data->merging_params.mode != MergeTreeData::MergingParams::Ordinary
&& (mode == DeduplicateMergeProjectionMode::THROW || mode == DeduplicateMergeProjectionMode::DROP))
for (const auto & projection : global_ctx->projections_to_merge)
{
ctx->projections_iterator = ctx->tasks_for_projections.begin();
return false;
}
const auto & projections = global_ctx->metadata_snapshot->getProjections();
for (const auto & projection : projections)
{
MergeTreeData::DataPartsVector projection_parts;
for (const auto & part : global_ctx->future_part->parts)
{
auto actual_projection_parts = part->getProjectionParts();
auto it = actual_projection_parts.find(projection.name);
if (it != actual_projection_parts.end() && !it->second->is_broken)
projection_parts.push_back(it->second);
}
if (projection_parts.size() < global_ctx->future_part->parts.size())
{
LOG_DEBUG(ctx->log, "Projection {} is not merged because some parts don't have it", projection.name);
continue;
}
MergeTreeData::DataPartsVector projection_parts = global_ctx->projections_to_merge_parts[projection->name];
LOG_DEBUG(
ctx->log,
"Selected {} projection_parts from {} to {}",
@ -885,24 +1017,25 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c
auto projection_future_part = std::make_shared<FutureMergedMutatedPart>();
projection_future_part->assign(std::move(projection_parts));
projection_future_part->name = projection.name;
projection_future_part->name = projection->name;
// TODO (ab): path in future_part is only for merge process introspection, which is not available for merges of projection parts.
// Let's comment this out to avoid code inconsistency and add it back after we implement projection merge introspection.
// projection_future_part->path = global_ctx->future_part->path + "/" + projection.name + ".proj/";
projection_future_part->part_info = {"all", 0, 0, 0};
projection_future_part->part_info = MergeListElement::FAKE_RESULT_PART_FOR_PROJECTION;
MergeTreeData::MergingParams projection_merging_params;
projection_merging_params.mode = MergeTreeData::MergingParams::Ordinary;
if (projection.type == ProjectionDescription::Type::Aggregate)
if (projection->type == ProjectionDescription::Type::Aggregate)
projection_merging_params.mode = MergeTreeData::MergingParams::Aggregating;
ctx->tasks_for_projections.emplace_back(std::make_shared<MergeTask>(
projection_future_part,
projection.metadata,
projection->metadata,
global_ctx->merge_entry,
std::make_unique<MergeListElement>((*global_ctx->merge_entry)->table_id, projection_future_part, global_ctx->context),
global_ctx->time_of_merge,
global_ctx->context,
*global_ctx->holder,
global_ctx->space_reservation,
global_ctx->deduplicate,
global_ctx->deduplicate_by_columns,

View File

@ -9,6 +9,7 @@
#include <Compression/CompressedReadBuffer.h>
#include <Compression/CompressedReadBufferFromFile.h>
#include <Interpreters/Squashing.h>
#include <Interpreters/TemporaryDataOnDisk.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
@ -72,6 +73,7 @@ public:
std::unique_ptr<MergeListElement> projection_merge_list_element_,
time_t time_of_merge_,
ContextPtr context_,
TableLockHolder & holder,
ReservationSharedPtr space_reservation_,
bool deduplicate_,
Names deduplicate_by_columns_,
@ -96,6 +98,7 @@ public:
= global_ctx->projection_merge_list_element ? global_ctx->projection_merge_list_element.get() : (*global_ctx->merge_entry)->ptr();
global_ctx->time_of_merge = std::move(time_of_merge_);
global_ctx->context = std::move(context_);
global_ctx->holder = &holder;
global_ctx->space_reservation = std::move(space_reservation_);
global_ctx->deduplicate = std::move(deduplicate_);
global_ctx->deduplicate_by_columns = std::move(deduplicate_by_columns_);
@ -151,6 +154,7 @@ private:
/// Proper initialization is responsibility of the author
struct GlobalRuntimeContext : public IStageRuntimeContext
{
TableLockHolder * holder;
MergeList::Entry * merge_entry{nullptr};
/// If not null, use this instead of the global MergeList::Entry. This is for merging projections.
std::unique_ptr<MergeListElement> projection_merge_list_element;
@ -181,6 +185,10 @@ private:
MergeAlgorithm chosen_merge_algorithm{MergeAlgorithm::Undecided};
std::vector<ProjectionDescriptionRawPtr> projections_to_rebuild{};
std::vector<ProjectionDescriptionRawPtr> projections_to_merge{};
std::map<String, MergeTreeData::DataPartsVector> projections_to_merge_parts{};
std::unique_ptr<MergeStageProgress> horizontal_stage_progress{nullptr};
std::unique_ptr<MergeStageProgress> column_progress{nullptr};
@ -228,6 +236,14 @@ private:
std::unique_ptr<WriteBuffer> rows_sources_write_buf{nullptr};
std::optional<ColumnSizeEstimator> column_sizes{};
/// For projections to rebuild
using ProjectionNameToItsBlocks = std::map<String, MergeTreeData::MutableDataPartsVector>;
ProjectionNameToItsBlocks projection_parts;
std::move_iterator<ProjectionNameToItsBlocks::iterator> projection_parts_iterator;
std::vector<Squashing> projection_squashes;
size_t projection_block_num = 0;
ExecutableTaskPtr merge_projection_parts_task_ptr;
size_t initial_reservation{0};
bool read_with_direct_io{false};
@ -257,16 +273,23 @@ private:
void finalize() const;
/// NOTE: Using pointer-to-member instead of std::function and lambda makes stacktraces much more concise and readable
using ExecuteAndFinalizeHorizontalPartSubtasks = std::array<bool(ExecuteAndFinalizeHorizontalPart::*)(), 2>;
using ExecuteAndFinalizeHorizontalPartSubtasks = std::array<bool(ExecuteAndFinalizeHorizontalPart::*)(), 3>;
const ExecuteAndFinalizeHorizontalPartSubtasks subtasks
{
&ExecuteAndFinalizeHorizontalPart::prepare,
&ExecuteAndFinalizeHorizontalPart::executeImpl
&ExecuteAndFinalizeHorizontalPart::executeImpl,
&ExecuteAndFinalizeHorizontalPart::executeMergeProjections
};
ExecuteAndFinalizeHorizontalPartSubtasks::const_iterator subtasks_iterator = subtasks.begin();
void prepareProjectionsToMergeAndRebuild() const;
void calculateProjections(const Block & block) const;
void finalizeProjections() const;
void constructTaskForProjectionPartsMerge() const;
bool executeMergeProjections();
MergeAlgorithm chooseMergeAlgorithm() const;
void createMergedStream();
void extractMergingAndGatheringColumns() const;
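
The `subtasks` array holds pointers to member functions instead of `std::function` objects, which the note above says keeps stack traces concise. A self-contained sketch of how such an array can be iterated, assuming (as this code appears to) that a subtask returns `true` while it still has work left:

```cpp
#include <array>
#include <iostream>

class Stage
{
public:
    // Runs the current subtask; advances when it reports completion. Returns false when the stage is done.
    bool execute()
    {
        if (subtasks_iterator == subtasks.end())
            return false;
        if ((this->*(*subtasks_iterator))())   // call through the pointer-to-member
            return true;                       // current subtask wants to be called again
        ++subtasks_iterator;
        return subtasks_iterator != subtasks.end();
    }

private:
    bool prepare() { std::cout << "prepare\n"; return false; }
    bool executeImpl() { return ++calls < 3; } // pretend to need three invocations
    bool finalize() { std::cout << "finalize\n"; return false; }

    using Subtasks = std::array<bool (Stage::*)(), 3>;
    const Subtasks subtasks{&Stage::prepare, &Stage::executeImpl, &Stage::finalize};
    Subtasks::const_iterator subtasks_iterator = subtasks.begin();
    int calls = 0;
};

int main()
{
    Stage stage;
    while (stage.execute()) {}
}
```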

View File

@ -671,7 +671,7 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart(
const StorageMetadataPtr & metadata_snapshot,
MergeList::Entry * merge_entry,
std::unique_ptr<MergeListElement> projection_merge_list_element,
TableLockHolder,
TableLockHolder & holder,
time_t time_of_merge,
ContextPtr context,
ReservationSharedPtr space_reservation,
@ -691,6 +691,7 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart(
std::move(projection_merge_list_element),
time_of_merge,
context,
holder,
space_reservation,
deduplicate,
deduplicate_by_columns,

View File

@ -159,7 +159,7 @@ public:
const StorageMetadataPtr & metadata_snapshot,
MergeListEntry * merge_entry,
std::unique_ptr<MergeListElement> projection_merge_list_element,
TableLockHolder table_lock_holder,
TableLockHolder & table_lock_holder,
time_t time_of_merge,
ContextPtr context,
ReservationSharedPtr space_reservation,

View File

@ -577,10 +577,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai
if (index_granularity_rows != index_granularity.getMarkRows(mark_num))
{
/// With fixed granularity we can have last mark with less rows than granularity
const bool is_last_mark = (mark_num + 1 == index_granularity.getMarksCount());
if (!index_granularity_info.fixed_index_granularity || !is_last_mark)
throw Exception(
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Incorrect mark rows for part {} for mark #{}"
" (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}",
@ -844,14 +841,7 @@ void MergeTreeDataPartWriterWide::adjustLastMarkIfNeedAndFlushToDisk(size_t new_
/// Without offset
rows_written_in_last_mark = 0;
}
if (compute_granularity)
{
index_granularity.popMark();
index_granularity.appendMark(new_rows_in_last_mark);
}
}
}
}

View File

@ -24,6 +24,7 @@
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
#include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
#include <Storages/MergeTree/MergeTreeDataWriter.h>
#include <Storages/MergeTree/MergeProjectionPartsTask.h>
#include <Storages/MutationCommands.h>
#include <Storages/MergeTree/MergeTreeDataMergerMutator.h>
#include <Storages/MergeTree/MergeTreeIndexFullText.h>
@ -552,7 +553,7 @@ static std::set<ColumnStatisticsPtr> getStatisticsToRecalculate(const StorageMet
{
if (!col_desc.statistics.empty() && materialized_stats.contains(col_desc.name))
{
stats_to_recalc.insert(stats_factory.get(col_desc.statistics));
stats_to_recalc.insert(stats_factory.get(col_desc));
}
}
return stats_to_recalc;
@ -1058,136 +1059,6 @@ struct MutationContext
using MutationContextPtr = std::shared_ptr<MutationContext>;
class MergeProjectionPartsTask : public IExecutableTask
{
public:
MergeProjectionPartsTask(
String name_,
MergeTreeData::MutableDataPartsVector && parts_,
const ProjectionDescription & projection_,
size_t & block_num_,
MutationContextPtr ctx_)
: name(std::move(name_))
, parts(std::move(parts_))
, projection(projection_)
, block_num(block_num_)
, ctx(ctx_)
, log(getLogger("MergeProjectionPartsTask"))
{
LOG_DEBUG(log, "Selected {} projection_parts from {} to {}", parts.size(), parts.front()->name, parts.back()->name);
level_parts[current_level] = std::move(parts);
}
void onCompleted() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); }
StorageID getStorageID() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); }
Priority getPriority() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); }
String getQueryId() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); }
bool executeStep() override
{
auto & current_level_parts = level_parts[current_level];
auto & next_level_parts = level_parts[next_level];
MergeTreeData::MutableDataPartsVector selected_parts;
while (selected_parts.size() < max_parts_to_merge_in_one_level && !current_level_parts.empty())
{
selected_parts.push_back(std::move(current_level_parts.back()));
current_level_parts.pop_back();
}
if (selected_parts.empty())
{
if (next_level_parts.empty())
{
LOG_WARNING(log, "There is no projection parts merged");
/// Task is finished
return false;
}
current_level = next_level;
++next_level;
}
else if (selected_parts.size() == 1)
{
if (next_level_parts.empty())
{
LOG_DEBUG(log, "Merged a projection part in level {}", current_level);
selected_parts[0]->renameTo(projection.name + ".proj", true);
selected_parts[0]->setName(projection.name);
selected_parts[0]->is_temp = false;
ctx->new_data_part->addProjectionPart(name, std::move(selected_parts[0]));
/// Task is finished
return false;
}
else
{
LOG_DEBUG(log, "Forwarded part {} in level {} to next level", selected_parts[0]->name, current_level);
next_level_parts.push_back(std::move(selected_parts[0]));
}
}
else if (selected_parts.size() > 1)
{
// Generate a unique part name
++block_num;
auto projection_future_part = std::make_shared<FutureMergedMutatedPart>();
MergeTreeData::DataPartsVector const_selected_parts(
std::make_move_iterator(selected_parts.begin()), std::make_move_iterator(selected_parts.end()));
projection_future_part->assign(std::move(const_selected_parts));
projection_future_part->name = fmt::format("{}_{}", projection.name, ++block_num);
projection_future_part->part_info = {"all", 0, 0, 0};
MergeTreeData::MergingParams projection_merging_params;
projection_merging_params.mode = MergeTreeData::MergingParams::Ordinary;
if (projection.type == ProjectionDescription::Type::Aggregate)
projection_merging_params.mode = MergeTreeData::MergingParams::Aggregating;
LOG_DEBUG(log, "Merged {} parts in level {} to {}", selected_parts.size(), current_level, projection_future_part->name);
auto tmp_part_merge_task = ctx->mutator->mergePartsToTemporaryPart(
projection_future_part,
projection.metadata,
ctx->mutate_entry,
std::make_unique<MergeListElement>((*ctx->mutate_entry)->table_id, projection_future_part, ctx->context),
*ctx->holder,
ctx->time_of_mutation,
ctx->context,
ctx->space_reservation,
false, // TODO Do we need deduplicate for projections
{},
false, // no cleanup
projection_merging_params,
NO_TRANSACTION_PTR,
/* need_prefix */ true,
ctx->new_data_part.get(),
".tmp_proj");
next_level_parts.push_back(executeHere(tmp_part_merge_task));
next_level_parts.back()->is_temp = true;
}
/// Need execute again
return true;
}
private:
String name;
MergeTreeData::MutableDataPartsVector parts;
const ProjectionDescription & projection;
size_t & block_num;
MutationContextPtr ctx;
LoggerPtr log;
std::map<size_t, MergeTreeData::MutableDataPartsVector> level_parts;
size_t current_level = 0;
size_t next_level = 1;
/// TODO(nikitamikhaylov): make this constant a setting
static constexpr size_t max_parts_to_merge_in_one_level = 10;
};
// This class is responsible for:
// 1. get projection pipeline and a sink to write parts
// 2. build an executor that can write block to the input stream (actually we can write through it to generate as many parts as possible)
@ -1406,7 +1277,13 @@ void PartMergerWriter::constructTaskForProjectionPartsMerge()
std::move(parts),
projection,
block_num,
ctx
ctx->context,
ctx->holder,
ctx->mutator,
ctx->mutate_entry,
ctx->time_of_mutation,
ctx->new_data_part,
ctx->space_reservation
);
}
@ -1557,7 +1434,7 @@ private:
if (ctx->materialized_statistics.contains(col.name))
{
stats_to_rewrite.push_back(MergeTreeStatisticsFactory::instance().get(col.statistics));
stats_to_rewrite.push_back(MergeTreeStatisticsFactory::instance().get(col));
}
else
{

View File

@ -148,10 +148,12 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context,
{
if (engine_args.size() < 3 || engine_args.size() > (with_structure ? 8 : 7))
{
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Storage AzureBlobStorage requires 3 to 7 arguments: "
"AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, "
"[account_name, account_key, format, compression, structure)])");
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Storage AzureBlobStorage requires 3 to {} arguments: "
"AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, "
"[account_name, account_key, format, compression, structure)])",
(with_structure ? 8 : 7));
}
for (auto & engine_arg : engine_args)

View File

@ -3,7 +3,7 @@
#include "config.h"
#include <set>
#if USE_AWS_S3 && USE_PARQUET
#if USE_PARQUET
#include <Common/logger_useful.h>
#include <Columns/ColumnString.h>
@ -425,8 +425,9 @@ struct DeltaLakeMetadataImpl
{
auto field = fields->getObject(static_cast<Int32>(i));
element_names.push_back(field->getValue<String>("name"));
auto required = field->getValue<bool>("required");
element_types.push_back(getFieldType(field, "type", required));
auto is_nullable = field->getValue<bool>("nullable");
element_types.push_back(getFieldType(field, "type", is_nullable));
}
return std::make_shared<DataTypeTuple>(element_types, element_names);
@ -434,16 +435,16 @@ struct DeltaLakeMetadataImpl
if (type_name == "array")
{
bool is_nullable = type->getValue<bool>("containsNull");
auto element_type = getFieldType(type, "elementType", is_nullable);
bool element_nullable = type->getValue<bool>("containsNull");
auto element_type = getFieldType(type, "elementType", element_nullable);
return std::make_shared<DataTypeArray>(element_type);
}
if (type_name == "map")
{
bool is_nullable = type->getValue<bool>("containsNull");
auto key_type = getFieldType(type, "keyType", /* is_nullable */false);
auto value_type = getFieldType(type, "valueType", is_nullable);
bool value_nullable = type->getValue<bool>("valueContainsNull");
auto value_type = getFieldType(type, "valueType", value_nullable);
return std::make_shared<DataTypeMap>(key_type, value_type);
}
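
A small standalone sketch of the nullability rules these hunks apply when mapping a Delta Lake schema to ClickHouse types, with a hypothetical helper and example field values: struct fields use their own `nullable` flag (not `required`), array elements use `containsNull`, map values use `valueContainsNull`, and map keys are never nullable:

```cpp
#include <iostream>
#include <string>

// Wrap a type name in Nullable(...) when the Delta Lake schema marks it as nullable.
std::string wrapNullable(const std::string & type, bool is_nullable)
{
    return is_nullable ? "Nullable(" + type + ")" : type;
}

int main()
{
    // struct field: {"name": "price", "type": "double", "nullable": true}
    std::cout << "price -> " << wrapNullable("Float64", /* nullable */ true) << "\n";

    // array: {"type": "array", "elementType": "long", "containsNull": false}
    std::cout << "tags  -> Array(" << wrapNullable("Int64", /* containsNull */ false) << ")\n";

    // map: {"type": "map", "keyType": "string", "valueType": "long", "valueContainsNull": true}
    std::cout << "attrs -> Map(" << wrapNullable("String", false)                        // keys are never nullable
              << ", " << wrapNullable("Int64", /* valueContainsNull */ true) << ")\n";
}
```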

View File

@ -2,7 +2,7 @@
#include "config.h"
#if USE_AWS_S3 && USE_AVRO
#if USE_AVRO
#include <Formats/FormatFactory.h>
#include <Storages/IStorage.h>

View File

@ -1,6 +1,6 @@
#include "config.h"
#if USE_AWS_S3 && USE_AVRO
#if USE_AVRO
#include <Common/logger_useful.h>
#include <Core/Settings.h>

View File

@ -1,6 +1,6 @@
#pragma once
#if USE_AWS_S3 && USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format.
#if USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format.
#include <Interpreters/Context_fwd.h>
#include <Core/Types.h>

View File

@ -2,10 +2,12 @@
#if USE_AWS_S3
#include <Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h>
#include <Storages/ObjectStorage/DataLakes/IStorageDataLake.h>
#include <Storages/ObjectStorage/DataLakes/IcebergMetadata.h>
#include <Storages/ObjectStorage/S3/Configuration.h>
# include <Storages/ObjectStorage/Azure/Configuration.h>
# include <Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h>
# include <Storages/ObjectStorage/DataLakes/IStorageDataLake.h>
# include <Storages/ObjectStorage/DataLakes/IcebergMetadata.h>
# include <Storages/ObjectStorage/Local/Configuration.h>
# include <Storages/ObjectStorage/S3/Configuration.h>
namespace DB
@ -22,6 +24,54 @@ void registerStorageIceberg(StorageFactory & factory)
auto configuration = std::make_shared<StorageS3Configuration>();
StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getLocalContext(), false);
return StorageIceberg::create(
configuration, args.getContext(), args.table_id, args.columns, args.constraints, args.comment, std::nullopt, args.mode);
},
{
.supports_settings = false,
.supports_schema_inference = true,
.source_access_type = AccessType::S3,
});
factory.registerStorage(
"IcebergS3",
[&](const StorageFactory::Arguments & args)
{
auto configuration = std::make_shared<StorageS3Configuration>();
StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getLocalContext(), false);
return StorageIceberg::create(
configuration, args.getContext(), args.table_id, args.columns, args.constraints, args.comment, std::nullopt, args.mode);
},
{
.supports_settings = false,
.supports_schema_inference = true,
.source_access_type = AccessType::S3,
});
factory.registerStorage(
"IcebergAzure",
[&](const StorageFactory::Arguments & args)
{
auto configuration = std::make_shared<StorageAzureConfiguration>();
StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getLocalContext(), true);
return StorageIceberg::create(
configuration, args.getContext(), args.table_id, args.columns, args.constraints, args.comment, std::nullopt, args.mode);
},
{
.supports_settings = false,
.supports_schema_inference = true,
.source_access_type = AccessType::AZURE,
});
factory.registerStorage(
"IcebergLocal",
[&](const StorageFactory::Arguments & args)
{
auto configuration = std::make_shared<StorageLocalConfiguration>();
StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getLocalContext(), false);
return StorageIceberg::create(
configuration, args.getContext(), args.table_id, args.columns,
args.constraints, args.comment, std::nullopt, args.mode);
@ -29,7 +79,7 @@ void registerStorageIceberg(StorageFactory & factory)
{
.supports_settings = false,
.supports_schema_inference = true,
.source_access_type = AccessType::S3,
.source_access_type = AccessType::FILE,
});
}

View File

@ -0,0 +1,77 @@
#include <Core/Settings.h>
#include <Interpreters/Context.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Storages/ObjectStorage/Local/Configuration.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include "Common/NamedCollections/NamedCollections.h"
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
void StorageLocalConfiguration::fromNamedCollection(const NamedCollection & collection, ContextPtr)
{
path = collection.get<String>("path");
format = collection.getOrDefault<String>("format", "auto");
compression_method = collection.getOrDefault<String>("compression_method", collection.getOrDefault<String>("compression", "auto"));
structure = collection.getOrDefault<String>("structure", "auto");
paths = {path};
}
void StorageLocalConfiguration::fromAST(ASTs & args, ContextPtr context, bool with_structure)
{
const size_t max_args_num = with_structure ? 4 : 3;
if (args.empty() || args.size() > max_args_num)
{
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Expected not more than {} arguments", max_args_num);
}
for (auto & arg : args)
arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
path = checkAndGetLiteralArgument<String>(args[0], "path");
if (args.size() > 1)
{
format = checkAndGetLiteralArgument<String>(args[1], "format_name");
}
if (with_structure)
{
if (args.size() > 2)
{
structure = checkAndGetLiteralArgument<String>(args[2], "structure");
}
if (args.size() > 3)
{
compression_method = checkAndGetLiteralArgument<String>(args[3], "compression_method");
}
}
else if (args.size() > 2)
{
compression_method = checkAndGetLiteralArgument<String>(args[2], "compression_method");
}
paths = {path};
}
StorageObjectStorage::QuerySettings StorageLocalConfiguration::getQuerySettings(const ContextPtr & context) const
{
const auto & settings = context->getSettingsRef();
return StorageObjectStorage::QuerySettings{
.truncate_on_insert = settings.engine_file_truncate_on_insert,
.create_new_file_on_insert = false,
.schema_inference_use_cache = settings.schema_inference_use_cache_for_file,
.schema_inference_mode = settings.schema_inference_mode,
.skip_empty_files = settings.engine_file_skip_empty_files,
.list_object_keys_size = 0,
.throw_on_zero_files_match = false,
.ignore_non_existent_file = false};
}
}

View File

@ -0,0 +1,52 @@
#pragma once
#include <memory>
#include "Disks/ObjectStorages/Local/LocalObjectStorage.h"
#include <Storages/ObjectStorage/StorageObjectStorage.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
class StorageLocalConfiguration : public StorageObjectStorage::Configuration
{
public:
using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr;
static constexpr auto type_name = "local";
StorageLocalConfiguration() = default;
StorageLocalConfiguration(const StorageLocalConfiguration & other) = default;
std::string getTypeName() const override { return type_name; }
std::string getEngineName() const override { return "Local"; }
Path getPath() const override { return path; }
void setPath(const Path & path_) override { path = path_; }
const Paths & getPaths() const override { return paths; }
void setPaths(const Paths & paths_) override { paths = paths_; }
String getNamespace() const override { return ""; }
String getDataSourceDescription() const override { return ""; }
StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override;
ConfigurationPtr clone() override { return std::make_shared<StorageLocalConfiguration>(*this); }
ObjectStoragePtr createObjectStorage(ContextPtr, bool) override { return std::make_shared<LocalObjectStorage>("/"); }
void addStructureAndFormatToArgs(ASTs &, const String &, const String &, ContextPtr) override { }
private:
void fromNamedCollection(const NamedCollection & collection, ContextPtr context) override;
void fromAST(ASTs & args, ContextPtr context, bool with_structure) override;
Path path;
Paths paths;
};
}

View File

@ -465,6 +465,12 @@ SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context, c
DEFAULT_SCHEMA_CACHE_ELEMENTS));
return schema_cache;
}
else if (storage_type_name == "local")
{
static SchemaCache schema_cache(
context->getConfigRef().getUInt("schema_inference_cache_max_elements_for_local", DEFAULT_SCHEMA_CACHE_ELEMENTS));
return schema_cache;
}
else
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported storage type: {}", storage_type_name);
}

View File

@ -162,7 +162,7 @@ public:
ContextPtr local_context,
bool with_table_structure);
/// Storage type: s3, hdfs, azure.
/// Storage type: s3, hdfs, azure, local.
virtual std::string getTypeName() const = 0;
/// Engine name: S3, HDFS, Azure.
virtual std::string getEngineName() const = 0;

View File

@ -417,10 +417,7 @@ std::future<StorageObjectStorageSource::ReaderHolder> StorageObjectStorageSource
}
std::unique_ptr<ReadBuffer> StorageObjectStorageSource::createReadBuffer(
const ObjectInfo & object_info,
const ObjectStoragePtr & object_storage,
const ContextPtr & context_,
const LoggerPtr & log)
const ObjectInfo & object_info, const ObjectStoragePtr & object_storage, const ContextPtr & context_, const LoggerPtr & log)
{
const auto & object_size = object_info.metadata->size_bytes;

View File

@ -58,8 +58,8 @@ IStatistics::IStatistics(const SingleStatisticsDescription & stat_)
{
}
ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_)
: stats_desc(stats_desc_)
ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_, const String & column_name_)
: stats_desc(stats_desc_), column_name(column_name_)
{
}
@ -176,7 +176,7 @@ String ColumnStatistics::getFileName() const
const String & ColumnStatistics::columnName() const
{
return stats_desc.column_name;
return column_name;
}
UInt64 ColumnStatistics::rowCount() const
@ -227,15 +227,15 @@ void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & st
}
}
ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescription & stats) const
ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnDescription & column_desc) const
{
ColumnStatisticsPtr column_stat = std::make_shared<ColumnStatistics>(stats);
for (const auto & [type, desc] : stats.types_to_desc)
ColumnStatisticsPtr column_stat = std::make_shared<ColumnStatistics>(column_desc.statistics, column_desc.name);
for (const auto & [type, desc] : column_desc.statistics.types_to_desc)
{
auto it = creators.find(type);
if (it == creators.end())
throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq' and 'count_min'", type);
auto stat_ptr = (it->second)(desc, stats.data_type);
auto stat_ptr = (it->second)(desc, column_desc.type);
column_stat->stats[type] = stat_ptr;
}
return column_stat;
@ -246,7 +246,7 @@ ColumnsStatistics MergeTreeStatisticsFactory::getMany(const ColumnsDescription &
ColumnsStatistics result;
for (const auto & col : columns)
if (!col.statistics.empty())
result.push_back(get(col.statistics));
result.push_back(get(col));
return result;
}
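
A condensed sketch, with hypothetical types, of where this refactoring moves the column name: the statistics description no longer stores it, so the factory receives the whole column description and keeps the name next to the statistics object it builds, mirroring `MergeTreeStatisticsFactory::get(const ColumnDescription &)`:

```cpp
#include <map>
#include <string>

struct StatisticsDescription            // what used to also carry a column_name
{
    std::map<std::string, std::string> types_to_desc;
};

struct ColumnDescription
{
    std::string name;
    std::string type;
    StatisticsDescription statistics;
};

struct ColumnStatistics
{
    StatisticsDescription desc;
    std::string column_name;            // the name now travels with the built object, not the description

    ColumnStatistics(const StatisticsDescription & desc_, const std::string & column_name_)
        : desc(desc_), column_name(column_name_) {}
};

ColumnStatistics buildStatistics(const ColumnDescription & column)
{
    return ColumnStatistics(column.statistics, column.name);
}

int main()
{
    ColumnDescription column{"user_id", "UInt64", {{{"uniq", "uniq()"}}}};
    ColumnStatistics stats = buildStatistics(column);
    return stats.column_name == column.name ? 0 : 1;
}
```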

View File

@ -54,7 +54,7 @@ using StatisticsPtr = std::shared_ptr<IStatistics>;
class ColumnStatistics
{
public:
explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_);
explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_, const String & column_name_);
void serialize(WriteBuffer & buf);
void deserialize(ReadBuffer & buf);
@ -73,10 +73,12 @@ public:
private:
friend class MergeTreeStatisticsFactory;
ColumnStatisticsDescription stats_desc;
String column_name;
std::map<StatisticsType, StatisticsPtr> stats;
UInt64 rows = 0; /// the number of rows in the column
};
struct ColumnDescription;
class ColumnsDescription;
using ColumnStatisticsPtr = std::shared_ptr<ColumnStatistics>;
using ColumnsStatistics = std::vector<ColumnStatisticsPtr>;
@ -91,7 +93,7 @@ public:
using Validator = std::function<void(const SingleStatisticsDescription & stats, const DataTypePtr & data_type)>;
using Creator = std::function<StatisticsPtr(const SingleStatisticsDescription & stats, const DataTypePtr & data_type)>;
ColumnStatisticsPtr get(const ColumnStatisticsDescription & stats) const;
ColumnStatisticsPtr get(const ColumnDescription & column_desc) const;
ColumnsStatistics getMany(const ColumnsDescription & columns) const;
void registerValidator(StatisticsType type, Validator validator);

View File

@ -6,7 +6,6 @@
#include <Parsers/ASTStatisticsDeclaration.h>
#include <Parsers/queryToString.h>
#include <Parsers/ParserCreateQuery.h>
#include <Poco/Logger.h>
#include <Storages/ColumnsDescription.h>
@ -97,16 +96,13 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe
{
chassert(merging_column_type);
if (column_name.empty())
column_name = merging_column_name;
data_type = merging_column_type;
for (const auto & [stats_type, stats_desc]: other.types_to_desc)
{
if (!if_not_exists && types_to_desc.contains(stats_type))
{
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics type name {} has existed in column {}", stats_type, column_name);
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics type name {} has existed in column {}", stats_type, merging_column_name);
}
else if (!types_to_desc.contains(stats_type))
types_to_desc.emplace(stats_type, stats_desc);
@ -115,9 +111,6 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe
void ColumnStatisticsDescription::assign(const ColumnStatisticsDescription & other)
{
if (other.column_name != column_name)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", column_name, other.column_name);
types_to_desc = other.types_to_desc;
data_type = other.data_type;
}
@ -127,7 +120,7 @@ void ColumnStatisticsDescription::clear()
types_to_desc.clear();
}
std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns)
std::vector<std::pair<String, ColumnStatisticsDescription>> ColumnStatisticsDescription::fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns)
{
const auto * stat_definition_ast = definition_ast->as<ASTStatisticsDeclaration>();
if (!stat_definition_ast)
@ -145,7 +138,7 @@ std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(co
statistics_types.emplace(stat.type, stat);
}
std::vector<ColumnStatisticsDescription> result;
std::vector<std::pair<String, ColumnStatisticsDescription>> result;
result.reserve(stat_definition_ast->columns->children.size());
for (const auto & column_ast : stat_definition_ast->columns->children)
@ -157,10 +150,9 @@ std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(co
throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column name {}", physical_column_name);
const auto & column = columns.getPhysical(physical_column_name);
stats.column_name = column.name;
stats.data_type = column.type;
stats.types_to_desc = statistics_types;
result.push_back(stats);
result.emplace_back(physical_column_name, stats);
}
if (result.empty())
@ -175,14 +167,13 @@ ColumnStatisticsDescription ColumnStatisticsDescription::fromColumnDeclaration(c
if (stat_type_list_ast->children.empty())
throw Exception(ErrorCodes::INCORRECT_QUERY, "We expect at least one statistics type for column {}", queryToString(column));
ColumnStatisticsDescription stats;
stats.column_name = column.name;
for (const auto & ast : stat_type_list_ast->children)
{
const auto & stat_type = ast->as<const ASTFunction &>().name;
SingleStatisticsDescription stat(stringToStatisticsType(Poco::toLower(stat_type)), ast->clone());
if (stats.types_to_desc.contains(stat.type))
throw Exception(ErrorCodes::INCORRECT_QUERY, "Column {} already contains statistics type {}", stats.column_name, stat_type);
throw Exception(ErrorCodes::INCORRECT_QUERY, "Column {} already contains statistics type {}", column.name, stat_type);
stats.types_to_desc.emplace(stat.type, std::move(stat));
}
stats.data_type = data_type;

View File

@ -55,12 +55,12 @@ struct ColumnStatisticsDescription
ASTPtr getAST() const;
static std::vector<ColumnStatisticsDescription> fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns);
/// get a vector of <column name, statistics desc> pair
static std::vector<std::pair<String, ColumnStatisticsDescription>> fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns);
static ColumnStatisticsDescription fromColumnDeclaration(const ASTColumnDeclaration & column, DataTypePtr data_type);
using StatisticsTypeDescMap = std::map<StatisticsType, SingleStatisticsDescription>;
StatisticsTypeDescMap types_to_desc;
String column_name;
DataTypePtr data_type;
};

View File

@ -1051,17 +1051,27 @@ void StorageWindowView::threadFuncFireProc()
if (shutdown_called)
return;
/// Acquiring the lock can take seconds (depends on how long it takes to push) so we keep a reference to remember
/// what's the starting point where we want to push from
UInt32 timestamp_start = now();
std::lock_guard lock(fire_signal_mutex);
/// TODO: consider using time_t instead (for every timestamp in this class)
UInt32 timestamp_now = now();
LOG_TRACE(log, "Now: {}, next fire signal: {}, max watermark: {}", timestamp_now, next_fire_signal, max_watermark);
LOG_TRACE(
log,
"Start: {}, now: {}, next fire signal: {}, max watermark: {}",
timestamp_start,
timestamp_now,
next_fire_signal,
max_watermark);
while (next_fire_signal <= timestamp_now)
{
try
{
if (max_watermark >= timestamp_now)
if (max_watermark >= timestamp_start)
fire(next_fire_signal);
}
catch (...)
@ -1075,11 +1085,18 @@ void StorageWindowView::threadFuncFireProc()
slide_interval *= 86400;
next_fire_signal += slide_interval;
LOG_TRACE(log, "Now: {}, next fire signal: {}, max watermark: {}, max fired watermark: {}, slide interval: {}",
timestamp_now, next_fire_signal, max_watermark, max_fired_watermark, slide_interval);
LOG_TRACE(
log,
"Start: {}, now: {}, next fire signal: {}, max watermark: {}, max fired watermark: {}, slide interval: {}",
timestamp_start,
timestamp_now,
next_fire_signal,
max_watermark,
max_fired_watermark,
slide_interval);
}
if (max_watermark >= timestamp_now)
if (max_watermark >= timestamp_start)
clean_cache_task->schedule();
UInt64 next_fire_ms = static_cast<UInt64>(next_fire_signal) * 1000;
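
A tiny standalone sketch of the pattern applied here, with hypothetical names and simplified types: the reference timestamp is captured before acquiring a lock that can block for seconds, so later watermark comparisons use the moment the thread woke up rather than a time skewed by lock contention:

```cpp
#include <chrono>
#include <iostream>
#include <mutex>
#include <thread>

std::mutex fire_signal_mutex;

int main()
{
    using namespace std::chrono;

    // Another thread holds the mutex for a while (e.g. a slow push to the target table).
    std::thread holder([]
    {
        std::lock_guard lock(fire_signal_mutex);
        std::this_thread::sleep_for(seconds(2));
    });
    std::this_thread::sleep_for(milliseconds(100)); // make sure the holder grabbed the lock first

    auto timestamp_start = system_clock::now();     // captured *before* blocking on the lock
    std::lock_guard lock(fire_signal_mutex);
    auto timestamp_now = system_clock::now();       // can be seconds later

    std::cout << "waited " << duration_cast<seconds>(timestamp_now - timestamp_start).count()
              << "s for the lock; comparisons should use timestamp_start\n";

    holder.join();
}
```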

View File

@ -76,6 +76,21 @@ struct TableFunctionIcebergName
static constexpr auto name = "iceberg";
};
struct TableFunctionIcebergS3Name
{
static constexpr auto name = "icebergS3";
};
struct TableFunctionIcebergAzureName
{
static constexpr auto name = "icebergAzure";
};
struct TableFunctionIcebergLocalName
{
static constexpr auto name = "icebergLocal";
};
struct TableFunctionDeltaLakeName
{
static constexpr auto name = "deltaLake";
@ -86,14 +101,20 @@ struct TableFunctionHudiName
static constexpr auto name = "hudi";
};
#if USE_AWS_S3
#if USE_AVRO
# if USE_AWS_S3
using TableFunctionIceberg = ITableFunctionDataLake<TableFunctionIcebergName, StorageIceberg, TableFunctionS3>;
using TableFunctionIcebergS3 = ITableFunctionDataLake<TableFunctionIcebergS3Name, StorageIceberg, TableFunctionS3>;
# endif
# if USE_AZURE_BLOB_STORAGE
using TableFunctionIcebergAzure = ITableFunctionDataLake<TableFunctionIcebergAzureName, StorageIceberg, TableFunctionAzureBlob>;
# endif
using TableFunctionIcebergLocal = ITableFunctionDataLake<TableFunctionIcebergLocalName, StorageIceberg, TableFunctionLocal>;
#endif
#if USE_PARQUET
#if USE_AWS_S3
# if USE_PARQUET
using TableFunctionDeltaLake = ITableFunctionDataLake<TableFunctionDeltaLakeName, StorageDeltaLake, TableFunctionS3>;
#endif
using TableFunctionHudi = ITableFunctionDataLake<TableFunctionHudiName, StorageHudi, TableFunctionS3>;
#endif
}

View File

@ -14,10 +14,11 @@
#include <Storages/ObjectStorage/Utils.h>
#include <Storages/NamedCollectionsHelpers.h>
#include <Storages/ObjectStorage/S3/Configuration.h>
#include <Storages/ObjectStorage/HDFS/Configuration.h>
#include <Storages/ObjectStorage/StorageObjectStorage.h>
#include <Storages/ObjectStorage/Azure/Configuration.h>
#include <Storages/ObjectStorage/HDFS/Configuration.h>
#include <Storages/ObjectStorage/Local/Configuration.h>
#include <Storages/ObjectStorage/S3/Configuration.h>
#include <Storages/ObjectStorage/StorageObjectStorage.h>
namespace DB
@ -223,5 +224,5 @@ template class TableFunctionObjectStorage<OSSDefinition, StorageS3Configuration>
template class TableFunctionObjectStorage<HDFSDefinition, StorageHDFSConfiguration>;
template class TableFunctionObjectStorage<HDFSClusterDefinition, StorageHDFSConfiguration>;
#endif
template class TableFunctionObjectStorage<LocalDefinition, StorageLocalConfiguration>;
}

View File

@ -1,11 +1,11 @@
#pragma once
#include "config.h"
#include <TableFunctions/ITableFunction.h>
#include <Formats/FormatFactory.h>
#include <Disks/ObjectStorages/IObjectStorage_fwd.h>
#include <Storages/VirtualColumnUtils.h>
#include <Formats/FormatFactory.h>
#include <Storages/ObjectStorage/StorageObjectStorage.h>
#include <Storages/VirtualColumnUtils.h>
#include <TableFunctions/ITableFunction.h>
#include "config.h"
namespace DB
{
@ -14,6 +14,7 @@ class Context;
class StorageS3Configuration;
class StorageAzureConfiguration;
class StorageHDFSConfiguration;
class StorageLocalConfiguration;
struct S3StorageSettings;
struct AzureStorageSettings;
struct HDFSStorageSettings;
@ -90,6 +91,17 @@ struct HDFSDefinition
static constexpr auto max_number_of_arguments = 4;
};
struct LocalDefinition
{
static constexpr auto name = "local";
static constexpr auto storage_type_name = "Local";
static constexpr auto signature = " - path\n"
" - path, format\n"
" - path, format, structure\n"
" - path, format, structure, compression_method\n";
static constexpr auto max_number_of_arguments = 4;
};
template <typename Definition, typename Configuration>
class TableFunctionObjectStorage : public ITableFunction
{
@ -169,4 +181,6 @@ using TableFunctionAzureBlob = TableFunctionObjectStorage<AzureDefinition, Stora
#if USE_HDFS
using TableFunctionHDFS = TableFunctionObjectStorage<HDFSDefinition, StorageHDFSConfiguration>;
#endif
using TableFunctionLocal = TableFunctionObjectStorage<LocalDefinition, StorageLocalConfiguration>;
}

View File

@ -4,24 +4,43 @@
namespace DB
{
#if USE_AWS_S3
#if USE_AVRO
void registerTableFunctionIceberg(TableFunctionFactory & factory)
{
# if USE_AWS_S3
factory.registerFunction<TableFunctionIceberg>(
{
.documentation =
{
.description=R"(The table function can be used to read the Iceberg table stored on object store.)",
{.documentation
= {.description = R"(The table function can be used to read the Iceberg table stored on S3 object store. Alias to icebergS3)",
.examples{{"iceberg", "SELECT * FROM iceberg(url, access_key_id, secret_access_key)", ""}},
.categories{"DataLake"}
},
.allow_readonly = false
});
.categories{"DataLake"}},
.allow_readonly = false});
factory.registerFunction<TableFunctionIcebergS3>(
{.documentation
= {.description = R"(The table function can be used to read the Iceberg table stored on S3 object store.)",
.examples{{"icebergS3", "SELECT * FROM icebergS3(url, access_key_id, secret_access_key)", ""}},
.categories{"DataLake"}},
.allow_readonly = false});
# endif
# if USE_AZURE_BLOB_STORAGE
factory.registerFunction<TableFunctionIcebergAzure>(
{.documentation
= {.description = R"(The table function can be used to read the Iceberg table stored on Azure object store.)",
.examples{{"icebergAzure", "SELECT * FROM icebergAzure(url, access_key_id, secret_access_key)", ""}},
.categories{"DataLake"}},
.allow_readonly = false});
# endif
factory.registerFunction<TableFunctionIcebergLocal>(
{.documentation
= {.description = R"(The table function can be used to read the Iceberg table stored locally.)",
.examples{{"icebergLocal", "SELECT * FROM icebergLocal(filename)", ""}},
.categories{"DataLake"}},
.allow_readonly = false});
}
#endif
#if USE_PARQUET
#if USE_AWS_S3
# if USE_PARQUET
void registerTableFunctionDeltaLake(TableFunctionFactory & factory)
{
factory.registerFunction<TableFunctionDeltaLake>(
@ -55,11 +74,11 @@ void registerTableFunctionHudi(TableFunctionFactory & factory)
void registerDataLakeTableFunctions(TableFunctionFactory & factory)
{
UNUSED(factory);
#if USE_AWS_S3
#if USE_AVRO
registerTableFunctionIceberg(factory);
#endif
#if USE_PARQUET
#if USE_AWS_S3
# if USE_PARQUET
registerTableFunctionDeltaLake(factory);
#endif
registerTableFunctionHudi(factory);
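As a hedged illustration (not part of this commit), the table functions registered above can be exercised with the named collections used elsewhere in this diff; the sketch below assumes an `instance` node, a `cluster` fixture and the `s3`/`azure`/`local` named collections from tests/integration/test_storage_iceberg, and mirrors the argument patterns of get_creation_expression() further below.

```python
# Hedged sketch, assuming the integration-test fixtures described above;
# the table layout under iceberg_data/default/<name>/ matches the tests below.
def read_iceberg_counts(instance, cluster, table_name):
    # icebergS3: table files stored in MinIO, credentials come from the `s3` named collection.
    s3_count = instance.query(
        f"SELECT count() FROM icebergS3(s3, "
        f"filename = 'iceberg_data/default/{table_name}/', "
        f"format = Parquet, url = 'http://minio1:9001/{cluster.minio_bucket}/')"
    )
    # icebergAzure: the same layout stored in Azurite, via the `azure` named collection.
    azure_count = instance.query(
        f"SELECT count() FROM icebergAzure(azure, "
        f"container = '{cluster.azure_container_name}', "
        f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', "
        f"blob_path = '/iceberg_data/default/{table_name}/', format = Parquet)"
    )
    # icebergLocal: table files copied onto the server's local filesystem.
    local_count = instance.query(
        f"SELECT count() FROM icebergLocal(local, "
        f"path = '/iceberg_data/default/{table_name}/', format = Parquet)"
    )
    return s3_count, azure_count, local_count
```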

View File

@ -286,4 +286,7 @@ class Utils:
@staticmethod
def is_job_triggered_manually():
return "robot" not in Envs.GITHUB_ACTOR
return (
"robot" not in Envs.GITHUB_ACTOR
and "clickhouse-ci" not in Envs.GITHUB_ACTOR
)

View File

@ -2,30 +2,92 @@ from minio import Minio
import glob
import os
import json
import shutil
def upload_directory(minio_client, bucket_name, local_path, s3_path):
result_files = []
for local_file in glob.glob(local_path + "/**"):
if os.path.isfile(local_file):
from enum import Enum
class CloudUploader:
def upload_directory(self, local_path, remote_blob_path, **kwargs):
print(kwargs)
result_files = []
# print(f"Arguments: {local_path}, {s3_path}")
# for local_file in glob.glob(local_path + "/**"):
# print("Local file: {}", local_file)
for local_file in glob.glob(local_path + "/**"):
result_local_path = os.path.join(local_path, local_file)
result_s3_path = os.path.join(s3_path, local_file)
print(f"Putting file {result_local_path} to {result_s3_path}")
minio_client.fput_object(
bucket_name=bucket_name,
object_name=result_s3_path,
file_path=result_local_path,
result_remote_blob_path = os.path.join(remote_blob_path, local_file)
if os.path.isfile(local_file):
self.upload_file(result_local_path, result_remote_blob_path, **kwargs)
result_files.append(result_remote_blob_path)
else:
files = self.upload_directory(
result_local_path, result_remote_blob_path, **kwargs
)
result_files.extend(files)
return result_files
class S3Uploader(CloudUploader):
def __init__(self, minio_client, bucket_name):
self.minio_client = minio_client
self.bucket_name = bucket_name
def upload_file(self, local_path, remote_blob_path, bucket=None):
print(f"Upload to bucket: {bucket}")
if bucket is None:
bucket = self.bucket_name
self.minio_client.fput_object(
bucket_name=bucket,
object_name=remote_blob_path,
file_path=local_path,
)
class LocalUploader(CloudUploader):
def __init__(self, clickhouse_node):
self.clickhouse_node = clickhouse_node
def upload_file(self, local_path, remote_blob_path):
dir_path = os.path.dirname(remote_blob_path)
if dir_path != "":
self.clickhouse_node.exec_in_container(
[
"bash",
"-c",
"mkdir -p {}".format(dir_path),
]
)
result_files.append(result_s3_path)
self.clickhouse_node.copy_file_to_container(local_path, remote_blob_path)
class AzureUploader(CloudUploader):
def __init__(self, blob_service_client, container_name):
self.blob_service_client = blob_service_client
self.container_client = self.blob_service_client.get_container_client(
container_name
)
def upload_file(self, local_path, remote_blob_path, container_name=None):
if container_name is None:
container_client = self.container_client
else:
files = upload_directory(
minio_client,
bucket_name,
os.path.join(local_path, local_file),
os.path.join(s3_path, local_file),
container_client = self.blob_service_client.get_container_client(
container_name
)
result_files.extend(files)
return result_files
blob_client = container_client.get_blob_client(remote_blob_path)
with open(local_path, "rb") as data:
blob_client.upload_blob(data, overwrite=True)
def upload_directory(minio_client, bucket, local_path, remote_path):
return S3Uploader(minio_client=minio_client, bucket_name=bucket).upload_directory(
local_path, remote_path
)
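A hedged usage sketch, not part of the commit: the uploader classes above share a single upload_directory() entry point, so a test can select a backend once and reuse the same call; the MinIO client, the Azure blob service client and the ClickHouse node are assumed to come from the integration-test cluster fixture.

```python
# Hedged sketch: backend selection for the CloudUploader helpers defined above.
# `cluster` is assumed to be the integration-test cluster object that exposes
# minio_client, minio_bucket, blob_service_client, azure_container_name and instances.
def make_uploader(kind, cluster):
    if kind == "s3":
        return S3Uploader(cluster.minio_client, cluster.minio_bucket)
    if kind == "azure":
        return AzureUploader(cluster.blob_service_client, cluster.azure_container_name)
    if kind == "local":
        return LocalUploader(cluster.instances["node1"])
    raise ValueError(f"unknown uploader kind: {kind}")


# Example: mirror a locally generated Iceberg table into the chosen backend.
# uploader = make_uploader("s3", started_cluster)
# files = uploader.upload_directory(
#     "/iceberg_data/default/my_table/", "/iceberg_data/default/my_table/"
# )
```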
def get_file_contents(minio_client, bucket, s3_path):

View File

@ -200,7 +200,6 @@ def test_distributed_replica_max_ignored_errors():
"connect_timeout": 2,
"receive_timeout": 2,
"send_timeout": 2,
"idle_connection_timeout": 2,
"tcp_keep_alive_timeout": 2,
"distributed_replica_max_ignored_errors": 0,
"distributed_replica_error_half_life": 60,

View File

@ -6,11 +6,17 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance(
"node1", user_configs=["config/config.xml"], with_zookeeper=True
"node1",
user_configs=["config/config.xml"],
with_zookeeper=True,
macros={"replica": "a", "shard": "shard1"},
)
node2 = cluster.add_instance(
"node2", user_configs=["config/config.xml"], with_zookeeper=True
"node2",
user_configs=["config/config.xml"],
with_zookeeper=True,
macros={"replica": "b", "shard": "shard1"},
)
@ -129,8 +135,8 @@ def test_single_node_normal(started_cluster):
def test_replicated_table_ddl(started_cluster):
node1.query("DROP TABLE IF EXISTS test_stat")
node2.query("DROP TABLE IF EXISTS test_stat")
node1.query("DROP TABLE IF EXISTS test_stat SYNC")
node2.query("DROP TABLE IF EXISTS test_stat SYNC")
node1.query(
"""
@ -183,3 +189,19 @@ def test_replicated_table_ddl(started_cluster):
)
check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_3", "a", True)
check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_3", "b", True)
def test_replicated_db(started_cluster):
node1.query("DROP DATABASE IF EXISTS test SYNC")
node2.query("DROP DATABASE IF EXISTS test SYNC")
node1.query(
"CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')"
)
node2.query(
"CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')"
)
node1.query(
"CREATE TABLE test.test_stats (a Int64, b Int64) ENGINE = ReplicatedMergeTree() ORDER BY()"
)
node2.query("ALTER TABLE test.test_stats MODIFY COLUMN b Float64")
node2.query("ALTER TABLE test.test_stats MODIFY STATISTICS b TYPE tdigest")

View File

@ -29,6 +29,9 @@ from datetime import datetime
from pyspark.sql.functions import monotonically_increasing_id, row_number
from pyspark.sql.window import Window
from minio.deleteobjects import DeleteObject
import pyarrow as pa
import pyarrow.parquet as pq
from deltalake.writer import write_deltalake
from helpers.s3_tools import (
prepare_s3_bucket,
@ -728,3 +731,96 @@ SELECT * FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.mini
)
== 1
)
def test_complex_types(started_cluster):
node = started_cluster.instances["node1"]
minio_client = started_cluster.minio_client
bucket = started_cluster.minio_bucket
schema = pa.schema(
[
("id", pa.int32()),
("name", pa.string()),
(
"address",
pa.struct(
[
("street", pa.string()),
("city", pa.string()),
("state", pa.string()),
]
),
),
("interests", pa.list_(pa.string())),
(
"metadata",
pa.map_(
pa.string(), pa.string()
), # Map with string keys and string values
),
]
)
# Create sample data
data = [
pa.array([1, 2, 3], type=pa.int32()),
pa.array(["John Doe", "Jane Smith", "Jake Johnson"], type=pa.string()),
pa.array(
[
{"street": "123 Elm St", "city": "Springfield", "state": "IL"},
{"street": "456 Maple St", "city": "Shelbyville", "state": "IL"},
{"street": "789 Oak St", "city": "Ogdenville", "state": "IL"},
],
type=schema.field("address").type,
),
pa.array(
[
pa.array(["dancing", "coding", "hiking"]),
pa.array(["dancing", "coding", "hiking"]),
pa.array(["dancing", "coding", "hiking"]),
],
type=schema.field("interests").type,
),
pa.array(
[
{"key1": "value1", "key2": "value2"},
{"key1": "value3", "key2": "value4"},
{"key1": "value5", "key2": "value6"},
],
type=schema.field("metadata").type,
),
]
endpoint_url = f"http://{started_cluster.minio_ip}:{started_cluster.minio_port}"
aws_access_key_id = "minio"
aws_secret_access_key = "minio123"
table_name = randomize_table_name("test_complex_types")
storage_options = {
"AWS_ENDPOINT_URL": endpoint_url,
"AWS_ACCESS_KEY_ID": aws_access_key_id,
"AWS_SECRET_ACCESS_KEY": aws_secret_access_key,
"AWS_ALLOW_HTTP": "true",
"AWS_S3_ALLOW_UNSAFE_RENAME": "true",
}
path = f"s3://root/{table_name}"
table = pa.Table.from_arrays(data, schema=schema)
write_deltalake(path, table, storage_options=storage_options)
assert "1\n2\n3\n" in node.query(
f"SELECT id FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/root/{table_name}' , 'minio', 'minio123')"
)
assert (
"('123 Elm St','Springfield','IL')\n('456 Maple St','Shelbyville','IL')\n('789 Oak St','Ogdenville','IL')"
in node.query(
f"SELECT address FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/root/{table_name}' , 'minio', 'minio123')"
)
)
assert (
"{'key1':'value1','key2':'value2'}\n{'key1':'value3','key2':'value4'}\n{'key1':'value5','key2':'value6'}"
in node.query(
f"SELECT metadata FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/root/{table_name}' , 'minio', 'minio123')"
)
)
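A hedged follow-up, not part of the commit: given the table written above, the nested columns can typically be addressed directly, assuming the Parquet struct is inferred as a named Tuple (as the tuple output above suggests), the list as an Array and the map as a Map; the deltaLake() arguments are the same as in the asserts above.

```python
# Hedged sketch: element access on the complex columns created above.
# address.city assumes named-Tuple inference for the Parquet struct.
nested = node.query(
    f"SELECT address.city, interests[1], metadata['key1'] "
    f"FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/root/{table_name}', 'minio', 'minio123')"
)
```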

View File

@ -5,5 +5,11 @@
<access_key_id>minio</access_key_id>
<secret_access_key>minio123</secret_access_key>
</s3>
<azure>
<account_name>devstoreaccount1</account_name>
<account_key>Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==</account_key>
</azure>
<local>
</local>
</named_collections>
</clickhouse>

View File

@ -28,12 +28,15 @@ from pyspark.sql.functions import monotonically_increasing_id, row_number
from pyspark.sql.window import Window
from pyspark.sql.readwriter import DataFrameWriter, DataFrameWriterV2
from minio.deleteobjects import DeleteObject
from azure.storage.blob import BlobServiceClient
from helpers.s3_tools import (
prepare_s3_bucket,
upload_directory,
get_file_contents,
list_s3_objects,
S3Uploader,
AzureUploader,
LocalUploader,
)
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
@ -67,6 +70,7 @@ def started_cluster():
main_configs=["configs/config.d/named_collections.xml"],
user_configs=["configs/users.d/users.xml"],
with_minio=True,
with_azurite=True,
stay_alive=True,
)
@ -77,6 +81,25 @@ def started_cluster():
logging.info("S3 bucket created")
cluster.spark_session = get_spark()
cluster.default_s3_uploader = S3Uploader(
cluster.minio_client, cluster.minio_bucket
)
cluster.azure_container_name = "mycontainer"
cluster.blob_service_client = cluster.blob_service_client
container_client = cluster.blob_service_client.create_container(
cluster.azure_container_name
)
cluster.container_client = container_client
cluster.default_azure_uploader = AzureUploader(
cluster.blob_service_client, cluster.azure_container_name
)
cluster.default_local_uploader = LocalUploader(cluster.instances["node1"])
yield cluster
@ -142,12 +165,65 @@ def generate_data(spark, start, end):
return df
def create_iceberg_table(node, table_name, format="Parquet", bucket="root"):
def get_creation_expression(
storage_type,
table_name,
cluster,
format="Parquet",
table_function=False,
**kwargs,
):
if storage_type == "s3":
if "bucket" in kwargs:
bucket = kwargs["bucket"]
else:
bucket = cluster.minio_bucket
print(bucket)
if table_function:
return f"icebergS3(s3, filename = 'iceberg_data/default/{table_name}/', format={format}, url = 'http://minio1:9001/{bucket}/')"
else:
return f"""
DROP TABLE IF EXISTS {table_name};
CREATE TABLE {table_name}
ENGINE=IcebergS3(s3, filename = 'iceberg_data/default/{table_name}/', format={format}, url = 'http://minio1:9001/{bucket}/')"""
elif storage_type == "azure":
if table_function:
return f"""
icebergAzure(azure, container = '{cluster.azure_container_name}', storage_account_url = '{cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]}', blob_path = '/iceberg_data/default/{table_name}/', format={format})
"""
else:
return f"""
DROP TABLE IF EXISTS {table_name};
CREATE TABLE {table_name}
ENGINE=IcebergAzure(azure, container = {cluster.azure_container_name}, storage_account_url = '{cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]}', blob_path = '/iceberg_data/default/{table_name}/', format={format})"""
elif storage_type == "local":
if table_function:
return f"""
icebergLocal(local, path = '/iceberg_data/default/{table_name}/', format={format})
"""
else:
return f"""
DROP TABLE IF EXISTS {table_name};
CREATE TABLE {table_name}
ENGINE=IcebergLocal(local, path = '/iceberg_data/default/{table_name}/', format={format});"""
else:
raise Exception(f"Unknown iceberg storage type: {storage_type}")
def get_uuid_str():
return str(uuid.uuid4()).replace("-", "_")
def create_iceberg_table(
storage_type,
node,
table_name,
cluster,
format="Parquet",
**kwargs,
):
node.query(
f"""
DROP TABLE IF EXISTS {table_name};
CREATE TABLE {table_name}
ENGINE=Iceberg(s3, filename = 'iceberg_data/default/{table_name}/', format={format}, url = 'http://minio1:9001/{bucket}/')"""
get_creation_expression(storage_type, table_name, cluster, format, **kwargs)
)
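A hedged sketch, not part of the commit: the helpers above let one parametrized test cover both the engine DDL and the bare table-function expression; `instance`, `started_cluster` and the hypothetical table name are assumed from the fixtures below.

```python
# Hedged sketch: both forms produced by get_creation_expression() for one table.
ddl = get_creation_expression("s3", "my_table", started_cluster, format="Parquet")
expr = get_creation_expression(
    "s3", "my_table", started_cluster, format="Parquet", table_function=True
)
instance.query(ddl)  # DROP TABLE IF EXISTS ...; CREATE TABLE my_table ENGINE=IcebergS3(...)
count = instance.query(f"SELECT count() FROM {expr}")
```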
@ -170,40 +246,69 @@ def create_initial_data_file(
return result_path
def default_upload_directory(
started_cluster, storage_type, local_path, remote_path, **kwargs
):
if storage_type == "local":
return started_cluster.default_local_uploader.upload_directory(
local_path, remote_path, **kwargs
)
elif storage_type == "s3":
print(kwargs)
return started_cluster.default_s3_uploader.upload_directory(
local_path, remote_path, **kwargs
)
elif storage_type == "azure":
return started_cluster.default_azure_uploader.upload_directory(
local_path, remote_path, **kwargs
)
else:
raise Exception(f"Unknown iceberg storage type: {storage_type}")
@pytest.mark.parametrize("format_version", ["1", "2"])
def test_single_iceberg_file(started_cluster, format_version):
@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
def test_single_iceberg_file(started_cluster, format_version, storage_type):
instance = started_cluster.instances["node1"]
spark = started_cluster.spark_session
minio_client = started_cluster.minio_client
bucket = started_cluster.minio_bucket
TABLE_NAME = "test_single_iceberg_file_" + format_version
inserted_data = "SELECT number, toString(number) as string FROM numbers(100)"
parquet_data_path = create_initial_data_file(
started_cluster, instance, inserted_data, TABLE_NAME
TABLE_NAME = (
"test_single_iceberg_file_"
+ format_version
+ "_"
+ storage_type
+ "_"
+ get_uuid_str()
)
write_iceberg_from_file(
spark, parquet_data_path, TABLE_NAME, format_version=format_version
write_iceberg_from_df(spark, generate_data(spark, 0, 100), TABLE_NAME)
files = default_upload_directory(
started_cluster,
storage_type,
f"/iceberg_data/default/{TABLE_NAME}/",
f"/iceberg_data/default/{TABLE_NAME}/",
)
files = upload_directory(
minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", ""
)
create_iceberg_table(storage_type, instance, TABLE_NAME, started_cluster)
create_iceberg_table(instance, TABLE_NAME)
assert instance.query(f"SELECT * FROM {TABLE_NAME}") == instance.query(
inserted_data
"SELECT number, toString(number + 1) FROM numbers(100)"
)
@pytest.mark.parametrize("format_version", ["1", "2"])
def test_partition_by(started_cluster, format_version):
@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
def test_partition_by(started_cluster, format_version, storage_type):
instance = started_cluster.instances["node1"]
spark = started_cluster.spark_session
minio_client = started_cluster.minio_client
bucket = started_cluster.minio_bucket
TABLE_NAME = "test_partition_by_" + format_version
TABLE_NAME = (
"test_partition_by_"
+ format_version
+ "_"
+ storage_type
+ "_"
+ get_uuid_str()
)
write_iceberg_from_df(
spark,
@ -214,22 +319,33 @@ def test_partition_by(started_cluster, format_version):
partition_by="a",
)
files = upload_directory(
minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", ""
files = default_upload_directory(
started_cluster,
storage_type,
f"/iceberg_data/default/{TABLE_NAME}/",
f"/iceberg_data/default/{TABLE_NAME}/",
)
assert len(files) == 14 # 10 partitions + 4 metadata files
create_iceberg_table(instance, TABLE_NAME)
create_iceberg_table(storage_type, instance, TABLE_NAME, started_cluster)
assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 10
@pytest.mark.parametrize("format_version", ["1", "2"])
def test_multiple_iceberg_files(started_cluster, format_version):
@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
def test_multiple_iceberg_files(started_cluster, format_version, storage_type):
instance = started_cluster.instances["node1"]
spark = started_cluster.spark_session
minio_client = started_cluster.minio_client
bucket = started_cluster.minio_bucket
TABLE_NAME = "test_multiple_iceberg_files_" + format_version
TABLE_NAME = (
"test_multiple_iceberg_files_"
+ format_version
+ "_"
+ storage_type
+ "_"
+ get_uuid_str()
)
write_iceberg_from_df(
spark,
@ -239,9 +355,13 @@ def test_multiple_iceberg_files(started_cluster, format_version):
format_version=format_version,
)
files = upload_directory(
minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}", ""
files = default_upload_directory(
started_cluster,
storage_type,
f"/iceberg_data/default/{TABLE_NAME}/",
f"/iceberg_data/default/{TABLE_NAME}/",
)
# ['/iceberg_data/default/test_multiple_iceberg_files/data/00000-1-35302d56-f1ed-494e-a85b-fbf85c05ab39-00001.parquet',
# '/iceberg_data/default/test_multiple_iceberg_files/metadata/version-hint.text',
# '/iceberg_data/default/test_multiple_iceberg_files/metadata/3127466b-299d-48ca-a367-6b9b1df1e78c-m0.avro',
@ -249,7 +369,7 @@ def test_multiple_iceberg_files(started_cluster, format_version):
# '/iceberg_data/default/test_multiple_iceberg_files/metadata/v1.metadata.json']
assert len(files) == 5
create_iceberg_table(instance, TABLE_NAME)
create_iceberg_table(storage_type, instance, TABLE_NAME, started_cluster)
assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100
write_iceberg_from_df(
@ -259,8 +379,11 @@ def test_multiple_iceberg_files(started_cluster, format_version):
mode="append",
format_version=format_version,
)
files = upload_directory(
minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}", ""
files = default_upload_directory(
started_cluster,
storage_type,
f"/iceberg_data/default/{TABLE_NAME}/",
"",
)
assert len(files) == 9
@ -271,12 +394,13 @@ def test_multiple_iceberg_files(started_cluster, format_version):
@pytest.mark.parametrize("format_version", ["1", "2"])
def test_types(started_cluster, format_version):
@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
def test_types(started_cluster, format_version, storage_type):
instance = started_cluster.instances["node1"]
spark = started_cluster.spark_session
minio_client = started_cluster.minio_client
bucket = started_cluster.minio_bucket
TABLE_NAME = "test_types_" + format_version
TABLE_NAME = (
"test_types_" + format_version + "_" + storage_type + "_" + get_uuid_str()
)
data = [
(
@ -302,22 +426,29 @@ def test_types(started_cluster, format_version):
spark, df, TABLE_NAME, mode="overwrite", format_version=format_version
)
upload_directory(minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}", "")
default_upload_directory(
started_cluster,
storage_type,
f"/iceberg_data/default/{TABLE_NAME}/",
f"/iceberg_data/default/{TABLE_NAME}/",
)
create_iceberg_table(instance, TABLE_NAME)
create_iceberg_table(storage_type, instance, TABLE_NAME, started_cluster)
assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 1
assert (
instance.query(f"SELECT a, b, c, d, e FROM {TABLE_NAME}").strip()
== "123\tstring\t2000-01-01\t['str1','str2']\ttrue"
)
table_function = f"iceberg(s3, filename='iceberg_data/default/{TABLE_NAME}/')"
table_function_expr = get_creation_expression(
storage_type, TABLE_NAME, started_cluster, table_function=True
)
assert (
instance.query(f"SELECT a, b, c, d, e FROM {table_function}").strip()
instance.query(f"SELECT a, b, c, d, e FROM {table_function_expr}").strip()
== "123\tstring\t2000-01-01\t['str1','str2']\ttrue"
)
assert instance.query(f"DESCRIBE {table_function} FORMAT TSV") == TSV(
assert instance.query(f"DESCRIBE {table_function_expr} FORMAT TSV") == TSV(
[
["a", "Nullable(Int32)"],
["b", "Nullable(String)"],
@ -329,12 +460,20 @@ def test_types(started_cluster, format_version):
@pytest.mark.parametrize("format_version", ["1", "2"])
def test_delete_files(started_cluster, format_version):
@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
def test_delete_files(started_cluster, format_version, storage_type):
instance = started_cluster.instances["node1"]
spark = started_cluster.spark_session
minio_client = started_cluster.minio_client
bucket = started_cluster.minio_bucket
TABLE_NAME = "test_delete_files_" + format_version
TABLE_NAME = (
"test_delete_files_"
+ format_version
+ "_"
+ storage_type
+ "_"
+ get_uuid_str()
)
write_iceberg_from_df(
spark,
@ -344,17 +483,22 @@ def test_delete_files(started_cluster, format_version):
format_version=format_version,
)
files = upload_directory(
minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", ""
files = default_upload_directory(
started_cluster,
storage_type,
f"/iceberg_data/default/{TABLE_NAME}/",
f"/iceberg_data/default/{TABLE_NAME}/",
)
create_iceberg_table(instance, TABLE_NAME)
create_iceberg_table(storage_type, instance, TABLE_NAME, started_cluster)
assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100
spark.sql(f"DELETE FROM {TABLE_NAME} WHERE a >= 0")
files = upload_directory(
minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", ""
files = default_upload_directory(
started_cluster,
storage_type,
f"/iceberg_data/default/{TABLE_NAME}/",
"",
)
assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 0
@ -368,27 +512,41 @@ def test_delete_files(started_cluster, format_version):
format_version=format_version,
)
files = upload_directory(
minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", ""
files = default_upload_directory(
started_cluster,
storage_type,
f"/iceberg_data/default/{TABLE_NAME}/",
"",
)
assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100
spark.sql(f"DELETE FROM {TABLE_NAME} WHERE a >= 150")
files = upload_directory(
minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", ""
files = default_upload_directory(
started_cluster,
storage_type,
f"/iceberg_data/default/{TABLE_NAME}/",
"",
)
assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 50
@pytest.mark.parametrize("format_version", ["1", "2"])
def test_evolved_schema(started_cluster, format_version):
@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
def test_evolved_schema(started_cluster, format_version, storage_type):
instance = started_cluster.instances["node1"]
spark = started_cluster.spark_session
minio_client = started_cluster.minio_client
bucket = started_cluster.minio_bucket
TABLE_NAME = "test_evolved_schema_" + format_version
TABLE_NAME = (
"test_evolved_schema_"
+ format_version
+ "_"
+ storage_type
+ "_"
+ get_uuid_str()
)
write_iceberg_from_df(
spark,
@ -398,19 +556,25 @@ def test_evolved_schema(started_cluster, format_version):
format_version=format_version,
)
files = upload_directory(
minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", ""
files = default_upload_directory(
started_cluster,
storage_type,
f"/iceberg_data/default/{TABLE_NAME}/",
f"/iceberg_data/default/{TABLE_NAME}/",
)
create_iceberg_table(instance, TABLE_NAME)
create_iceberg_table(storage_type, instance, TABLE_NAME, started_cluster)
assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100
expected_data = instance.query(f"SELECT * FROM {TABLE_NAME} order by a, b")
spark.sql(f"ALTER TABLE {TABLE_NAME} ADD COLUMNS (x bigint)")
files = upload_directory(
minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", ""
files = default_upload_directory(
started_cluster,
storage_type,
f"/iceberg_data/default/{TABLE_NAME}/",
"",
)
error = instance.query_and_get_error(f"SELECT * FROM {TABLE_NAME}")
@ -422,12 +586,13 @@ def test_evolved_schema(started_cluster, format_version):
assert data == expected_data
def test_row_based_deletes(started_cluster):
@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
def test_row_based_deletes(started_cluster, storage_type):
instance = started_cluster.instances["node1"]
spark = started_cluster.spark_session
minio_client = started_cluster.minio_client
bucket = started_cluster.minio_bucket
TABLE_NAME = "test_row_based_deletes"
TABLE_NAME = "test_row_based_deletes_" + storage_type + "_" + get_uuid_str()
spark.sql(
f"CREATE TABLE {TABLE_NAME} (id bigint, data string) USING iceberg TBLPROPERTIES ('format-version' = '2', 'write.update.mode'='merge-on-read', 'write.delete.mode'='merge-on-read', 'write.merge.mode'='merge-on-read')"
@ -436,17 +601,23 @@ def test_row_based_deletes(started_cluster):
f"INSERT INTO {TABLE_NAME} select id, char(id + ascii('a')) from range(100)"
)
files = upload_directory(
minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", ""
files = default_upload_directory(
started_cluster,
storage_type,
f"/iceberg_data/default/{TABLE_NAME}/",
f"/iceberg_data/default/{TABLE_NAME}/",
)
create_iceberg_table(instance, TABLE_NAME)
create_iceberg_table(storage_type, instance, TABLE_NAME, started_cluster)
assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100
spark.sql(f"DELETE FROM {TABLE_NAME} WHERE id < 10")
files = upload_directory(
minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", ""
files = default_upload_directory(
started_cluster,
storage_type,
f"/iceberg_data/default/{TABLE_NAME}/",
"",
)
error = instance.query_and_get_error(f"SELECT * FROM {TABLE_NAME}")
@ -454,13 +625,21 @@ def test_row_based_deletes(started_cluster):
@pytest.mark.parametrize("format_version", ["1", "2"])
def test_schema_inference(started_cluster, format_version):
@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
def test_schema_inference(started_cluster, format_version, storage_type):
instance = started_cluster.instances["node1"]
spark = started_cluster.spark_session
minio_client = started_cluster.minio_client
bucket = started_cluster.minio_bucket
for format in ["Parquet", "ORC", "Avro"]:
TABLE_NAME = "test_schema_inference_" + format + "_" + format_version
TABLE_NAME = (
"test_schema_inference_"
+ format
+ "_"
+ format_version
+ "_"
+ storage_type
+ "_"
+ get_uuid_str()
)
# Types time, timestamptz, fixed are not supported in Spark.
spark.sql(
@ -470,12 +649,16 @@ def test_schema_inference(started_cluster, format_version):
spark.sql(
f"insert into {TABLE_NAME} select 42, 4242, 42.42, 4242.4242, decimal(42.42), decimal(42.42), decimal(42.42), date('2020-01-01'), timestamp('2020-01-01 20:00:00'), 'hello', binary('hello'), array(1,2,3), map('key', 'value'), struct(42, 'hello'), array(struct(map('key', array(map('key', 42))), struct(42, 'hello')))"
)
files = upload_directory(
minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", ""
files = default_upload_directory(
started_cluster,
storage_type,
f"/iceberg_data/default/{TABLE_NAME}/",
f"/iceberg_data/default/{TABLE_NAME}/",
)
create_iceberg_table(instance, TABLE_NAME, format)
create_iceberg_table(
storage_type, instance, TABLE_NAME, started_cluster, format=format
)
res = instance.query(
f"DESC {TABLE_NAME} FORMAT TSVRaw", settings={"print_pretty_type_names": 0}
@ -510,12 +693,18 @@ def test_schema_inference(started_cluster, format_version):
@pytest.mark.parametrize("format_version", ["1", "2"])
def test_metadata_file_selection(started_cluster, format_version):
@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
def test_metadata_file_selection(started_cluster, format_version, storage_type):
instance = started_cluster.instances["node1"]
spark = started_cluster.spark_session
minio_client = started_cluster.minio_client
bucket = started_cluster.minio_bucket
TABLE_NAME = "test_metadata_selection_" + format_version
TABLE_NAME = (
"test_metadata_selection_"
+ format_version
+ "_"
+ storage_type
+ "_"
+ get_uuid_str()
)
spark.sql(
f"CREATE TABLE {TABLE_NAME} (id bigint, data string) USING iceberg TBLPROPERTIES ('format-version' = '2', 'write.update.mode'='merge-on-read', 'write.delete.mode'='merge-on-read', 'write.merge.mode'='merge-on-read')"
@ -526,22 +715,31 @@ def test_metadata_file_selection(started_cluster, format_version):
f"INSERT INTO {TABLE_NAME} select id, char(id + ascii('a')) from range(10)"
)
files = upload_directory(
minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", ""
files = default_upload_directory(
started_cluster,
storage_type,
f"/iceberg_data/default/{TABLE_NAME}/",
f"/iceberg_data/default/{TABLE_NAME}/",
)
create_iceberg_table(instance, TABLE_NAME)
create_iceberg_table(storage_type, instance, TABLE_NAME, started_cluster)
assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 500
@pytest.mark.parametrize("format_version", ["1", "2"])
def test_metadata_file_format_with_uuid(started_cluster, format_version):
@pytest.mark.parametrize("storage_type", ["s3", "azure", "local"])
def test_metadata_file_format_with_uuid(started_cluster, format_version, storage_type):
instance = started_cluster.instances["node1"]
spark = started_cluster.spark_session
minio_client = started_cluster.minio_client
bucket = started_cluster.minio_bucket
TABLE_NAME = "test_metadata_selection_with_uuid_" + format_version
TABLE_NAME = (
"test_metadata_selection_with_uuid_"
+ format_version
+ "_"
+ storage_type
+ "_"
+ get_uuid_str()
)
spark.sql(
f"CREATE TABLE {TABLE_NAME} (id bigint, data string) USING iceberg TBLPROPERTIES ('format-version' = '2', 'write.update.mode'='merge-on-read', 'write.delete.mode'='merge-on-read', 'write.merge.mode'='merge-on-read')"
@ -555,40 +753,48 @@ def test_metadata_file_format_with_uuid(started_cluster, format_version):
for i in range(50):
os.rename(
f"/iceberg_data/default/{TABLE_NAME}/metadata/v{i + 1}.metadata.json",
f"/iceberg_data/default/{TABLE_NAME}/metadata/{str(i).zfill(5)}-{uuid.uuid4()}.metadata.json",
f"/iceberg_data/default/{TABLE_NAME}/metadata/{str(i).zfill(5)}-{get_uuid_str()}.metadata.json",
)
files = upload_directory(
minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", ""
files = default_upload_directory(
started_cluster,
storage_type,
f"/iceberg_data/default/{TABLE_NAME}/",
f"/iceberg_data/default/{TABLE_NAME}/",
)
create_iceberg_table(instance, TABLE_NAME)
create_iceberg_table(storage_type, instance, TABLE_NAME, started_cluster)
assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 500
def test_restart_broken(started_cluster):
def test_restart_broken_s3(started_cluster):
instance = started_cluster.instances["node1"]
spark = started_cluster.spark_session
TABLE_NAME = "test_restart_broken_table_function_s3" + "_" + get_uuid_str()
minio_client = started_cluster.minio_client
bucket = "broken2"
TABLE_NAME = "test_restart_broken_table_function"
if not minio_client.bucket_exists(bucket):
minio_client.make_bucket(bucket)
parquet_data_path = create_initial_data_file(
started_cluster,
instance,
"SELECT number, toString(number) FROM numbers(100)",
write_iceberg_from_df(
spark,
generate_data(spark, 0, 100),
TABLE_NAME,
mode="overwrite",
format_version="1",
)
write_iceberg_from_file(spark, parquet_data_path, TABLE_NAME, format_version="1")
files = upload_directory(
minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", ""
files = default_upload_directory(
started_cluster,
"s3",
f"/iceberg_data/default/{TABLE_NAME}/",
f"/iceberg_data/default/{TABLE_NAME}/",
bucket=bucket,
)
create_iceberg_table(instance, TABLE_NAME, bucket=bucket)
create_iceberg_table("s3", instance, TABLE_NAME, started_cluster, bucket=bucket)
assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100
s3_objects = list_s3_objects(minio_client, bucket, prefix="")
@ -613,8 +819,12 @@ def test_restart_broken(started_cluster):
minio_client.make_bucket(bucket)
files = upload_directory(
minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", ""
files = default_upload_directory(
started_cluster,
"s3",
f"/iceberg_data/default/{TABLE_NAME}/",
f"/iceberg_data/default/{TABLE_NAME}/",
bucket=bucket,
)
assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100

View File

@ -6,8 +6,8 @@ CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier};
CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.A (A UInt8) ENGINE = TinyLog;
CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.B (A UInt8) ENGINE = TinyLog;
SHOW TABLES from {CLICKHOUSE_DATABASE:Identifier};
SHOW TABLES in system where engine like '%System%' and name in ('numbers', 'one');
SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier};
SHOW TABLES IN system WHERE engine LIKE '%System%' AND name IN ('numbers', 'one') AND database = 'system';
SELECT name, toUInt32(metadata_modification_time) > 0, engine_full, create_table_query FROM system.tables WHERE database = currentDatabase() ORDER BY name FORMAT TSVRaw;
@ -16,7 +16,7 @@ SELECT name FROM system.tables WHERE is_temporary = 1 AND name = 'test_temporary
CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test_log(id UInt64) ENGINE = Log;
CREATE MATERIALIZED VIEW {CLICKHOUSE_DATABASE:Identifier}.test_materialized ENGINE = Log AS SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.test_log;
SELECT dependencies_database, dependencies_table FROM system.tables WHERE name = 'test_log' and database=currentDatabase();
SELECT dependencies_database, dependencies_table FROM system.tables WHERE name = 'test_log' AND database=currentDatabase();
DROP DATABASE {CLICKHOUSE_DATABASE:Identifier};

View File

@ -1,5 +1,4 @@
#!/usr/bin/env bash
# Tags: no-parallel
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
@ -42,6 +41,7 @@ function check()
while [ "$query_result" != "2.2" ]
do
sleep 0.2
query_result=$($CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(2))")
done
}

View File

@ -1,5 +1,4 @@
#!/usr/bin/env bash
# Tags: no-random-settings, no-parallel
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@ -1,5 +1,4 @@
#!/usr/bin/env bash
# Tags: no-random-settings, no-parallel
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@ -1,5 +1,4 @@
#!/usr/bin/env bash
# Tags: no-random-settings, no-parallel
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

Some files were not shown because too many files have changed in this diff.