mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-09-19 16:20:50 +00:00)
Merge branch 'master' into output-format-json-compact-with-progress
This commit is contained in:
commit
33512a0d6e
@ -30,7 +30,6 @@
|
||||
* Support more variants of JOIN strictness (`LEFT/RIGHT SEMI/ANTI/ANY JOIN`) with inequality conditions which involve columns from both left and right table. e.g. `t1.y < t2.y` (see the setting `allow_experimental_join_condition`). [#64281](https://github.com/ClickHouse/ClickHouse/pull/64281) ([lgbo](https://github.com/lgbo-ustc)).
|
||||
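A minimal sketch of the entry above, assuming two hypothetical tables `t1(key, y)` and `t2(key, y)`; the table and column names are placeholders:

```sql
SET allow_experimental_join_condition = 1;

-- A SEMI JOIN whose ON clause mixes an equality with an inequality that
-- references columns from both sides (t1.y < t2.y), per the entry above.
SELECT t1.key, t1.y
FROM t1
LEFT SEMI JOIN t2 ON t1.key = t2.key AND t1.y < t2.y;
```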
* Interpret Hive-style partitioning for different engines (`File`, `URL`, `S3`, `AzureBlobStorage`, `HDFS`). Hive-style partitioning organizes data into partitioned sub-directories, making it efficient to query and manage large datasets. Currently, it only creates virtual columns with the appropriate name and data. The follow-up PR will introduce the appropriate data filtering (performance speedup). [#65997](https://github.com/ClickHouse/ClickHouse/pull/65997) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
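A hedged sketch of how the Hive-style layout described above could be queried; the bucket layout, the `use_hive_partitioning` setting name, and the virtual-column names are assumptions for illustration only:

```sql
-- Hypothetical layout: https://bucket.s3.amazonaws.com/data/year=2024/country=US/part-0.parquet
SET use_hive_partitioning = 1;

SELECT country, count()
FROM s3('https://bucket.s3.amazonaws.com/data/year=*/country=*/*.parquet')
WHERE year = '2024'      -- `year` and `country` are the columns derived from the path
GROUP BY country;
```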
* Add function `printf` for Spark compatibility (but you can use the existing `format` function). [#66257](https://github.com/ClickHouse/ClickHouse/pull/66257) ([李扬](https://github.com/taiyang-li)).
|
||||
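For example (the second query shows the existing `format` alternative mentioned above):

```sql
SELECT printf('%s scored %d points (%.2f)', 'Alice', 42, 0.95) AS s1;
SELECT format('{} scored {} points', 'Alice', 42) AS s2;
```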
* Added a new server setting, `disable_insertion_and_mutation`. If it is enabled, the server will deny all insertions and mutations. This includes asynchronous INSERTs. This setting can be used to create read-only replicas. [#66519](https://github.com/ClickHouse/ClickHouse/pull/66519) ([Xu Jia](https://github.com/XuJia0210)).
|
||||
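An illustration of the intended effect; the statements below are hypothetical, and the setting itself is a server-level setting rather than a query setting:

```sql
-- With disable_insertion_and_mutation enabled on the server:
INSERT INTO events VALUES (1, 'click');      -- rejected, including asynchronous INSERTs
ALTER TABLE events DELETE WHERE id = 1;      -- rejected (mutation)
SELECT count() FROM events;                  -- still allowed on the read-only replica
```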
* Add options `restore_replace_external_engines_to_null` and `restore_replace_external_table_functions_to_null` to replace external engines and table functions with the `Null` engine, which can be useful for testing. It should work for RESTORE and explicit table creation. [#66536](https://github.com/ClickHouse/ClickHouse/pull/66536) ([Ilya Yatsishin](https://github.com/qoega)).
|
||||
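A hedged sketch of the testing use case above; the table definition is hypothetical:

```sql
SET restore_replace_external_engines_to_null = 1;

-- With the setting enabled, this explicit CREATE (or the same table arriving via RESTORE)
-- is created with ENGINE = Null, so no reachable MySQL server is needed.
CREATE TABLE mysql_mirror (id UInt64, name String)
ENGINE = MySQL('mysql-host:3306', 'db', 'users', 'user', 'password');
```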
* Added support for reading `MULTILINESTRING` geometry in `WKT` format using function `readWKTLineString`. [#67647](https://github.com/ClickHouse/ClickHouse/pull/67647) ([Jacob Reckhard](https://github.com/jacobrec)).
|
||||
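A sketch that mirrors the wording of the entry above; the exact return type of the call is not stated here and should be checked against the function reference:

```sql
SELECT readWKTLineString('MULTILINESTRING ((1 1, 2 2, 3 3), (4 4, 5 5))') AS geom;
```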
* Add a new table function `fuzzQuery`. This function allows the modification of a given query string with random variations. Example: `SELECT query FROM fuzzQuery('SELECT 1') LIMIT 5;`. [#67655](https://github.com/ClickHouse/ClickHouse/pull/67655) ([pufit](https://github.com/pufit)).
|
||||
|
@ -56,6 +56,15 @@ Other upcoming meetups
|
||||
* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10
|
||||
* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17
|
||||
* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17
|
||||
* [Tel Aviv Meetup](https://www.meetup.com/clickhouse-meetup-israel/events/303095121) - September 22
|
||||
* [Madrid Meetup](https://www.meetup.com/clickhouse-spain-user-group/events/303096564/) - October 22
|
||||
* [Barcelona Meetup](https://www.meetup.com/clickhouse-spain-user-group/events/303096876/) - October 29
|
||||
* [Oslo Meetup](https://www.meetup.com/open-source-real-time-data-warehouse-real-time-analytics/events/302938622) - October 31
|
||||
* [Ghent Meetup](https://www.meetup.com/clickhouse-belgium-user-group/events/303049405/) - November 19
|
||||
* [Dubai Meetup](https://www.meetup.com/clickhouse-dubai-meetup-group/events/303096989/) - November 21
|
||||
* [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/303096434) - November 26
|
||||
|
||||
|
||||
|
||||
## Recent Recordings
|
||||
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments".
|
||||
|
@ -18,7 +18,9 @@
|
||||
#define Net_HTTPResponse_INCLUDED
|
||||
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "Poco/Net/HTTPCookie.h"
|
||||
#include "Poco/Net/HTTPMessage.h"
|
||||
#include "Poco/Net/Net.h"
|
||||
@ -180,6 +182,8 @@ namespace Net
|
||||
/// May throw an exception in case of a malformed
|
||||
/// Set-Cookie header.
|
||||
|
||||
void getHeaders(std::map<std::string, std::string> & headers) const;
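/// Fills the given map with all response header name/value pairs.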
|
||||
|
||||
void write(std::ostream & ostr) const;
|
||||
/// Writes the HTTP response to the given
|
||||
/// output stream.
|
||||
|
@ -209,6 +209,15 @@ void HTTPResponse::getCookies(std::vector<HTTPCookie>& cookies) const
|
||||
}
|
||||
}
|
||||
|
||||
void HTTPResponse::getHeaders(std::map<std::string, std::string> & headers) const
|
||||
{
|
||||
headers.clear();
|
||||
for (const auto & it : *this)
|
||||
{
|
||||
headers.emplace(it.first, it.second);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void HTTPResponse::write(std::ostream& ostr) const
|
||||
{
|
||||
|
@ -311,6 +311,14 @@ int SecureSocketImpl::sendBytes(const void* buffer, int length, int flags)
|
||||
while (mustRetry(rc, remaining_time));
|
||||
if (rc <= 0)
|
||||
{
|
||||
// At this stage we may still have an unreceived final SSL message containing an SSL error,
|
||||
// so do a read to force SSL to process that possible error
|
||||
if (SSL_get_error(_pSSL, rc) == SSL_ERROR_SYSCALL && SocketImpl::lastError() == POCO_ECONNRESET)
|
||||
{
|
||||
char c = 0;
|
||||
SSL_read(_pSSL, &c, 1);
|
||||
}
|
||||
|
||||
rc = handleError(rc);
|
||||
if (rc == 0) throw SSLConnectionUnexpectedlyClosedException();
|
||||
}
|
||||
|
@ -8,4 +8,7 @@ set (CMAKE_CXX_COMPILER_TARGET "x86_64-pc-freebsd11")
|
||||
set (CMAKE_ASM_COMPILER_TARGET "x86_64-pc-freebsd11")
|
||||
set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/freebsd-x86_64")
|
||||
|
||||
# dprintf is used in a patched version of replxx
|
||||
add_compile_definitions(_WITH_DPRINTF)
|
||||
|
||||
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake
|
||||
|
2
contrib/libfiu
vendored
@ -1 +1 @@
|
||||
Subproject commit b85edbde4cf974b1b40d27828a56f0505f4e2ee5
|
||||
Subproject commit a1290d8cd3d7b4541d6c976e0a54f572ac03f2a3
|
2
contrib/replxx
vendored
@ -1 +1 @@
|
||||
Subproject commit 5d04501f93a4fb7f0bb8b73b8f614bc986f9e25b
|
||||
Subproject commit 711c18e7f4d951255aa8b0851e5a55d5a5fb0ddb
|
2
contrib/usearch
vendored
@ -1 +1 @@
|
||||
Subproject commit e21a5778a0d4469ddaf38c94b7be0196bb701ee4
|
||||
Subproject commit 7a8967cb442b08ca20c3dd781414378e65957d37
|
@ -112,3 +112,5 @@ wadllib==1.3.6
|
||||
websocket-client==0.59.0
|
||||
wheel==0.37.1
|
||||
zipp==1.0.0
|
||||
deltalake==0.16.0
|
||||
|
||||
|
@ -13,7 +13,8 @@ entry="/usr/share/clickhouse-test/performance/scripts/entrypoint.sh"
|
||||
# https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt
|
||||
# Double-escaped backslashes are a tribute to the engineering wonder of docker --
|
||||
# it gives '/bin/sh: 1: [bash,: not found' otherwise.
|
||||
numactl --hardware
|
||||
echo > compare.log
|
||||
numactl --hardware | tee -a compare.log
|
||||
node=$(( RANDOM % $(numactl --hardware | sed -n 's/^.*available:\(.*\)nodes.*$/\1/p') ));
|
||||
echo Will bind to NUMA node $node;
|
||||
echo Will bind to NUMA node $node | tee -a compare.log
|
||||
numactl --cpunodebind=$node --membind=$node $entry
|
||||
|
@ -6,28 +6,34 @@ sidebar_label: Iceberg
|
||||
|
||||
# Iceberg Table Engine
|
||||
|
||||
This engine provides a read-only integration with existing Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3.
|
||||
This engine provides a read-only integration with existing Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3, Azure, and on local storage.
|
||||
|
||||
## Create Table
|
||||
|
||||
Note that the Iceberg table must already exist in S3, this command does not take DDL parameters to create a new table.
|
||||
Note that the Iceberg table must already exist in the storage; this command does not take DDL parameters to create a new table.
|
||||
|
||||
``` sql
|
||||
CREATE TABLE iceberg_table
|
||||
ENGINE = Iceberg(url, [aws_access_key_id, aws_secret_access_key,])
|
||||
CREATE TABLE iceberg_table_s3
|
||||
ENGINE = IcebergS3(url, [, NOSIGN | access_key_id, secret_access_key, [session_token]], format, [,compression])
|
||||
|
||||
CREATE TABLE iceberg_table_azure
|
||||
ENGINE = IcebergAzure(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression])
|
||||
|
||||
CREATE TABLE iceberg_table_local
|
||||
ENGINE = IcebergLocal(path_to_table, [,format] [,compression_method])
|
||||
```
|
||||
|
||||
**Engine parameters**
|
||||
**Engine arguments**
|
||||
|
||||
- `url` — url with the path to an existing Iceberg table.
|
||||
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file.
|
||||
The description of the arguments matches the description of the arguments of the `S3`, `AzureBlobStorage`, and `File` engines, respectively.
|
||||
`format` stands for the format of data files in the Iceberg table.
|
||||
|
||||
Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md)
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
CREATE TABLE iceberg_table ENGINE=Iceberg('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', 'test')
|
||||
CREATE TABLE iceberg_table ENGINE=IcebergS3('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', 'test')
|
||||
```
|
||||
|
||||
Using named collections:
|
||||
@ -45,9 +51,15 @@ Using named collections:
|
||||
```
|
||||
|
||||
```sql
|
||||
CREATE TABLE iceberg_table ENGINE=Iceberg(iceberg_conf, filename = 'test_table')
|
||||
CREATE TABLE iceberg_table ENGINE=IcebergS3(iceberg_conf, filename = 'test_table')
|
||||
|
||||
```
|
||||
|
||||
**Aliases**
|
||||
|
||||
|
||||
The `Iceberg` table engine is now an alias for `IcebergS3`.
|
||||
|
||||
## See also
|
||||
|
||||
- [iceberg table function](/docs/en/sql-reference/table-functions/iceberg.md)
|
||||
|
@ -54,7 +54,7 @@ Parameters:
|
||||
- `distance_function`: either `L2Distance` (the [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance) - the length of a
|
||||
line between two points in Euclidean space), or `cosineDistance` (the [cosine
|
||||
distance](https://en.wikipedia.org/wiki/Cosine_similarity#Cosine_distance)- the angle between two non-zero vectors).
|
||||
- `quantization`: either `f32`, `f16`, or `i8` for storing the vector with reduced precision (optional, default: `f32`)
|
||||
- `quantization`: either `f64`, `f32`, `f16`, `bf16`, or `i8` for storing the vector with reduced precision (optional, default: `bf16`)
|
||||
- `m`: the number of neighbors per graph node (optional, default: 16)
|
||||
- `ef_construction`: (optional, default: 128)
|
||||
- `ef_search`: (optional, default: 64)
|
||||
|
@ -109,6 +109,7 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
|
||||
- `_file` — Resource name of the `URL`. Type: `LowCardinality(String)`.
|
||||
- `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
|
||||
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
|
||||
- `_headers` - HTTP response headers. Type: `Map(LowCardinality(String), LowCardinality(String))`.
|
||||
|
||||
## Storage Settings {#storage-settings}
|
||||
|
||||
|
@ -58,7 +58,7 @@ Connection: Close
|
||||
Content-Type: text/tab-separated-values; charset=UTF-8
|
||||
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
|
||||
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
|
||||
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds": "0"}
|
||||
|
||||
1
|
||||
```
|
||||
@ -472,7 +472,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
|
||||
< X-ClickHouse-Format: Template
|
||||
< X-ClickHouse-Timezone: Asia/Shanghai
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
|
||||
<
|
||||
# HELP "Query" "Number of executing queries"
|
||||
# TYPE "Query" counter
|
||||
@ -668,7 +668,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
|
||||
< Content-Type: text/html; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
|
||||
<
|
||||
* Connection #0 to host localhost left intact
|
||||
Say Hi!%
|
||||
@ -708,7 +708,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
|
||||
< Content-Type: text/plain; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
|
||||
<
|
||||
* Connection #0 to host localhost left intact
|
||||
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
|
||||
@ -766,7 +766,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
|
||||
< Content-Type: text/html; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
|
||||
<
|
||||
<html><body>Absolute Path File</body></html>
|
||||
* Connection #0 to host localhost left intact
|
||||
@ -785,7 +785,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
|
||||
< Content-Type: text/html; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
|
||||
<
|
||||
<html><body>Relative Path File</body></html>
|
||||
* Connection #0 to host localhost left intact
|
||||
|
@ -1389,7 +1389,7 @@ DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : nul
|
||||
#### schema_inference_make_columns_nullable
|
||||
|
||||
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
|
||||
If the setting is enabled, all inferred type will be `Nullable`, if disabled, the inferred type will be `Nullable` only if `input_format_null_as_default` is disabled and the column contains `NULL` in a sample that is parsed during schema inference.
|
||||
If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred type will never be `Nullable`; if set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or the file metadata contains information about column nullability.
|
||||
|
||||
Enabled by default.
|
||||
|
||||
@ -1412,15 +1412,13 @@ DESC format(JSONEachRow, $$
|
||||
└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
```sql
|
||||
SET schema_inference_make_columns_nullable = 0;
|
||||
SET input_format_null_as_default = 0;
|
||||
SET schema_inference_make_columns_nullable = 'auto';
|
||||
DESC format(JSONEachRow, $$
|
||||
{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}
|
||||
{"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]}
|
||||
$$)
|
||||
```
|
||||
```response
|
||||
|
||||
┌─name────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ id │ Int64 │ │ │ │ │ │
|
||||
│ age │ Int64 │ │ │ │ │ │
|
||||
@ -1432,7 +1430,6 @@ DESC format(JSONEachRow, $$
|
||||
|
||||
```sql
|
||||
SET schema_inference_make_columns_nullable = 0;
|
||||
SET input_format_null_as_default = 1;
|
||||
DESC format(JSONEachRow, $$
|
||||
{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}
|
||||
{"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]}
|
||||
|
@ -171,8 +171,8 @@ If the `schema_inference_hints` is not formated properly, or if there is a typo
|
||||
|
||||
## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable}
|
||||
|
||||
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
|
||||
If the setting is enabled, the inferred type will be `Nullable` only if column contains `NULL` in a sample that is parsed during schema inference.
|
||||
Controls making inferred types `Nullable` in schema inference.
|
||||
If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred type will never be `Nullable`; if set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or the file metadata contains information about column nullability.
|
||||
|
||||
Default value: `true`.
|
||||
|
||||
|
@ -5633,7 +5633,6 @@ Default value: `1GiB`.
|
||||
## use_json_alias_for_old_object_type
|
||||
|
||||
When enabled, `JSON` data type alias will be used to create an old [Object('json')](../../sql-reference/data-types/json.md) type instead of the new [JSON](../../sql-reference/data-types/newjson.md) type.
|
||||
This setting requires server restart to take effect when changed.
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
|
41
docs/en/operations/system-tables/projections.md
Normal file
@ -0,0 +1,41 @@
|
||||
---
|
||||
slug: /en/operations/system-tables/projections
|
||||
---
|
||||
# projections
|
||||
|
||||
Contains information about existing projections in all the tables.
|
||||
|
||||
Columns:
|
||||
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
|
||||
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
|
||||
- `name` ([String](../../sql-reference/data-types/string.md)) — Projection name.
|
||||
- `type` ([Enum](../../sql-reference/data-types/enum.md)) — Projection type ('Normal' = 0, 'Aggregate' = 1).
|
||||
- `sorting_key` ([Array(String)](../../sql-reference/data-types/array.md)) — Projection sorting key.
|
||||
- `query` ([String](../../sql-reference/data-types/string.md)) — Projection query.
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
SELECT * FROM system.projections LIMIT 2 FORMAT Vertical;
|
||||
```
|
||||
|
||||
```text
|
||||
Row 1:
|
||||
──────
|
||||
database: default
|
||||
table: landing
|
||||
name: improved_sorting_key
|
||||
type: Normal
|
||||
sorting_key: ['user_id','date']
|
||||
query: SELECT * ORDER BY user_id, date
|
||||
|
||||
Row 2:
|
||||
──────
|
||||
database: default
|
||||
table: landing
|
||||
name: agg_no_key
|
||||
type: Aggregate
|
||||
sorting_key: []
|
||||
query: SELECT count()
|
||||
```
|
@ -104,7 +104,7 @@ Events that occur at the same second may lay in the sequence in an undefined ord
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `pattern` — Pattern string. See [Pattern syntax](#sequencematch).
|
||||
- `pattern` — Pattern string. See [Pattern syntax](#pattern-syntax).
|
||||
|
||||
**Returned values**
|
||||
|
||||
@ -113,8 +113,7 @@ Events that occur at the same second may lay in the sequence in an undefined ord
|
||||
|
||||
Type: `UInt8`.
|
||||
|
||||
<a name="sequence-function-pattern-syntax"></a>
|
||||
**Pattern syntax**
|
||||
#### Pattern syntax
|
||||
|
||||
- `(?N)` — Matches the condition argument at position `N`. Conditions are numbered in the `[1, 32]` range. For example, `(?1)` matches the argument passed to the `cond1` parameter.
|
||||
|
||||
@ -196,7 +195,7 @@ sequenceCount(pattern)(timestamp, cond1, cond2, ...)
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `pattern` — Pattern string. See [Pattern syntax](#sequencematch).
|
||||
- `pattern` — Pattern string. See [Pattern syntax](#pattern-syntax).
|
||||
|
||||
**Returned values**
|
||||
|
||||
|
@ -453,8 +453,8 @@ As we can see, after inserting paths `e` and `f.g` the limit was reached and we
|
||||
|
||||
### During merges of data parts in MergeTree table engines
|
||||
|
||||
During merge of several data parts in MergeTree table the `JSON` column in the resulting data part can reach the limit of dynamic paths won't be able to store all paths from source parts as subcolumns.
|
||||
In this case ClickHouse chooses what paths will remain as subcolumns after merge and what types will be stored in the shared data structure. In most cases ClickHouse tries to keep paths that contains
|
||||
During a merge of several data parts in a MergeTree table, the `JSON` column in the resulting data part can reach the limit of dynamic paths and won't be able to store all paths from the source parts as subcolumns.
|
||||
In this case ClickHouse chooses what paths will remain as subcolumns after merge and what paths will be stored in the shared data structure. In most cases ClickHouse tries to keep paths that contain
|
||||
the largest number of non-null values and move the rarest paths to the shared data structure, but it depends on the implementation.
|
||||
|
||||
Let's see an example of such a merge. First, let's create a table with a `JSON` column, set the limit of dynamic paths to `3`, and insert values with `5` different paths:
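A hedged sketch of such a merge; the `max_dynamic_paths` type parameter and the `JSONDynamicPaths`/`JSONSharedDataPaths` introspection functions are assumed from the `JSON` type documentation rather than shown on this page:

```sql
CREATE TABLE test (json JSON(max_dynamic_paths = 3)) ENGINE = MergeTree ORDER BY tuple();

INSERT INTO test VALUES ('{"a" : 1}'), ('{"b" : 2}'), ('{"c" : 3}');
INSERT INTO test VALUES ('{"d" : 4}'), ('{"e" : 5}');

OPTIMIZE TABLE test FINAL;  -- the merge keeps at most 3 paths as subcolumns

SELECT JSONDynamicPaths(json) AS dynamic_paths, JSONSharedDataPaths(json) AS shared_paths
FROM test;
```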
|
||||
|
@ -49,7 +49,7 @@ Result:
|
||||
|
||||
## multiIf
|
||||
|
||||
Allows to write the [CASE](../../sql-reference/operators/index.md#operator_case) operator more compactly in the query.
|
||||
Allows to write the [CASE](../../sql-reference/operators/index.md#conditional-expression) operator more compactly in the query.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -264,4 +264,4 @@ SELECT clamp(1, 2, 3) result, toTypeName(result) type;
|
||||
┌─result─┬─type────┐
|
||||
│ 2 │ Float64 │
|
||||
└────────┴─────────┘
|
||||
```
|
||||
```
|
||||
|
@ -2019,7 +2019,7 @@ Alias: `dateTrunc`.
|
||||
|
||||
`unit` argument is case-insensitive.
|
||||
|
||||
- `value` — Date and time. [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md).
|
||||
- `value` — Date and time. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md).
|
||||
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
@ -4287,7 +4287,7 @@ Result:
|
||||
|
||||
## fromModifiedJulianDay
|
||||
|
||||
Converts a [Modified Julian Day](https://en.wikipedia.org/wiki/Julian_day#Variants) number to a [Proleptic Gregorian calendar](https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar) date in text form `YYYY-MM-DD`. This function supports day number from `-678941` to `2973119` (which represent 0000-01-01 and 9999-12-31 respectively). It raises an exception if the day number is outside of the supported range.
|
||||
Converts a [Modified Julian Day](https://en.wikipedia.org/wiki/Julian_day#Variants) number to a [Proleptic Gregorian calendar](https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar) date in text form `YYYY-MM-DD`. This function supports day number from `-678941` to `2973483` (which represent 0000-01-01 and 9999-12-31 respectively). It raises an exception if the day number is outside of the supported range.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -6,7 +6,7 @@ title: "Functions for Working with Geohash"
|
||||
|
||||
## Geohash
|
||||
|
||||
[Geohash](https://en.wikipedia.org/wiki/Geohash) is the geocode system, which subdivides Earth’s surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer is the geohash string, the more precise is the geographic location.
|
||||
[Geohash](https://en.wikipedia.org/wiki/Geohash) is the geocode system, which subdivides Earth’s surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer the geohash string is, the more precise the geographic location will be.
|
||||
|
||||
If you need to manually convert geographic coordinates to geohash strings, you can use [geohash.org](http://geohash.org/).
|
||||
|
||||
@ -14,26 +14,37 @@ If you need to manually convert geographic coordinates to geohash strings, you c
|
||||
|
||||
Encodes latitude and longitude as a [geohash](#geohash)-string.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
geohashEncode(longitude, latitude, [precision])
|
||||
```
|
||||
|
||||
**Input values**
|
||||
|
||||
- longitude - longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]`
|
||||
- latitude - latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]`
|
||||
- precision - Optional, length of the resulting encoded string, defaults to `12`. Integer in range `[1, 12]`. Any value less than `1` or greater than `12` is silently converted to `12`.
|
||||
- `longitude` — Longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]`. [Float](../../data-types/float.md).
|
||||
- `latitude` — Latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]`. [Float](../../data-types/float.md).
|
||||
- `precision` (optional) — Length of the resulting encoded string. Defaults to `12`. Integer in the range `[1, 12]`. [Int8](../../data-types/int-uint.md).
|
||||
|
||||
:::note
|
||||
- All coordinate parameters must be of the same type: either `Float32` or `Float64`.
|
||||
- For the `precision` parameter, any value less than `1` or greater than `12` is silently converted to `12`.
|
||||
:::
|
||||
|
||||
**Returned values**
|
||||
|
||||
- alphanumeric `String` of encoded coordinate (modified version of the base32-encoding alphabet is used).
|
||||
- Alphanumeric string of the encoded coordinate (modified version of the base32-encoding alphabet is used). [String](../../data-types/string.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─res──────────┐
|
||||
│ ezs42d000000 │
|
||||
@ -44,13 +55,19 @@ SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res;
|
||||
|
||||
Decodes any [geohash](#geohash)-encoded string into longitude and latitude.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
geohashDecode(hash_str)
|
||||
```
|
||||
|
||||
**Input values**
|
||||
|
||||
- encoded string - geohash-encoded string.
|
||||
- `hash_str` — Geohash-encoded string.
|
||||
|
||||
**Returned values**
|
||||
|
||||
- (longitude, latitude) - 2-tuple of `Float64` values of longitude and latitude.
|
||||
- Tuple `(longitude, latitude)` of `Float64` values of longitude and latitude. [Tuple](../../data-types/tuple.md)([Float64](../../data-types/float.md))
|
||||
|
||||
**Example**
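Query (a sketch; the decoded coordinates are approximately the center of the geohash cell):

```sql
SELECT geohashDecode('ezs42') AS res;
-- res ≈ (-5.60302734375, 42.60498046875)
```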
|
||||
|
||||
|
@ -688,6 +688,40 @@ SELECT kostikConsistentHash(16045690984833335023, 2);
|
||||
└───────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## ripeMD160
|
||||
|
||||
Produces [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) hash value.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
ripeMD160(input)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `input`: Input string. [String](../data-types/string.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A [UInt256](../data-types/int-uint.md) hash value where the 160-bit RIPEMD-160 hash is stored in the first 20 bytes. The remaining 12 bytes are zero-padded.
|
||||
|
||||
**Example**
|
||||
|
||||
Use the [hex](../functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string.
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));
|
||||
```
|
||||
|
||||
```response
|
||||
┌─hex(ripeMD160('The quick brown fox jumps over the lazy dog'))─┐
|
||||
│ 37F332F68DB77BD9D7EDD4969571AD671CF9DD3B │
|
||||
└───────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## murmurHash2_32, murmurHash2_64
|
||||
|
||||
Produces a [MurmurHash2](https://github.com/aappleby/smhasher) hash value.
|
||||
|
@ -49,6 +49,55 @@ SETTINGS cast_keep_nullable = 1
|
||||
└──────────────────┴─────────────────────┴──────────────────┘
|
||||
```
|
||||
|
||||
## toBool
|
||||
|
||||
Converts an input value to a value of type [`Bool`](../data-types/boolean.md). Throws an exception in case of an error.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
toBool(expr)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `expr` — Expression returning a number or a string. [Expression](../syntax.md/#syntax-expressions).
|
||||
|
||||
Supported arguments:
|
||||
- Values of type (U)Int8/16/32/64/128/256.
|
||||
- Values of type Float32/64.
|
||||
- Strings `true` or `false` (case-insensitive).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns `true` or `false` based on evaluation of the argument. [Bool](../data-types/boolean.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toBool(toUInt8(1)),
|
||||
toBool(toInt8(-1)),
|
||||
toBool(toFloat32(1.01)),
|
||||
toBool('true'),
|
||||
toBool('false'),
|
||||
toBool('FALSE')
|
||||
FORMAT Vertical
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
toBool(toUInt8(1)): true
|
||||
toBool(toInt8(-1)): true
|
||||
toBool(toFloat32(1.01)): true
|
||||
toBool('true'): true
|
||||
toBool('false'): false
|
||||
toBool('FALSE'): false
|
||||
```
|
||||
|
||||
## toInt8
|
||||
|
||||
Converts an input value to a value of type [`Int8`](../data-types/int-uint.md). Throws an exception in case of an error.
|
||||
|
@ -8,7 +8,7 @@ title: "CREATE ROW POLICY"
|
||||
Creates a [row policy](../../../guides/sre/user-management/index.md#row-policy-management), i.e. a filter used to determine which rows a user can read from a table.
|
||||
|
||||
:::tip
|
||||
Row policies makes sense only for users with readonly access. If user can modify table or copy partitions between tables, it defeats the restrictions of row policies.
|
||||
Row policies make sense only for users with readonly access. If a user can modify a table or copy partitions between tables, it defeats the restrictions of row policies.
|
||||
:::
|
||||
|
||||
Syntax:
|
||||
@ -24,40 +24,40 @@ CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluste
|
||||
|
||||
## USING Clause
|
||||
|
||||
Allows to specify a condition to filter rows. An user will see a row if the condition is calculated to non-zero for the row.
|
||||
Allows specifying a condition to filter rows. A user will see a row if the condition is calculated to non-zero for the row.
|
||||
|
||||
## TO Clause
|
||||
|
||||
In the section `TO` you can provide a list of users and roles this policy should work for. For example, `CREATE ROW POLICY ... TO accountant, john@localhost`.
|
||||
In the `TO` section you can provide a list of users and roles this policy should work for. For example, `CREATE ROW POLICY ... TO accountant, john@localhost`.
|
||||
|
||||
Keyword `ALL` means all the ClickHouse users including current user. Keyword `ALL EXCEPT` allow to exclude some users from the all users list, for example, `CREATE ROW POLICY ... TO ALL EXCEPT accountant, john@localhost`
|
||||
Keyword `ALL` means all the ClickHouse users, including the current user. Keyword `ALL EXCEPT` allows excluding some users from the all-users list, for example, `CREATE ROW POLICY ... TO ALL EXCEPT accountant, john@localhost`
|
||||
|
||||
:::note
|
||||
If there are no row policies defined for a table then any user can `SELECT` all the row from the table. Defining one or more row policies for the table makes the access to the table depending on the row policies no matter if those row policies are defined for the current user or not. For example, the following policy
|
||||
If there are no row policies defined for a table, then any user can `SELECT` all the rows from the table. Defining one or more row policies for the table makes access to the table dependent on the row policies, no matter if those row policies are defined for the current user or not. For example, the following policy:
|
||||
|
||||
`CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter`
|
||||
|
||||
forbids the users `mira` and `peter` to see the rows with `b != 1`, and any non-mentioned user (e.g., the user `paul`) will see no rows from `mydb.table1` at all.
|
||||
forbids the users `mira` and `peter` from seeing the rows with `b != 1`, and any non-mentioned user (e.g., the user `paul`) will see no rows from `mydb.table1` at all.
|
||||
|
||||
If that's not desirable it can't be fixed by adding one more row policy, like the following:
|
||||
If that's not desirable, it can be fixed by adding one more row policy, like the following:
|
||||
|
||||
`CREATE ROW POLICY pol2 ON mydb.table1 USING 1 TO ALL EXCEPT mira, peter`
|
||||
:::
|
||||
|
||||
## AS Clause
|
||||
|
||||
It's allowed to have more than one policy enabled on the same table for the same user at the one time. So we need a way to combine the conditions from multiple policies.
|
||||
It's allowed to have more than one policy enabled on the same table for the same user at one time. So we need a way to combine the conditions from multiple policies.
|
||||
|
||||
By default policies are combined using the boolean `OR` operator. For example, the following policies
|
||||
By default, policies are combined using the boolean `OR` operator. For example, the following policies:
|
||||
|
||||
``` sql
|
||||
CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter
|
||||
CREATE ROW POLICY pol2 ON mydb.table1 USING c=2 TO peter, antonio
|
||||
```
|
||||
|
||||
enables the user `peter` to see rows with either `b=1` or `c=2`.
|
||||
enable the user `peter` to see rows with either `b=1` or `c=2`.
|
||||
|
||||
The `AS` clause specifies how policies should be combined with other policies. Policies can be either permissive or restrictive. By default policies are permissive, which means they are combined using the boolean `OR` operator.
|
||||
The `AS` clause specifies how policies should be combined with other policies. Policies can be either permissive or restrictive. By default, policies are permissive, which means they are combined using the boolean `OR` operator.
|
||||
|
||||
A policy can be defined as restrictive as an alternative. Restrictive policies are combined using the boolean `AND` operator.
|
||||
|
||||
@ -68,25 +68,25 @@ row_is_visible = (one or more of the permissive policies' conditions are non-zer
|
||||
(all of the restrictive policies' conditions are non-zero)
|
||||
```
|
||||
|
||||
For example, the following policies
|
||||
For example, the following policies:
|
||||
|
||||
``` sql
|
||||
CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter
|
||||
CREATE ROW POLICY pol2 ON mydb.table1 USING c=2 AS RESTRICTIVE TO peter, antonio
|
||||
```
|
||||
|
||||
enables the user `peter` to see rows only if both `b=1` AND `c=2`.
|
||||
enable the user `peter` to see rows only if both `b=1` AND `c=2`.
|
||||
|
||||
Database policies are combined with table policies.
|
||||
|
||||
For example, the following policies
|
||||
For example, the following policies:
|
||||
|
||||
``` sql
|
||||
CREATE ROW POLICY pol1 ON mydb.* USING b=1 TO mira, peter
|
||||
CREATE ROW POLICY pol2 ON mydb.table1 USING c=2 AS RESTRICTIVE TO peter, antonio
|
||||
```
|
||||
|
||||
enables the user `peter` to see table1 rows only if both `b=1` AND `c=2`, although
|
||||
enable the user `peter` to see table1 rows only if both `b=1` AND `c=2`, although
|
||||
any other table in mydb would have only `b=1` policy applied for the user.
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@ title: The Lightweight DELETE Statement
|
||||
The lightweight `DELETE` statement removes rows from the table `[db.]table` that match the expression `expr`. It is only available for the *MergeTree table engine family.
|
||||
|
||||
``` sql
|
||||
DELETE FROM [db.]table [ON CLUSTER cluster] WHERE expr;
|
||||
DELETE FROM [db.]table [ON CLUSTER cluster] [IN PARTITION partition_expr] WHERE expr;
|
||||
```
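For example, for a hypothetical table partitioned by `toYYYYMM(event_date)`, the optional `IN PARTITION` clause limits the delete to a single partition:

```sql
DELETE FROM db.events IN PARTITION 202409 WHERE user_id = 42;
```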
|
||||
|
||||
It is called "lightweight `DELETE`" to contrast it to the [ALTER table DELETE](/en/sql-reference/statements/alter/delete) command, which is a heavyweight process.
|
||||
|
@ -6,35 +6,37 @@ sidebar_label: iceberg
|
||||
|
||||
# iceberg Table Function
|
||||
|
||||
Provides a read-only table-like interface to Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3.
|
||||
Provides a read-only table-like interface to Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3, Azure, or on local storage.
|
||||
|
||||
## Syntax
|
||||
|
||||
``` sql
|
||||
iceberg(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure])
|
||||
icebergS3(url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,compression_method])
|
||||
icebergS3(named_collection[, option=value [,..]])
|
||||
|
||||
icebergAzure(connection_string|storage_account_url, container_name, blobpath, [,account_name], [,account_key] [,format] [,compression_method])
|
||||
icebergAzure(named_collection[, option=value [,..]])
|
||||
|
||||
icebergLocal(path_to_table, [,format] [,compression_method])
|
||||
icebergLocal(named_collection[, option=value [,..]])
|
||||
```
|
||||
|
||||
## Arguments
|
||||
|
||||
- `url` — Bucket url with the path to an existing Iceberg table in S3.
|
||||
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are used from the ClickHouse configuration. For more information see [Using S3 for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3).
|
||||
- `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. By default `Parquet` is used.
|
||||
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
|
||||
Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md).
|
||||
The description of the arguments matches the description of the arguments of the `s3`, `azureBlobStorage`, and `file` table functions, respectively.
|
||||
`format` stands for the format of data files in the Iceberg table.
|
||||
|
||||
**Returned value**
|
||||
|
||||
A table with the specified structure for reading data in the specified Iceberg table in S3.
|
||||
A table with the specified structure for reading data in the specified Iceberg table.
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
SELECT * FROM iceberg('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', 'test')
|
||||
SELECT * FROM icebergS3('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', 'test')
|
||||
```
|
||||
|
||||
:::important
|
||||
ClickHouse currently supports reading v1 (v2 support is coming soon!) of the Iceberg format via the `iceberg` table function and `Iceberg` table engine.
|
||||
ClickHouse currently supports reading v1 and v2 of the Iceberg format via the `icebergS3`, `icebergAzure` and `icebergLocal` table functions and the `IcebergS3`, `IcebergAzure` and `IcebergLocal` table engines.
|
||||
:::
|
||||
|
||||
## Defining a named collection
|
||||
@ -56,10 +58,14 @@ Here is an example of configuring a named collection for storing the URL and cre
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT * FROM iceberg(iceberg_conf, filename = 'test_table')
|
||||
DESCRIBE iceberg(iceberg_conf, filename = 'test_table')
|
||||
SELECT * FROM icebergS3(iceberg_conf, filename = 'test_table')
|
||||
DESCRIBE icebergS3(iceberg_conf, filename = 'test_table')
|
||||
```
|
||||
|
||||
**Aliases**
|
||||
|
||||
The `iceberg` table function is now an alias for `icebergS3`.
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Iceberg engine](/docs/en/engines/table-engines/integrations/iceberg.md)
|
||||
|
@ -54,6 +54,7 @@ Character `|` inside patterns is used to specify failover addresses. They are it
|
||||
- `_file` — Resource name of the `URL`. Type: `LowCardinality(String)`.
|
||||
- `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
|
||||
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
|
||||
- `_headers` - HTTP response headers. Type: `Map(LowCardinality(String), LowCardinality(String))`.
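A small sketch of reading the new `_headers` virtual column; the URL and column structure are placeholders:

```sql
SELECT _headers['Content-Type'] AS content_type
FROM url('https://example.com/data.tsv', 'TSV', 'line String')
LIMIT 1;
```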
|
||||
|
||||
## Hive-style partitioning {#hive-style-partitioning}
|
||||
|
||||
|
@ -50,7 +50,7 @@ Connection: Close
|
||||
Content-Type: text/tab-separated-values; charset=UTF-8
|
||||
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
|
||||
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
|
||||
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
|
||||
|
||||
1
|
||||
```
|
||||
@ -367,7 +367,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
|
||||
< X-ClickHouse-Format: Template
|
||||
< X-ClickHouse-Timezone: Asia/Shanghai
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0", "elapsed_ns":"662334", "real_time_microseconds":"0"}
|
||||
<
|
||||
# HELP "Query" "Number of executing queries"
|
||||
# TYPE "Query" counter
|
||||
@ -601,7 +601,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
|
||||
< Content-Type: text/plain; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
|
||||
<
|
||||
* Connection #0 to host localhost left intact
|
||||
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
|
||||
@ -659,7 +659,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
|
||||
< Content-Type: text/html; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
|
||||
<
|
||||
<html><body>Absolute Path File</body></html>
|
||||
* Connection #0 to host localhost left intact
|
||||
@ -678,7 +678,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
|
||||
< Content-Type: text/html; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
|
||||
<
|
||||
<html><body>Relative Path File</body></html>
|
||||
* Connection #0 to host localhost left intact
|
||||
|
@ -124,6 +124,40 @@ SELECT hex(sipHash128('foo', '\x01', 3));
|
||||
└──────────────────────────────────┘
|
||||
```
|
||||
|
||||
## ripeMD160
|
||||
|
||||
Produces a [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) hash of a string.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
ripeMD160(input)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `input`: Input string. [String](../data-types/string.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A [UInt256](../data-types/int-uint.md) value in which the 160-bit RIPEMD-160 hash is stored in the first 20 bytes. The remaining 12 bytes are zero-padded.
|
||||
|
||||
**Example**
|
||||
|
||||
Use the [hex](../functions/encoding-functions.md#hex) function to represent the result as a hex-encoded string.
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));
|
||||
```
|
||||
Result:
|
||||
```response
|
||||
┌─hex(ripeMD160('The quick brown fox jumps over the lazy dog'))─┐
|
||||
│ 37F332F68DB77BD9D7EDD4969571AD671CF9DD3B │
|
||||
└───────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## cityHash64 {#cityhash64}
|
||||
|
||||
Produces a 64-bit [CityHash](https://github.com/google/cityhash) value.
|
||||
|
@ -280,7 +280,7 @@ SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge
|
||||
Waits until a `ReplicatedMergeTree`-family table is synchronized with the other replicas in the cluster, but for no more than `receive_timeout` seconds:
|
||||
|
||||
``` sql
|
||||
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT [FROM 'srcReplica1'[, 'srcReplica2'[, ...]]] | PULL]
|
||||
SYSTEM SYNC REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT [FROM 'srcReplica1'[, 'srcReplica2'[, ...]]] | PULL]
|
||||
```
|
||||
|
||||
After this query is executed, the `[db.]replicated_merge_tree_family_table_name` table fetches commands from the common replicated log into its own replication queue. The query then waits until the replica processes all of the fetched commands. The following modifiers are supported:
|
||||
|
@ -53,7 +53,7 @@ Connection: Close
|
||||
Content-Type: text/tab-separated-values; charset=UTF-8
|
||||
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
|
||||
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
|
||||
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","real_time_microseconds":"0"}
|
||||
|
||||
1
|
||||
```
|
||||
@ -363,7 +363,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
|
||||
< X-ClickHouse-Format: Template
|
||||
< X-ClickHouse-Timezone: Asia/Shanghai
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
|
||||
<
|
||||
# HELP "Query" "Number of executing queries"
|
||||
# TYPE "Query" counter
|
||||
@ -524,7 +524,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
|
||||
< Content-Type: text/html; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
|
||||
<
|
||||
* Connection #0 to host localhost left intact
|
||||
Say Hi!%
|
||||
@ -564,7 +564,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
|
||||
< Content-Type: text/plain; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","real_time_microseconds":"0"}
|
||||
<
|
||||
* Connection #0 to host localhost left intact
|
||||
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
|
||||
@ -616,7 +616,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
|
||||
< Content-Type: text/html; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","real_time_microseconds":"0"}
|
||||
<
|
||||
<html><body>Absolute Path File</body></html>
|
||||
* Connection #0 to host localhost left intact
|
||||
@ -635,7 +635,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
|
||||
< Content-Type: text/html; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","real_time_microseconds":"0"}
|
||||
<
|
||||
<html><body>Relative Path File</body></html>
|
||||
* Connection #0 to host localhost left intact
|
||||
|
@ -1157,7 +1157,7 @@ SELECT toModifiedJulianDayOrNull('2020-01-01');
|
||||
|
||||
## fromModifiedJulianDay {#frommodifiedjulianday}
|
||||
|
||||
Converts a [Modified Julian Day](https://en.wikipedia.org/wiki/Julian_day#Variants) number to a [Proleptic Gregorian calendar](https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar) date in the text form `YYYY-MM-DD`. This function supports day numbers from `-678941` to `2973119` (representing 0000-01-01 and 9999-12-31 respectively). It raises an exception if the day number is outside of the supported range.
|
||||
Converts a [Modified Julian Day](https://en.wikipedia.org/wiki/Julian_day#Variants) number to a [Proleptic Gregorian calendar](https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar) date in the text form `YYYY-MM-DD`. This function supports day numbers from `-678941` to `2973483` (representing 0000-01-01 and 9999-12-31 respectively). It raises an exception if the day number is outside of the supported range.
|
||||
|
||||
**语法**
|
||||
|
||||
|
@ -677,4 +677,122 @@ void GetAllChildrenNumberCommand::execute(const ASTKeeperQuery * query, KeeperCl
|
||||
std::cout << totalNumChildren << "\n";
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
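/// Shared implementation for the `cp` and `mv` commands: checks the source node's version,
/// creates the destination with the source's data and, for `mv`, removes the source, all in
/// one multi-request; retried on ZBADVERSION up to kTryLimit attempts.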
class CPMVOperation
|
||||
{
|
||||
constexpr static UInt64 kTryLimit = 1000;
|
||||
|
||||
public:
|
||||
CPMVOperation(String src_, String dest_, bool remove_src_, KeeperClient * client_)
|
||||
: src(std::move(src_)), dest(std::move(dest_)), remove_src(remove_src_), client(client_)
|
||||
{
|
||||
}
|
||||
|
||||
bool isTryLimitReached() const { return failed_tries_count >= kTryLimit; }
|
||||
|
||||
bool isCompleted() const { return is_completed; }
|
||||
|
||||
void perform()
|
||||
{
|
||||
Coordination::Stat src_stat;
|
||||
String data = client->zookeeper->get(src, &src_stat);
|
||||
|
||||
Coordination::Requests ops{
|
||||
zkutil::makeCheckRequest(src, src_stat.version),
|
||||
zkutil::makeCreateRequest(dest, data, zkutil::CreateMode::Persistent), // Do we need to copy ACLs here?
|
||||
};
|
||||
|
||||
if (remove_src)
|
||||
ops.push_back(zkutil::makeRemoveRequest(src, src_stat.version));
|
||||
|
||||
Coordination::Responses responses;
|
||||
auto code = client->zookeeper->tryMulti(ops, responses);
|
||||
|
||||
switch (code)
|
||||
{
|
||||
case Coordination::Error::ZOK: {
|
||||
is_completed = true;
|
||||
return;
|
||||
}
|
||||
case Coordination::Error::ZBADVERSION: {
|
||||
++failed_tries_count;
|
||||
|
||||
if (isTryLimitReached())
|
||||
zkutil::KeeperMultiException::check(code, ops, responses);
|
||||
|
||||
return;
|
||||
}
|
||||
default:
|
||||
zkutil::KeeperMultiException::check(code, ops, responses);
|
||||
}
|
||||
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unreachable");
|
||||
}
|
||||
|
||||
private:
|
||||
String src;
|
||||
String dest;
|
||||
bool remove_src = false;
|
||||
KeeperClient * client = nullptr;
|
||||
|
||||
bool is_completed = false;
|
||||
uint64_t failed_tries_count = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
bool CPCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, [[maybe_unused]] Expected & expected) const
|
||||
{
|
||||
String src_path;
|
||||
if (!parseKeeperPath(pos, expected, src_path))
|
||||
return false;
|
||||
node->args.push_back(std::move(src_path));
|
||||
|
||||
String to_path;
|
||||
if (!parseKeeperPath(pos, expected, to_path))
|
||||
return false;
|
||||
node->args.push_back(std::move(to_path));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void CPCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
|
||||
{
|
||||
auto src = client->getAbsolutePath(query->args[0].safeGet<String>());
|
||||
auto dest = client->getAbsolutePath(query->args[1].safeGet<String>());
|
||||
|
||||
CPMVOperation operation(std::move(src), std::move(dest), /*remove_src_=*/false, /*client_=*/client);
|
||||
|
||||
while (!operation.isTryLimitReached() && !operation.isCompleted())
|
||||
operation.perform();
|
||||
}
|
||||
|
||||
bool MVCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
|
||||
{
|
||||
String src_path;
|
||||
if (!parseKeeperPath(pos, expected, src_path))
|
||||
return false;
|
||||
node->args.push_back(std::move(src_path));
|
||||
|
||||
String to_path;
|
||||
if (!parseKeeperPath(pos, expected, to_path))
|
||||
return false;
|
||||
node->args.push_back(std::move(to_path));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void MVCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
|
||||
{
|
||||
auto src = client->getAbsolutePath(query->args[0].safeGet<String>());
|
||||
auto dest = client->getAbsolutePath(query->args[1].safeGet<String>());
|
||||
|
||||
CPMVOperation operation(std::move(src), std::move(dest), /*remove_src_=*/true, /*client_=*/client);
|
||||
|
||||
while (!operation.isTryLimitReached() && !operation.isCompleted())
|
||||
operation.perform();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -266,4 +266,32 @@ class GetAllChildrenNumberCommand : public IKeeperClientCommand
    }
};

class CPCommand : public IKeeperClientCommand
{
    String getName() const override { return "cp"; }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;

    String getHelpMessage() const override
    {
        return "{} <src> <dest> -- Copies 'src' node to 'dest' path.";
    }
};

class MVCommand : public IKeeperClientCommand
{
    String getName() const override { return "mv"; }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;

    String getHelpMessage() const override
    {
        return "{} <src> <dest> -- Moves 'src' node to the 'dest' path.";
    }
};

}
@ -212,6 +212,8 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
        std::make_shared<FourLetterWordCommand>(),
        std::make_shared<GetDirectChildrenNumberCommand>(),
        std::make_shared<GetAllChildrenNumberCommand>(),
        std::make_shared<CPCommand>(),
        std::make_shared<MVCommand>(),
    });

    String home_path;
@ -978,6 +978,7 @@ try
|
||||
/** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available.
|
||||
* At this moment, no one could own shared part of Context.
|
||||
*/
|
||||
global_context->resetSharedContext();
|
||||
global_context.reset();
|
||||
shared_context.reset();
|
||||
LOG_DEBUG(log, "Destroyed global context.");
|
||||
|
@ -209,7 +209,7 @@ std::map<std::pair<TypeIndex, String>, NodeToSubcolumnTransformer> node_transfor
|
||||
},
|
||||
};
|
||||
|
||||
std::tuple<FunctionNode *, ColumnNode *, TableNode *> getTypedNodesForOptimization(const QueryTreeNodePtr & node)
|
||||
std::tuple<FunctionNode *, ColumnNode *, TableNode *> getTypedNodesForOptimization(const QueryTreeNodePtr & node, const ContextPtr & context)
|
||||
{
|
||||
auto * function_node = node->as<FunctionNode>();
|
||||
if (!function_node)
|
||||
@ -232,6 +232,12 @@ std::tuple<FunctionNode *, ColumnNode *, TableNode *> getTypedNodesForOptimizati
|
||||
const auto & storage_snapshot = table_node->getStorageSnapshot();
|
||||
auto column = first_argument_column_node->getColumn();
|
||||
|
||||
/// If view source is set we cannot optimize because it doesn't support moving functions to subcolumns.
|
||||
/// The storage is replaced to the view source but it happens only after building a query tree and applying passes.
|
||||
auto view_source = context->getViewSource();
|
||||
if (view_source && view_source->getStorageID().getFullNameNotQuoted() == storage->getStorageID().getFullNameNotQuoted())
|
||||
return {};
|
||||
|
||||
if (!storage->supportsOptimizationToSubcolumns() || storage->isVirtualColumn(column.name, storage_snapshot->metadata))
|
||||
return {};
|
||||
|
||||
@ -266,7 +272,7 @@ public:
|
||||
return;
|
||||
}
|
||||
|
||||
auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node);
|
||||
auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node, getContext());
|
||||
if (function_node && first_argument_node && table_node)
|
||||
{
|
||||
enterImpl(*function_node, *first_argument_node, *table_node);
|
||||
@ -416,7 +422,7 @@ public:
|
||||
if (!getSettings().optimize_functions_to_subcolumns)
|
||||
return;
|
||||
|
||||
auto [function_node, first_argument_column_node, table_node] = getTypedNodesForOptimization(node);
|
||||
auto [function_node, first_argument_column_node, table_node] = getTypedNodesForOptimization(node, getContext());
|
||||
if (!function_node || !first_argument_column_node || !table_node)
|
||||
return;
|
||||
|
||||
|
@ -100,6 +100,7 @@ protected:
|
||||
auto buf = BuilderRWBufferFromHTTP(getPingURI())
|
||||
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
|
||||
.withTimeouts(getHTTPTimeouts())
|
||||
.withSettings(getContext()->getReadSettings())
|
||||
.create(credentials);
|
||||
|
||||
return checkString(PING_OK_ANSWER, *buf);
|
||||
@ -206,6 +207,7 @@ protected:
|
||||
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
|
||||
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
|
||||
.withTimeouts(getHTTPTimeouts())
|
||||
.withSettings(getContext()->getReadSettings())
|
||||
.create(credentials);
|
||||
|
||||
bool res = false;
|
||||
@ -232,6 +234,7 @@ protected:
|
||||
.withConnectionGroup(HTTPConnectionGroupType::STORAGE)
|
||||
.withMethod(Poco::Net::HTTPRequest::HTTP_POST)
|
||||
.withTimeouts(getHTTPTimeouts())
|
||||
.withSettings(getContext()->getReadSettings())
|
||||
.create(credentials);
|
||||
|
||||
std::string character;
|
||||
|
@ -111,6 +111,7 @@ add_headers_and_sources(dbms Storages/ObjectStorage)
|
||||
add_headers_and_sources(dbms Storages/ObjectStorage/Azure)
|
||||
add_headers_and_sources(dbms Storages/ObjectStorage/S3)
|
||||
add_headers_and_sources(dbms Storages/ObjectStorage/HDFS)
|
||||
add_headers_and_sources(dbms Storages/ObjectStorage/Local)
|
||||
add_headers_and_sources(dbms Storages/ObjectStorage/DataLakes)
|
||||
add_headers_and_sources(dbms Common/NamedCollections)
|
||||
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include <Parsers/Access/ASTCreateUserQuery.h>
|
||||
#include <Parsers/Access/ASTAuthenticationData.h>
|
||||
#include <Parsers/ASTDropQuery.h>
|
||||
#include <Parsers/ASTExplainQuery.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
#include <Parsers/ASTUseQuery.h>
|
||||
@ -2111,6 +2112,15 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
|
||||
// - Other formats (e.g. FORMAT CSV) are arbitrarily more complex and tricky to parse. For example, we may be unable to distinguish if the semicolon
|
||||
// is part of the data or ends the statement. In this case, we simply assume that the end of the INSERT statement is determined by \n\n (two newlines).
|
||||
auto * insert_ast = parsed_query->as<ASTInsertQuery>();
|
||||
// We also consider the INSERT query in EXPLAIN queries (same as normal INSERT queries)
|
||||
if (!insert_ast)
|
||||
{
|
||||
auto * explain_ast = parsed_query->as<ASTExplainQuery>();
|
||||
if (explain_ast && explain_ast->getExplainedQuery())
|
||||
{
|
||||
insert_ast = explain_ast->getExplainedQuery()->as<ASTInsertQuery>();
|
||||
}
|
||||
}
|
||||
const char * query_to_execute_end = this_query_end;
|
||||
if (insert_ast && insert_ast->data)
|
||||
{
|
||||
|
@ -145,6 +145,9 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
|
||||
/// work we need to pass host name separately. It will be send into TLS Hello packet to let
|
||||
/// the server know which host we want to talk with (single IP can process requests for multiple hosts using SNI).
|
||||
static_cast<Poco::Net::SecureStreamSocket*>(socket.get())->setPeerHostName(host);
|
||||
/// we want to postpone SSL handshake until first read or write operation
|
||||
/// so any errors during negotiation would be properly processed
|
||||
static_cast<Poco::Net::SecureStreamSocket*>(socket.get())->setLazyHandshake(true);
|
||||
#else
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "tcp_secure protocol is disabled because poco library was built without NetSSL support.");
|
||||
#endif
|
||||
|
@ -299,13 +299,14 @@ ReplxxLineReader::ReplxxLineReader(
|
||||
Patterns delimiters_,
|
||||
const char word_break_characters_[],
|
||||
replxx::Replxx::highlighter_callback_t highlighter_,
|
||||
[[ maybe_unused ]] std::istream & input_stream_,
|
||||
[[ maybe_unused ]] std::ostream & output_stream_,
|
||||
[[ maybe_unused ]] int in_fd_,
|
||||
[[ maybe_unused ]] int out_fd_,
|
||||
[[ maybe_unused ]] int err_fd_
|
||||
std::istream & input_stream_,
|
||||
std::ostream & output_stream_,
|
||||
int in_fd_,
|
||||
int out_fd_,
|
||||
int err_fd_
|
||||
)
|
||||
: LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_), input_stream_, output_stream_, in_fd_)
|
||||
, rx(input_stream_, output_stream_, in_fd_, out_fd_, err_fd_)
|
||||
, highlighter(std::move(highlighter_))
|
||||
, word_break_characters(word_break_characters_)
|
||||
, editor(getEditor())
|
||||
@ -516,7 +517,7 @@ void ReplxxLineReader::addToHistory(const String & line)
|
||||
rx.history_add(line);
|
||||
|
||||
// flush changes to the disk
|
||||
if (!rx.history_save(history_file_path))
|
||||
if (history_file_fd >= 0 && !rx.history_save(history_file_path))
|
||||
rx.print("Saving history failed: %s\n", errnoToString().c_str());
|
||||
|
||||
if (history_file_fd >= 0 && locked && 0 != flock(history_file_fd, LOCK_UN))
|
||||
|
@ -300,7 +300,7 @@ void ColumnDynamic::get(size_t n, Field & res) const
|
||||
auto value_data = shared_variant.getDataAt(variant_col.offsetAt(n));
|
||||
ReadBufferFromMemory buf(value_data.data, value_data.size);
|
||||
auto type = decodeDataType(buf);
|
||||
getVariantSerialization(type)->deserializeBinary(res, buf, getFormatSettings());
|
||||
type->getDefaultSerialization()->deserializeBinary(res, buf, getFormatSettings());
|
||||
}
|
||||
|
||||
|
||||
@ -736,8 +736,7 @@ StringRef ColumnDynamic::serializeValueIntoArena(size_t n, Arena & arena, const
|
||||
{
|
||||
const auto & variant_type = assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariant(discr);
|
||||
encodeDataType(variant_type, buf);
|
||||
getVariantSerialization(variant_type, variant_info.variant_names[discr])
|
||||
->serializeBinary(variant_col.getVariantByGlobalDiscriminator(discr), variant_col.offsetAt(n), buf, getFormatSettings());
|
||||
variant_type->getDefaultSerialization()->serializeBinary(variant_col.getVariantByGlobalDiscriminator(discr), variant_col.offsetAt(n), buf, getFormatSettings());
|
||||
type_and_value = buf.str();
|
||||
}
|
||||
|
||||
@ -870,7 +869,7 @@ int ColumnDynamic::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_
|
||||
/// We have both values serialized in binary format, so we need to
|
||||
/// create temporary column, insert both values into it and compare.
|
||||
auto tmp_column = left_data_type->createColumn();
|
||||
const auto & serialization = getVariantSerialization(left_data_type, left_data_type_name);
|
||||
const auto & serialization = left_data_type->getDefaultSerialization();
|
||||
serialization->deserializeBinary(*tmp_column, buf_left, getFormatSettings());
|
||||
serialization->deserializeBinary(*tmp_column, buf_right, getFormatSettings());
|
||||
return tmp_column->compareAt(0, 1, *tmp_column, nan_direction_hint);
|
||||
@ -892,7 +891,7 @@ int ColumnDynamic::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_
|
||||
/// We have left value serialized in binary format, we need to
|
||||
/// create temporary column, insert the value into it and compare.
|
||||
auto tmp_column = left_data_type->createColumn();
|
||||
getVariantSerialization(left_data_type, left_data_type_name)->deserializeBinary(*tmp_column, buf_left, getFormatSettings());
|
||||
left_data_type->getDefaultSerialization()->deserializeBinary(*tmp_column, buf_left, getFormatSettings());
|
||||
return tmp_column->compareAt(0, right_variant.offsetAt(m), right_variant.getVariantByGlobalDiscriminator(right_discr), nan_direction_hint);
|
||||
}
|
||||
/// Check if only right value is in shared data.
|
||||
@ -912,7 +911,7 @@ int ColumnDynamic::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_
|
||||
/// We have right value serialized in binary format, we need to
|
||||
/// create temporary column, insert the value into it and compare.
|
||||
auto tmp_column = right_data_type->createColumn();
|
||||
getVariantSerialization(right_data_type, right_data_type_name)->deserializeBinary(*tmp_column, buf_right, getFormatSettings());
|
||||
right_data_type->getDefaultSerialization()->deserializeBinary(*tmp_column, buf_right, getFormatSettings());
|
||||
return left_variant.getVariantByGlobalDiscriminator(left_discr).compareAt(left_variant.offsetAt(n), 0, *tmp_column, nan_direction_hint);
|
||||
}
|
||||
/// Otherwise both values are regular variants.
|
||||
|
@ -414,7 +414,7 @@ public:
|
||||
/// Insert value into shared variant. Also updates Variant discriminators and offsets.
|
||||
void insertValueIntoSharedVariant(const IColumn & src, const DataTypePtr & type, const String & type_name, size_t n);
|
||||
|
||||
const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type, const String & variant_name) const
|
||||
const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type, const String & variant_name)
|
||||
{
|
||||
/// Get serialization for provided data type.
|
||||
/// To avoid calling type->getDefaultSerialization() every time we use simple cache with max size.
|
||||
@ -428,7 +428,7 @@ public:
|
||||
return serialization_cache.emplace(variant_name, variant_type->getDefaultSerialization()).first->second;
|
||||
}
|
||||
|
||||
const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type) const { return getVariantSerialization(variant_type, variant_type->getName()); }
|
||||
const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type) { return getVariantSerialization(variant_type, variant_type->getName()); }
|
||||
|
||||
private:
|
||||
void createVariantInfo(const DataTypePtr & variant_type);
|
||||
@ -473,7 +473,7 @@ private:
|
||||
/// We can use serializations of different data types to serialize values into shared variant.
|
||||
/// To avoid creating the same serialization multiple times, use simple cache.
|
||||
static const size_t SERIALIZATION_CACHE_MAX_SIZE = 256;
|
||||
mutable std::unordered_map<String, SerializationPtr> serialization_cache;
|
||||
std::unordered_map<String, SerializationPtr> serialization_cache;
|
||||
};
|
||||
|
||||
void extendVariantColumn(
|
||||
|
@ -34,13 +34,16 @@ bool ProgressIndication::updateProgress(const Progress & value)
|
||||
|
||||
void ProgressIndication::resetProgress()
|
||||
{
|
||||
watch.restart();
|
||||
progress.reset();
|
||||
show_progress_bar = false;
|
||||
written_progress_chars = 0;
|
||||
write_progress_on_update = false;
|
||||
{
|
||||
std::lock_guard lock(progress_mutex);
|
||||
progress.reset();
|
||||
show_progress_bar = false;
|
||||
written_progress_chars = 0;
|
||||
write_progress_on_update = false;
|
||||
}
|
||||
{
|
||||
std::lock_guard lock(profile_events_mutex);
|
||||
watch.restart();
|
||||
cpu_usage_meter.reset(getElapsedNanoseconds());
|
||||
hosts_data.clear();
|
||||
}
|
||||
@ -90,6 +93,8 @@ ProgressIndication::MemoryUsage ProgressIndication::getMemoryUsage() const
|
||||
|
||||
void ProgressIndication::writeFinalProgress()
|
||||
{
|
||||
std::lock_guard lock(progress_mutex);
|
||||
|
||||
if (progress.read_rows < 1000)
|
||||
return;
|
||||
|
||||
@ -271,6 +276,8 @@ void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message)
|
||||
|
||||
void ProgressIndication::clearProgressOutput(WriteBufferFromFileDescriptor & message)
|
||||
{
|
||||
std::lock_guard lock(progress_mutex);
|
||||
|
||||
if (written_progress_chars)
|
||||
{
|
||||
written_progress_chars = 0;
|
||||
|
@ -115,6 +115,8 @@ private:
|
||||
/// It is possible concurrent access to the following:
|
||||
/// - writeProgress() (class properties) (guarded with progress_mutex)
|
||||
/// - hosts_data/cpu_usage_meter (guarded with profile_events_mutex)
|
||||
///
|
||||
/// It is also possible to have more races if query is cancelled, so that clearProgressOutput() is called concurrently
|
||||
mutable std::mutex profile_events_mutex;
|
||||
mutable std::mutex progress_mutex;
|
||||
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <Poco/Environment.h>
|
||||
|
||||
#pragma clang diagnostic ignored "-Wreserved-identifier"
|
||||
|
||||
@ -371,8 +372,8 @@ try
|
||||
/// in case of double fault.
|
||||
|
||||
LOG_FATAL(log, "########## Short fault info ############");
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {}",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH,
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}, architecture: {}) (from thread {}) Received signal {}",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH, Poco::Environment::osArchitecture(),
|
||||
thread_num, sig);
|
||||
|
||||
std::string signal_description = "Unknown signal";
|
||||
|
@ -1120,7 +1120,7 @@ class IColumn;
|
||||
M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \
|
||||
M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \
|
||||
M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be a union of schemas of all files", 0) \
|
||||
M(Bool, schema_inference_make_columns_nullable, true, "If set to true, all inferred types will be Nullable in schema inference for formats without information about nullability.", 0) \
|
||||
M(UInt64Auto, schema_inference_make_columns_nullable, 1, "If set to true, all inferred types will be Nullable in schema inference. When set to false, no columns will be converted to Nullable. When set to 'auto', ClickHouse will use information about nullability from the data.", 0) \
|
||||
M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
|
||||
M(Bool, input_format_json_read_bools_as_strings, true, "Allow to parse bools as strings in JSON input formats", 0) \
|
||||
M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \
|
||||
|
@ -22,7 +22,6 @@
|
||||
#include <cstring>
|
||||
#include <unistd.h>
|
||||
#include <algorithm>
|
||||
#include <typeinfo>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
|
@ -185,7 +185,7 @@ std::unique_ptr<IDataType::SubstreamData> DataTypeDynamic::getDynamicSubcolumnDa
|
||||
auto type = decodeDataType(buf);
|
||||
if (type->getName() == subcolumn_type_name)
|
||||
{
|
||||
dynamic_column.getVariantSerialization(subcolumn_type, subcolumn_type_name)->deserializeBinary(*subcolumn, buf, format_settings);
|
||||
subcolumn_type->getDefaultSerialization()->deserializeBinary(*subcolumn, buf, format_settings);
|
||||
null_map.push_back(0);
|
||||
}
|
||||
else
|
||||
|
@ -1,10 +1,12 @@
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeObject.h>
|
||||
#include <DataTypes/DataTypeObjectDeprecated.h>
|
||||
#include <DataTypes/Serializations/SerializationJSON.h>
|
||||
#include <DataTypes/Serializations/SerializationObjectTypedPath.h>
|
||||
#include <DataTypes/Serializations/SerializationObjectDynamicPath.h>
|
||||
#include <DataTypes/Serializations/SerializationSubObject.h>
|
||||
#include <Columns/ColumnObject.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
@ -513,13 +515,24 @@ static DataTypePtr createObject(const ASTPtr & arguments, const DataTypeObject::
|
||||
|
||||
static DataTypePtr createJSON(const ASTPtr & arguments)
|
||||
{
|
||||
auto context = CurrentThread::getQueryContext();
|
||||
if (!context)
|
||||
context = Context::getGlobalContextInstance();
|
||||
|
||||
if (context->getSettingsRef().use_json_alias_for_old_object_type)
|
||||
{
|
||||
if (arguments && !arguments->children.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Experimental Object type doesn't support any arguments. If you want to use new JSON type, set setting allow_experimental_json_type = 1");
|
||||
|
||||
return std::make_shared<DataTypeObjectDeprecated>("JSON", false);
|
||||
}
|
||||
|
||||
return createObject(arguments, DataTypeObject::SchemaFormat::JSON);
|
||||
}
|
||||
|
||||
void registerDataTypeJSON(DataTypeFactory & factory)
|
||||
{
|
||||
if (!Context::getGlobalContextInstance()->getSettingsRef().use_json_alias_for_old_object_type)
|
||||
factory.registerDataType("JSON", createJSON, DataTypeFactory::Case::Insensitive);
|
||||
factory.registerDataType("JSON", createJSON, DataTypeFactory::Case::Insensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -78,10 +78,6 @@ static DataTypePtr create(const ASTPtr & arguments)
|
||||
void registerDataTypeObjectDeprecated(DataTypeFactory & factory)
|
||||
{
|
||||
factory.registerDataType("Object", create);
|
||||
if (Context::getGlobalContextInstance()->getSettingsRef().use_json_alias_for_old_object_type)
|
||||
factory.registerSimpleDataType("JSON",
|
||||
[] { return std::make_shared<DataTypeObjectDeprecated>("JSON", false); },
|
||||
DataTypeFactory::Case::Insensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -489,9 +489,8 @@ void SerializationDynamic::serializeBinary(const IColumn & column, size_t row_nu
|
||||
}
|
||||
|
||||
const auto & variant_type = assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariant(global_discr);
|
||||
const auto & variant_type_name = variant_info.variant_names[global_discr];
|
||||
encodeDataType(variant_type, ostr);
|
||||
dynamic_column.getVariantSerialization(variant_type, variant_type_name)->serializeBinary(variant_column.getVariantByGlobalDiscriminator(global_discr), variant_column.offsetAt(row_num), ostr, settings);
|
||||
variant_type->getDefaultSerialization()->serializeBinary(variant_column.getVariantByGlobalDiscriminator(global_discr), variant_column.offsetAt(row_num), ostr, settings);
|
||||
}
|
||||
|
||||
template <typename ReturnType = void, typename DeserializeFunc>
|
||||
@ -629,7 +628,7 @@ static void serializeTextImpl(
|
||||
ReadBufferFromMemory buf(value.data, value.size);
|
||||
auto variant_type = decodeDataType(buf);
|
||||
auto tmp_variant_column = variant_type->createColumn();
|
||||
auto variant_serialization = dynamic_column.getVariantSerialization(variant_type);
|
||||
auto variant_serialization = variant_type->getDefaultSerialization();
|
||||
variant_serialization->deserializeBinary(*tmp_variant_column, buf, settings);
|
||||
nested_serialize(*variant_serialization, *tmp_variant_column, 0, ostr);
|
||||
}
|
||||
|
@ -35,9 +35,10 @@ class RegionsNames
|
||||
M(et, ru, 11) \
|
||||
M(pt, en, 12) \
|
||||
M(he, en, 13) \
|
||||
M(vi, en, 14)
|
||||
M(vi, en, 14) \
|
||||
M(es, en, 15)
|
||||
|
||||
static constexpr size_t total_languages = 15;
|
||||
static constexpr size_t total_languages = 16;
|
||||
|
||||
public:
|
||||
enum class Language : size_t
|
||||
|
@ -43,39 +43,21 @@ bool LocalObjectStorage::exists(const StoredObject & object) const
|
||||
std::unique_ptr<ReadBufferFromFileBase> LocalObjectStorage::readObjects( /// NOLINT
|
||||
const StoredObjects & objects,
|
||||
const ReadSettings & read_settings,
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const
|
||||
std::optional<size_t>,
|
||||
std::optional<size_t>) const
|
||||
{
|
||||
auto modified_settings = patchSettings(read_settings);
|
||||
auto global_context = Context::getGlobalContextInstance();
|
||||
auto read_buffer_creator =
|
||||
[=] (bool /* restricted_seek */, const StoredObject & object)
|
||||
-> std::unique_ptr<ReadBufferFromFileBase>
|
||||
{
|
||||
return createReadBufferFromFileBase(object.remote_path, modified_settings, read_hint, file_size);
|
||||
};
|
||||
auto read_buffer_creator = [=](bool /* restricted_seek */, const StoredObject & object) -> std::unique_ptr<ReadBufferFromFileBase>
|
||||
{ return std::make_unique<ReadBufferFromFile>(object.remote_path); };
|
||||
|
||||
switch (read_settings.remote_fs_method)
|
||||
{
|
||||
case RemoteFSReadMethod::read:
|
||||
{
|
||||
return std::make_unique<ReadBufferFromRemoteFSGather>(
|
||||
std::move(read_buffer_creator), objects, "file:", modified_settings,
|
||||
global_context->getFilesystemCacheLog(), /* use_external_buffer */false);
|
||||
}
|
||||
case RemoteFSReadMethod::threadpool:
|
||||
{
|
||||
auto impl = std::make_unique<ReadBufferFromRemoteFSGather>(
|
||||
std::move(read_buffer_creator), objects, "file:", modified_settings,
|
||||
global_context->getFilesystemCacheLog(), /* use_external_buffer */true);
|
||||
|
||||
auto & reader = global_context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER);
|
||||
return std::make_unique<AsynchronousBoundedReadBuffer>(
|
||||
std::move(impl), reader, read_settings,
|
||||
global_context->getAsyncReadCounters(),
|
||||
global_context->getFilesystemReadPrefetchesLog());
|
||||
}
|
||||
}
|
||||
return std::make_unique<ReadBufferFromRemoteFSGather>(
|
||||
std::move(read_buffer_creator),
|
||||
objects,
|
||||
"file:",
|
||||
modified_settings,
|
||||
global_context->getFilesystemCacheLog(),
|
||||
/* use_external_buffer */ false);
|
||||
}
|
||||
|
||||
ReadSettings LocalObjectStorage::patchSettings(const ReadSettings & read_settings) const
|
||||
|
@ -257,7 +257,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
|
||||
format_settings.max_bytes_to_read_for_schema_inference = settings.input_format_max_bytes_to_read_for_schema_inference;
|
||||
format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference;
|
||||
format_settings.schema_inference_hints = settings.schema_inference_hints;
|
||||
format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable;
|
||||
format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable.valueOr(2);
|
||||
format_settings.mysql_dump.table_name = settings.input_format_mysql_dump_table_name;
|
||||
format_settings.mysql_dump.map_column_names = settings.input_format_mysql_dump_map_column_names;
|
||||
format_settings.sql_insert.max_batch_size = settings.output_format_sql_insert_max_batch_size;
|
||||
|
@ -77,7 +77,7 @@ struct FormatSettings
|
||||
Raw
|
||||
};
|
||||
|
||||
bool schema_inference_make_columns_nullable = true;
|
||||
UInt64 schema_inference_make_columns_nullable = 1;
|
||||
|
||||
DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple;
|
||||
|
||||
|
@ -1179,6 +1179,12 @@ public:
|
||||
const FormatSettings & format_settings,
|
||||
String & error) const override
|
||||
{
|
||||
if (element.isNull() && format_settings.null_as_default)
|
||||
{
|
||||
column.insertDefault();
|
||||
return true;
|
||||
}
|
||||
|
||||
auto & tuple = assert_cast<ColumnTuple &>(column);
|
||||
size_t old_size = column.size();
|
||||
bool were_valid_elements = false;
|
||||
@ -1298,6 +1304,12 @@ public:
|
||||
const FormatSettings & format_settings,
|
||||
String & error) const override
|
||||
{
|
||||
if (element.isNull() && format_settings.null_as_default)
|
||||
{
|
||||
column.insertDefault();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!element.isObject())
|
||||
{
|
||||
error = fmt::format("cannot read Map value from JSON element: {}", jsonElementToString<JSONParser>(element, format_settings));
|
||||
@ -1362,6 +1374,14 @@ public:
|
||||
String & error) const override
|
||||
{
|
||||
auto & column_variant = assert_cast<ColumnVariant &>(column);
|
||||
|
||||
/// Check if element is NULL.
|
||||
if (element.isNull())
|
||||
{
|
||||
column_variant.insertDefault();
|
||||
return true;
|
||||
}
|
||||
|
||||
for (size_t i : order)
|
||||
{
|
||||
auto & variant = column_variant.getVariantByGlobalDiscriminator(i);
|
||||
|
@ -1344,7 +1344,11 @@ namespace
|
||||
if (checkCharCaseInsensitive('n', buf))
|
||||
{
|
||||
if (checkStringCaseInsensitive("ull", buf))
|
||||
return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>());
|
||||
{
|
||||
if (settings.schema_inference_make_columns_nullable == 0)
|
||||
return std::make_shared<DataTypeNothing>();
|
||||
return makeNullable(std::make_shared<DataTypeNothing>());
|
||||
}
|
||||
else if (checkStringCaseInsensitive("an", buf))
|
||||
return std::make_shared<DataTypeFloat64>();
|
||||
}
|
||||
|
@ -19,7 +19,9 @@
|
||||
#include <Common/HashTable/Hash.h>
|
||||
|
||||
#if USE_SSL
|
||||
# include <openssl/evp.h>
|
||||
# include <openssl/md5.h>
|
||||
# include <openssl/ripemd.h>
|
||||
#endif
|
||||
|
||||
#include <bit>
|
||||
@ -196,6 +198,34 @@ T combineHashesFunc(T t1, T t2)
|
||||
return HashFunction::apply(reinterpret_cast<const char *>(hashes), sizeof(hashes));
|
||||
}
|
||||
|
||||
#if USE_SSL
|
||||
struct RipeMD160Impl
|
||||
{
|
||||
static constexpr auto name = "ripeMD160";
|
||||
using ReturnType = UInt256;
|
||||
|
||||
static UInt256 apply(const char * begin, size_t size)
|
||||
{
|
||||
UInt8 digest[RIPEMD160_DIGEST_LENGTH];
|
||||
|
||||
RIPEMD160(reinterpret_cast<const unsigned char *>(begin), size, reinterpret_cast<unsigned char *>(digest));
|
||||
|
||||
std::reverse(digest, digest + RIPEMD160_DIGEST_LENGTH);
|
||||
|
||||
UInt256 res = 0;
|
||||
std::memcpy(&res, digest, RIPEMD160_DIGEST_LENGTH);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static UInt256 combineHashes(UInt256 h1, UInt256 h2)
|
||||
{
|
||||
return combineHashesFunc<UInt256, RipeMD160Impl>(h1, h2);
|
||||
}
|
||||
|
||||
static constexpr bool use_int_hash_for_pods = false;
|
||||
};
|
||||
#endif
|
||||
|
||||
struct SipHash64Impl
|
||||
{
|
||||
@ -1624,6 +1654,7 @@ using FunctionIntHash32 = FunctionIntHash<IntHash32Impl, NameIntHash32>;
|
||||
using FunctionIntHash64 = FunctionIntHash<IntHash64Impl, NameIntHash64>;
|
||||
#if USE_SSL
|
||||
using FunctionHalfMD5 = FunctionAnyHash<HalfMD5Impl>;
|
||||
using FunctionRipeMD160Hash = FunctionAnyHash<RipeMD160Impl>;
|
||||
#endif
|
||||
using FunctionSipHash128 = FunctionAnyHash<SipHash128Impl>;
|
||||
using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key, SipHash128KeyedImpl::KeyColumns>;
|
||||
@ -1652,6 +1683,7 @@ using FunctionXxHash64 = FunctionAnyHash<ImplXxHash64>;
|
||||
using FunctionXXH3 = FunctionAnyHash<ImplXXH3>;
|
||||
|
||||
using FunctionWyHash64 = FunctionAnyHash<ImplWyHash64>;
|
||||
|
||||
}
|
||||
|
||||
#pragma clang diagnostic pop
|
||||
|
src/Functions/FunctionsHashingRipe.cpp (new file)
@ -0,0 +1,23 @@
#include "FunctionsHashing.h"

#include <Functions/FunctionFactory.h>

/// FunctionsHashing instantiations are separated into files FunctionsHashing*.cpp
/// to better parallelize the build procedure and avoid MSan build failure
/// due to excessive resource consumption.
namespace DB
{
#if USE_SSL
REGISTER_FUNCTION(HashingRipe)
{
    factory.registerFunction<FunctionRipeMD160Hash>(FunctionDocumentation{
        .description = "RIPEMD-160 hash function, primarily used in Bitcoin address generation.",
        .examples{{"", "SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));", R"(
┌─hex(ripeMD160('The quick brown fox jumps over the lazy dog'))─┐
│ 37F332F68DB77BD9D7EDD4969571AD671CF9DD3B │
└───────────────────────────────────────────────────────────────┘
        )"}},
        .categories{"Hash"}});
}
#endif
}
@ -284,12 +284,12 @@ void OrdinalDate::init(int64_t modified_julian_day)

bool OrdinalDate::tryInit(int64_t modified_julian_day)
{
    /// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively).
    /// This function supports day number from -678941 to 2973483 (which represent 0000-01-01 and 9999-12-31 respectively).

    if (modified_julian_day < -678941)
        return false;

    if (modified_julian_day > 2973119)
    if (modified_julian_day > 2973483)
        return false;

    const auto a = modified_julian_day + 678575;
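The updated upper bound can be sanity-checked with a short calculation (a sketch, using only the commonly cited anchor MJD 51544 = 2000-01-01):

$$
\mathrm{MJD}(9999\text{-}12\text{-}31) = 51544 + (8000 \cdot 365 + 1940) - 1 = 2973483,
$$

where $8000 \cdot 365 + 1940$ is the number of days from 2000-01-01 to 10000-01-01, and $1940 = 2000 - 80 + 20$ counts the Gregorian leap days in the years 2000–9999.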
@ -4,17 +4,21 @@
|
||||
|
||||
#if USE_ICU
|
||||
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Functions/LowerUpperImpl.h>
|
||||
#include <unicode/unistr.h>
|
||||
#include <Common/StringUtils.h>
|
||||
# include <Columns/ColumnString.h>
|
||||
# include <Functions/LowerUpperImpl.h>
|
||||
# include <unicode/ucasemap.h>
|
||||
# include <unicode/unistr.h>
|
||||
# include <unicode/urename.h>
|
||||
# include <unicode/utypes.h>
|
||||
# include <Common/StringUtils.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
template <char not_case_lower_bound, char not_case_upper_bound, bool upper>
|
||||
@ -27,7 +31,7 @@ struct LowerUpperUTF8Impl
|
||||
ColumnString::Offsets & res_offsets,
|
||||
size_t input_rows_count)
|
||||
{
|
||||
if (data.empty())
|
||||
if (input_rows_count == 0)
|
||||
return;
|
||||
|
||||
bool all_ascii = isAllASCII(data.data(), data.size());
|
||||
@ -38,39 +42,56 @@ struct LowerUpperUTF8Impl
|
||||
}
|
||||
|
||||
res_data.resize(data.size());
|
||||
res_offsets.resize_exact(offsets.size());
|
||||
res_offsets.resize_exact(input_rows_count);
|
||||
|
||||
UErrorCode error_code = U_ZERO_ERROR;
|
||||
UCaseMap * case_map = ucasemap_open("", U_FOLD_CASE_DEFAULT, &error_code);
|
||||
if (U_FAILURE(error_code))
|
||||
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Error calling ucasemap_open: {}", u_errorName(error_code));
|
||||
|
||||
String output;
|
||||
size_t curr_offset = 0;
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
for (size_t row_i = 0; row_i < input_rows_count; ++row_i)
|
||||
{
|
||||
const auto * data_start = reinterpret_cast<const char *>(&data[offsets[i - 1]]);
|
||||
size_t size = offsets[i] - offsets[i - 1];
|
||||
const auto * src = reinterpret_cast<const char *>(&data[offsets[row_i - 1]]);
|
||||
size_t src_size = offsets[row_i] - offsets[row_i - 1] - 1;
|
||||
|
||||
icu::UnicodeString input(data_start, static_cast<int32_t>(size), "UTF-8");
|
||||
int32_t dst_size;
|
||||
if constexpr (upper)
|
||||
input.toUpper();
|
||||
dst_size = ucasemap_utf8ToUpper(
|
||||
case_map, reinterpret_cast<char *>(&res_data[curr_offset]), res_data.size() - curr_offset, src, src_size, &error_code);
|
||||
else
|
||||
input.toLower();
|
||||
dst_size = ucasemap_utf8ToLower(
|
||||
case_map, reinterpret_cast<char *>(&res_data[curr_offset]), res_data.size() - curr_offset, src, src_size, &error_code);
|
||||
|
||||
output.clear();
|
||||
input.toUTF8String(output);
|
||||
if (error_code == U_BUFFER_OVERFLOW_ERROR || error_code == U_STRING_NOT_TERMINATED_WARNING)
|
||||
{
|
||||
size_t new_size = curr_offset + dst_size + 1;
|
||||
res_data.resize(new_size);
|
||||
|
||||
/// For valid UTF-8 input strings, ICU sometimes produces output with an extra '\0 at the end. Only the data before that
|
||||
/// '\0' is valid. If the input is not valid UTF-8, then the behavior of lower/upperUTF8 is undefined by definition. In this
|
||||
/// case, the behavior is also reasonable.
|
||||
size_t valid_size = output.size();
|
||||
if (!output.empty() && output.back() == '\0')
|
||||
--valid_size;
|
||||
error_code = U_ZERO_ERROR;
|
||||
if constexpr (upper)
|
||||
dst_size = ucasemap_utf8ToUpper(
|
||||
case_map, reinterpret_cast<char *>(&res_data[curr_offset]), res_data.size() - curr_offset, src, src_size, &error_code);
|
||||
else
|
||||
dst_size = ucasemap_utf8ToLower(
|
||||
case_map, reinterpret_cast<char *>(&res_data[curr_offset]), res_data.size() - curr_offset, src, src_size, &error_code);
|
||||
}
|
||||
|
||||
res_data.resize(curr_offset + valid_size + 1);
|
||||
if (error_code != U_ZERO_ERROR)
|
||||
throw DB::Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Error calling {}: {} input: {} input_size: {}",
|
||||
upper ? "ucasemap_utf8ToUpper" : "ucasemap_utf8ToLower",
|
||||
u_errorName(error_code),
|
||||
std::string_view(src, src_size),
|
||||
src_size);
|
||||
|
||||
memcpy(&res_data[curr_offset], output.data(), valid_size);
|
||||
res_data[curr_offset + valid_size] = 0;
|
||||
|
||||
curr_offset += valid_size + 1;
|
||||
res_offsets[i] = curr_offset;
|
||||
res_data[curr_offset + dst_size] = 0;
|
||||
curr_offset += dst_size + 1;
|
||||
res_offsets[row_i] = curr_offset;
|
||||
}
|
||||
|
||||
res_data.resize(curr_offset);
|
||||
}
|
||||
|
||||
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
|
||||
|
@ -406,7 +406,7 @@ void UserDefinedSQLObjectsZooKeeperStorage::syncObjects(const zkutil::ZooKeeperP
|
||||
LOG_DEBUG(log, "Syncing user-defined {} objects", object_type);
|
||||
Strings object_names = getObjectNamesAndSetWatch(zookeeper, object_type);
|
||||
|
||||
getLock();
|
||||
auto lock = getLock();
|
||||
|
||||
/// Remove stale objects
|
||||
removeAllObjectsExcept(object_names);
|
||||
|
@ -1598,6 +1598,9 @@ ColumnPtr FunctionArrayElement::executeTuple(const ColumnsWithTypeAndName & argu
|
||||
const auto & tuple_columns = col_nested->getColumns();
|
||||
size_t tuple_size = tuple_columns.size();
|
||||
|
||||
if (tuple_size == 0)
|
||||
return ColumnTuple::create(input_rows_count);
|
||||
|
||||
const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(
|
||||
*typeid_cast<const DataTypeArray &>(*arguments[0].type).getNestedType()).getElements();
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDate32.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeInterval.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
@ -43,6 +44,7 @@ public:
|
||||
enum ResultType
|
||||
{
|
||||
Date,
|
||||
Date32,
|
||||
DateTime,
|
||||
DateTime64,
|
||||
};
|
||||
@ -75,15 +77,15 @@ public:
|
||||
|
||||
bool second_argument_is_date = false;
|
||||
auto check_second_argument = [&] {
|
||||
if (!isDate(arguments[1].type) && !isDateTime(arguments[1].type) && !isDateTime64(arguments[1].type))
|
||||
if (!isDateOrDate32(arguments[1].type) && !isDateTime(arguments[1].type) && !isDateTime64(arguments[1].type))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2nd argument of function {}. "
|
||||
"Should be a date or a date with time", arguments[1].type->getName(), getName());
|
||||
|
||||
second_argument_is_date = isDate(arguments[1].type);
|
||||
second_argument_is_date = isDateOrDate32(arguments[1].type);
|
||||
|
||||
if (second_argument_is_date && ((datepart_kind == IntervalKind::Kind::Hour)
|
||||
|| (datepart_kind == IntervalKind::Kind::Minute) || (datepart_kind == IntervalKind::Kind::Second)))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type Date of argument for function {}", getName());
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for function {}", arguments[1].type->getName(), getName());
|
||||
};
|
||||
|
||||
auto check_timezone_argument = [&] {
|
||||
@ -119,6 +121,8 @@ public:
|
||||
|
||||
if (result_type == ResultType::Date)
|
||||
return std::make_shared<DataTypeDate>();
|
||||
if (result_type == ResultType::Date32)
|
||||
return std::make_shared<DataTypeDate32>();
|
||||
else if (result_type == ResultType::DateTime)
|
||||
return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 1, false));
|
||||
else
|
||||
|
@ -44,9 +44,9 @@ public:
|
||||
auto check_first_argument = [&]
|
||||
{
|
||||
const DataTypePtr & type_arg1 = arguments[0].type;
|
||||
if (!isDate(type_arg1) && !isDateTime(type_arg1) && !isDateTime64(type_arg1))
|
||||
if (!isDateOrDate32(type_arg1) && !isDateTime(type_arg1) && !isDateTime64(type_arg1))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of 1st argument of function {}, expected a Date, DateTime or DateTime64",
|
||||
"Illegal type {} of 1st argument of function {}, expected a Date, Date32, DateTime or DateTime64",
|
||||
type_arg1->getName(), getName());
|
||||
value_is_date = isDate(type_arg1);
|
||||
};
|
||||
@ -56,6 +56,7 @@ public:
|
||||
enum class ResultType : uint8_t
|
||||
{
|
||||
Date,
|
||||
Date32,
|
||||
DateTime,
|
||||
DateTime64
|
||||
};
|
||||
@ -128,6 +129,8 @@ public:
|
||||
{
|
||||
case ResultType::Date:
|
||||
return std::make_shared<DataTypeDate>();
|
||||
case ResultType::Date32:
|
||||
return std::make_shared<DataTypeDate32>();
|
||||
case ResultType::DateTime:
|
||||
return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false));
|
||||
case ResultType::DateTime64:
|
||||
@ -185,7 +188,13 @@ private:
|
||||
if (time_column_vec)
|
||||
return dispatchForIntervalColumn(assert_cast<const DataTypeDate &>(time_column_type), *time_column_vec, interval_column, result_type, time_zone, input_rows_count);
|
||||
}
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for 1st argument of function {}, expected a Date, DateTime or DateTime64", getName());
|
||||
else if (isDate32(time_column_type))
|
||||
{
|
||||
const auto * time_column_vec = checkAndGetColumn<ColumnDate32>(&time_column_col);
|
||||
if (time_column_vec)
|
||||
return dispatchForIntervalColumn(assert_cast<const DataTypeDate32 &>(time_column_type), *time_column_vec, interval_column, result_type, time_zone, input_rows_count);
|
||||
}
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for 1st argument of function {}, expected a Date, Date32, DateTime or DateTime64", getName());
|
||||
}
|
||||
|
||||
template <typename TimeDataType, typename TimeColumnType>
|
||||
|
@ -91,6 +91,8 @@ void ProgressValues::writeJSON(WriteBuffer & out) const
|
||||
writeText(result_bytes, out);
|
||||
writeCString("\",\"elapsed_ns\":\"", out);
|
||||
writeText(elapsed_ns, out);
|
||||
writeCString("\",\"real_time_microseconds\":\"", out);
|
||||
writeText(real_time_microseconds, out);
|
||||
writeCString("\"", out);
|
||||
writeCString("}", out);
|
||||
}
|
||||
@ -110,6 +112,7 @@ bool Progress::incrementPiecewiseAtomically(const Progress & rhs)
|
||||
result_bytes += rhs.result_bytes;
|
||||
|
||||
elapsed_ns += rhs.elapsed_ns;
|
||||
real_time_microseconds += rhs.real_time_microseconds;
|
||||
|
||||
return rhs.read_rows || rhs.written_rows;
|
||||
}
|
||||
@ -129,6 +132,7 @@ void Progress::reset()
|
||||
result_bytes = 0;
|
||||
|
||||
elapsed_ns = 0;
|
||||
real_time_microseconds = 0;
|
||||
}
|
||||
|
||||
ProgressValues Progress::getValues() const
|
||||
@ -148,6 +152,7 @@ ProgressValues Progress::getValues() const
|
||||
res.result_bytes = result_bytes.load(std::memory_order_relaxed);
|
||||
|
||||
res.elapsed_ns = elapsed_ns.load(std::memory_order_relaxed);
|
||||
res.real_time_microseconds = real_time_microseconds.load(std::memory_order_relaxed);
|
||||
|
||||
return res;
|
||||
}
|
||||
@ -169,6 +174,7 @@ ProgressValues Progress::fetchValuesAndResetPiecewiseAtomically()
|
||||
res.result_bytes = result_bytes.fetch_and(0);
|
||||
|
||||
res.elapsed_ns = elapsed_ns.fetch_and(0);
|
||||
res.real_time_microseconds = real_time_microseconds.fetch_and(0);
|
||||
|
||||
return res;
|
||||
}
|
||||
@ -190,6 +196,7 @@ Progress Progress::fetchAndResetPiecewiseAtomically()
|
||||
res.result_bytes = result_bytes.fetch_and(0);
|
||||
|
||||
res.elapsed_ns = elapsed_ns.fetch_and(0);
|
||||
res.real_time_microseconds = real_time_microseconds.fetch_and(0);
|
||||
|
||||
return res;
|
||||
}
|
||||
@ -209,6 +216,7 @@ Progress & Progress::operator=(Progress && other) noexcept
|
||||
result_bytes = other.result_bytes.load(std::memory_order_relaxed);
|
||||
|
||||
elapsed_ns = other.elapsed_ns.load(std::memory_order_relaxed);
|
||||
real_time_microseconds = other.real_time_microseconds.load(std::memory_order_relaxed);
|
||||
|
||||
return *this;
|
||||
}
|
||||
@ -244,4 +252,9 @@ void Progress::incrementElapsedNs(UInt64 elapsed_ns_)
|
||||
elapsed_ns.fetch_add(elapsed_ns_, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
void Progress::incrementRealTimeMicroseconds(UInt64 microseconds)
|
||||
{
|
||||
real_time_microseconds.fetch_add(microseconds, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -28,6 +28,7 @@ struct ProgressValues
|
||||
UInt64 result_bytes = 0;
|
||||
|
||||
UInt64 elapsed_ns = 0;
|
||||
UInt64 real_time_microseconds = 0;
|
||||
|
||||
void read(ReadBuffer & in, UInt64 server_revision);
|
||||
void write(WriteBuffer & out, UInt64 client_revision) const;
|
||||
@ -40,6 +41,7 @@ struct ReadProgress
|
||||
UInt64 read_bytes = 0;
|
||||
UInt64 total_rows_to_read = 0;
|
||||
UInt64 total_bytes_to_read = 0;
|
||||
UInt64 real_time_microseconds = 0;
|
||||
|
||||
ReadProgress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0, UInt64 total_bytes_to_read_ = 0)
|
||||
: read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_), total_bytes_to_read(total_bytes_to_read_) {}
|
||||
@ -96,6 +98,8 @@ struct Progress
|
||||
|
||||
std::atomic<UInt64> elapsed_ns {0};
|
||||
|
||||
std::atomic<UInt64> real_time_microseconds {0};
|
||||
|
||||
Progress() = default;
|
||||
|
||||
Progress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0, UInt64 total_bytes_to_read_ = 0)
|
||||
@ -125,6 +129,8 @@ struct Progress
|
||||
|
||||
void incrementElapsedNs(UInt64 elapsed_ns_);
|
||||
|
||||
void incrementRealTimeMicroseconds(UInt64 microseconds);
|
||||
|
||||
void reset();
|
||||
|
||||
ProgressValues getValues() const;
|
||||
|
@ -443,6 +443,7 @@ std::unique_ptr<ReadBuffer> ReadWriteBufferFromHTTP::initialize()
|
||||
}
|
||||
|
||||
response.getCookies(cookies);
|
||||
response.getHeaders(response_headers);
|
||||
content_encoding = response.get("Content-Encoding", "");
|
||||
|
||||
// Remember file size. It'll be used to report eof in next nextImpl() call.
|
||||
@ -680,6 +681,19 @@ std::string ReadWriteBufferFromHTTP::getResponseCookie(const std::string & name,
|
||||
return def;
|
||||
}
|
||||
|
||||
Map ReadWriteBufferFromHTTP::getResponseHeaders() const
|
||||
{
|
||||
Map map;
|
||||
for (const auto & header : response_headers)
|
||||
{
|
||||
Tuple elem;
|
||||
elem.emplace_back(header.first);
|
||||
elem.emplace_back(header.second);
|
||||
map.emplace_back(elem);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
void ReadWriteBufferFromHTTP::setNextCallback(NextCallback next_callback_)
|
||||
{
|
||||
next_callback = next_callback_;
|
||||
|
@ -90,6 +90,9 @@ private:
|
||||
std::unique_ptr<ReadBuffer> impl;
|
||||
|
||||
std::vector<Poco::Net::HTTPCookie> cookies;
|
||||
|
||||
std::map<String, String> response_headers;
|
||||
|
||||
HTTPHeaderEntries http_header_entries;
|
||||
std::function<void(size_t)> next_callback;
|
||||
|
||||
@ -187,6 +190,8 @@ public:
|
||||
|
||||
HTTPFileInfo getFileInfo();
|
||||
static HTTPFileInfo parseFileInfo(const Poco::Net::HTTPResponse & response, size_t requested_range_begin);
|
||||
|
||||
Map getResponseHeaders() const;
|
||||
};
|
||||
|
||||
using ReadWriteBufferFromHTTPPtr = std::unique_ptr<ReadWriteBufferFromHTTP>;
|
||||
|
@ -787,7 +787,7 @@ S3CredentialsProviderChain::S3CredentialsProviderChain(
|
||||
/// EC2MetadataService delay is in order of seconds so it only make sense to retry after a couple of seconds.
|
||||
/// But the connection timeout should be small because there is the case when there is no IMDS at all,
|
||||
/// like outside of the cloud, on your own machines.
|
||||
aws_client_configuration.connectTimeoutMs = 10;
|
||||
aws_client_configuration.connectTimeoutMs = 50;
|
||||
aws_client_configuration.requestTimeoutMs = 1000;
|
||||
|
||||
aws_client_configuration.retryStrategy = std::make_shared<Aws::Client::DefaultRetryStrategy>(1, 1000);
|
||||
|
@ -59,6 +59,18 @@ class CompiledAggregateFunctionsHolder;
|
||||
class NativeWriter;
|
||||
struct OutputBlockColumns;
|
||||
|
||||
struct GroupingSetsParams
|
||||
{
|
||||
GroupingSetsParams() = default;
|
||||
|
||||
GroupingSetsParams(Names used_keys_, Names missing_keys_) : used_keys(std::move(used_keys_)), missing_keys(std::move(missing_keys_)) { }
|
||||
|
||||
Names used_keys;
|
||||
Names missing_keys;
|
||||
};
|
||||
|
||||
using GroupingSetsParamsList = std::vector<GroupingSetsParams>;
|
||||
|
||||
/** How are "total" values calculated with WITH TOTALS?
|
||||
* (For more details, see TotalsHavingTransform.)
|
||||
*
|
||||
|
@ -389,6 +389,10 @@ AsynchronousInsertQueue::pushDataChunk(ASTPtr query, DataChunk chunk, ContextPtr
|
||||
if (data_kind == DataKind::Preprocessed)
|
||||
insert_query.format = "Native";
|
||||
|
||||
/// Query parameters make sense only for format Values.
|
||||
if (insert_query.format == "Values")
|
||||
entry->query_parameters = query_context->getQueryParameters();
|
||||
|
||||
InsertQuery key{query, query_context->getUserID(), query_context->getCurrentRoles(), settings, data_kind};
|
||||
InsertDataPtr data_to_process;
|
||||
std::future<void> insert_future;
|
||||
@ -999,13 +1003,20 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(
|
||||
"Expected entry with data kind Parsed. Got: {}", entry->chunk.getDataKind());
|
||||
|
||||
auto buffer = std::make_unique<ReadBufferFromString>(*bytes);
|
||||
executor.setQueryParameters(entry->query_parameters);
|
||||
|
||||
size_t num_bytes = bytes->size();
|
||||
size_t num_rows = executor.execute(*buffer);
|
||||
|
||||
total_rows += num_rows;
|
||||
chunk_info->offsets.push_back(total_rows);
|
||||
chunk_info->tokens.push_back(entry->async_dedup_token);
|
||||
/// for some reason, client can pass zero rows and bytes to server.
|
||||
/// We don't update offsets in this case, because we assume every insert has some rows during dedup
|
||||
/// but we have nothing to deduplicate for this insert.
|
||||
if (num_rows > 0)
|
||||
{
|
||||
chunk_info->offsets.push_back(total_rows);
|
||||
chunk_info->tokens.push_back(entry->async_dedup_token);
|
||||
}
|
||||
|
||||
add_to_async_insert_log(entry, query_for_logging, current_exception, num_rows, num_bytes, data->timeout_ms);
|
||||
|
||||
@ -1056,8 +1067,14 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries(
|
||||
result_columns[i]->insertRangeFrom(*columns[i], 0, columns[i]->size());
|
||||
|
||||
total_rows += block->rows();
|
||||
chunk_info->offsets.push_back(total_rows);
|
||||
chunk_info->tokens.push_back(entry->async_dedup_token);
|
||||
/// for some reason, client can pass zero rows and bytes to server.
|
||||
/// We don't update offsets in this case, because we assume every insert has some rows during dedup,
|
||||
/// but we have nothing to deduplicate for this insert.
|
||||
if (block->rows())
|
||||
{
|
||||
chunk_info->offsets.push_back(total_rows);
|
||||
chunk_info->tokens.push_back(entry->async_dedup_token);
|
||||
}
|
||||
|
||||
const auto & query_for_logging = get_query_by_format(entry->format);
|
||||
add_to_async_insert_log(entry, query_for_logging, "", block->rows(), block->bytes(), data->timeout_ms);
|
||||
|
@ -147,6 +147,7 @@ private:
|
||||
const String format;
|
||||
MemoryTracker * const user_memory_tracker;
|
||||
const std::chrono::time_point<std::chrono::system_clock> create_time;
|
||||
NameToNameMap query_parameters;
|
||||
|
||||
Entry(
|
||||
DataChunk && chunk_,
|
||||
|
@ -893,6 +893,12 @@ ContextData::ContextData(const ContextData &o) :
|
||||
{
|
||||
}
|
||||
|
||||
void ContextData::resetSharedContext()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex_shared_context);
|
||||
shared = nullptr;
|
||||
}
|
||||
|
||||
Context::Context() = default;
|
||||
Context::Context(const Context & rhs) : ContextData(rhs), std::enable_shared_from_this<Context>(rhs) {}
|
||||
|
||||
@ -914,14 +920,6 @@ ContextMutablePtr Context::createGlobal(ContextSharedPart * shared_part)
|
||||
return res;
|
||||
}
|
||||
|
||||
void Context::initGlobal()
|
||||
{
|
||||
assert(!global_context_instance);
|
||||
global_context_instance = shared_from_this();
|
||||
DatabaseCatalog::init(shared_from_this());
|
||||
EventNotifier::init();
|
||||
}
|
||||
|
||||
SharedContextHolder Context::createShared()
|
||||
{
|
||||
return SharedContextHolder(std::make_unique<ContextSharedPart>());
|
||||
@ -2692,7 +2690,11 @@ void Context::makeSessionContext()
|
||||
|
||||
void Context::makeGlobalContext()
|
||||
{
|
||||
initGlobal();
|
||||
assert(!global_context_instance);
|
||||
global_context_instance = shared_from_this();
|
||||
DatabaseCatalog::init(shared_from_this());
|
||||
EventNotifier::init();
|
||||
|
||||
global_context = shared_from_this();
|
||||
}
|
||||
|
||||
@ -4088,8 +4090,13 @@ void Context::initializeTraceCollector()
|
||||
}
|
||||
|
||||
/// Call after unexpected crash happen.
|
||||
void Context::handleCrash() const TSA_NO_THREAD_SAFETY_ANALYSIS
|
||||
void Context::handleCrash() const
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex_shared_context);
|
||||
if (!shared)
|
||||
return;
|
||||
|
||||
SharedLockGuard lock2(shared->mutex);
|
||||
if (shared->system_logs)
|
||||
shared->system_logs->handleCrash();
|
||||
}
|
||||
|
@ -492,6 +492,8 @@ public:
|
||||
|
||||
KitchenSink kitchen_sink;
|
||||
|
||||
void resetSharedContext();
|
||||
|
||||
protected:
|
||||
using SampleBlockCache = std::unordered_map<std::string, Block>;
|
||||
mutable SampleBlockCache sample_block_cache;
|
||||
@ -529,6 +531,10 @@ protected:
|
||||
mutable ThrottlerPtr local_write_query_throttler; /// A query-wide throttler for local IO writes
|
||||
|
||||
mutable ThrottlerPtr backups_query_throttler; /// A query-wide throttler for BACKUPs
|
||||
|
||||
mutable std::mutex mutex_shared_context; /// mutex to avoid accessing destroyed shared context pointer
|
||||
/// some Context methods can be called after the shared context is destroyed
|
||||
/// example, Context::handleCrash() method - called from signal handler
|
||||
};
|
||||
|
||||
/** A set of known objects that can be used in the query.
|
||||
@ -1387,8 +1393,6 @@ private:
|
||||
|
||||
ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoaderWithLock(const std::lock_guard<std::mutex> & lock);
|
||||
|
||||
void initGlobal();
|
||||
|
||||
void setUserID(const UUID & user_id_);
|
||||
void setCurrentRolesImpl(const std::vector<UUID> & new_current_roles, bool throw_if_not_granted, bool skip_if_not_granted, const std::shared_ptr<const User> & user);
|
||||
|
||||
|
@ -107,7 +107,9 @@ BlockIO InterpreterDeleteQuery::execute()
|
||||
String alter_query =
|
||||
"ALTER TABLE " + table->getStorageID().getFullTableName()
|
||||
+ (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster))
|
||||
+ " UPDATE `_row_exists` = 0 WHERE " + serializeAST(*delete_query.predicate);
|
||||
+ " UPDATE `_row_exists` = 0"
|
||||
+ (delete_query.partition ? " IN PARTITION " + serializeAST(*delete_query.partition) : "")
|
||||
+ " WHERE " + serializeAST(*delete_query.predicate);
|
||||
|
||||
ParserAlterQuery parser;
|
||||
ASTPtr alter_ast = parseQuery(
|
||||
|
@@ -347,6 +347,27 @@ bool shouldIgnoreQuotaAndLimits(const StorageID & table_id)
    return false;
}

GroupingSetsParamsList getAggregatorGroupingSetsParams(const NamesAndTypesLists & aggregation_keys_list, const Names & all_keys)
{
    GroupingSetsParamsList result;

    for (const auto & aggregation_keys : aggregation_keys_list)
    {
        NameSet keys;
        for (const auto & key : aggregation_keys)
            keys.insert(key.name);

        Names missing_keys;
        for (const auto & key : all_keys)
            if (!keys.contains(key))
                missing_keys.push_back(key);

        result.emplace_back(aggregation_keys.getNames(), std::move(missing_keys));
    }

    return result;
}

}

InterpreterSelectQuery::InterpreterSelectQuery(
@@ -2005,13 +2026,12 @@ static void executeMergeAggregatedImpl(
    bool has_grouping_sets,
    const Settings & settings,
    const NamesAndTypesList & aggregation_keys,
    const NamesAndTypesLists & aggregation_keys_list,
    const AggregateDescriptions & aggregates,
    bool should_produce_results_in_order_of_bucket_number,
    SortDescription group_by_sort_description)
{
    auto keys = aggregation_keys.getNames();
    if (has_grouping_sets)
        keys.insert(keys.begin(), "__grouping_set");

    /** There are two modes of distributed aggregation.
     *
@@ -2029,10 +2049,12 @@ static void executeMergeAggregatedImpl(
     */

    Aggregator::Params params(keys, aggregates, overflow_row, settings.max_threads, settings.max_block_size, settings.min_hit_rate_to_use_consecutive_keys_optimization);
    auto grouping_sets_params = getAggregatorGroupingSetsParams(aggregation_keys_list, keys);

    auto merging_aggregated = std::make_unique<MergingAggregatedStep>(
        query_plan.getCurrentDataStream(),
        params,
        grouping_sets_params,
        final,
        /// Grouping sets don't work with distributed_aggregation_memory_efficient enabled (#43989)
        settings.distributed_aggregation_memory_efficient && is_remote_storage && !has_grouping_sets,
@@ -2653,30 +2675,6 @@ static Aggregator::Params getAggregatorParams(
    };
}

static GroupingSetsParamsList getAggregatorGroupingSetsParams(const SelectQueryExpressionAnalyzer & query_analyzer, const Names & all_keys)
{
    GroupingSetsParamsList result;
    if (query_analyzer.useGroupingSetKey())
    {
        auto const & aggregation_keys_list = query_analyzer.aggregationKeysList();

        for (const auto & aggregation_keys : aggregation_keys_list)
        {
            NameSet keys;
            for (const auto & key : aggregation_keys)
                keys.insert(key.name);

            Names missing_keys;
            for (const auto & key : all_keys)
                if (!keys.contains(key))
                    missing_keys.push_back(key);

            result.emplace_back(aggregation_keys.getNames(), std::move(missing_keys));
        }
    }
    return result;
}

void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info)
{
    executeExpression(query_plan, expression, "Before GROUP BY");
@@ -2696,7 +2694,7 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac
        settings.group_by_two_level_threshold,
        settings.group_by_two_level_threshold_bytes);

    auto grouping_sets_params = getAggregatorGroupingSetsParams(*query_analyzer, keys);
    auto grouping_sets_params = getAggregatorGroupingSetsParams(query_analyzer->aggregationKeysList(), keys);

    SortDescription group_by_sort_description;
    SortDescription sort_description_for_merging;
@@ -2764,6 +2762,7 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool
        has_grouping_sets,
        context->getSettingsRef(),
        query_analyzer->aggregationKeys(),
        query_analyzer->aggregationKeysList(),
        query_analyzer->aggregates(),
        should_produce_results_in_order_of_bucket_number,
        std::move(group_by_sort_description));

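The hunks above move the construction of grouping-sets parameters into a shared helper so that `MergingAggregatedStep` also receives them when merging partially aggregated data, for example data arriving from remote shards. A minimal query that would go through this merge path might look like the following sketch; the table name is illustrative only and assumed to be a Distributed table.

```sql
-- Hypothetical Distributed table `sales_all`; the grouping-sets parameters built by the
-- refactored helper are applied when the initiator merges the shards' partial aggregates.
SELECT region, product, sum(amount)
FROM sales_all
GROUP BY GROUPING SETS ((region), (product));
```
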
@@ -90,6 +90,7 @@ namespace ProfileEvents
    extern const Event SelectQueryTimeMicroseconds;
    extern const Event InsertQueryTimeMicroseconds;
    extern const Event OtherQueryTimeMicroseconds;
    extern const Event RealTimeMicroseconds;
}

namespace DB
@@ -398,9 +399,14 @@ void logQueryFinish(
        /// Update performance counters before logging to query_log
        CurrentThread::finalizePerformanceCounters();

        QueryStatusInfo info = process_list_elem->getInfo(true, context->getSettingsRef().log_profile_events);
        elem.type = QueryLogElementType::QUERY_FINISH;
        std::shared_ptr<ProfileEvents::Counters::Snapshot> profile_counters;
        QueryStatusInfo info = process_list_elem->getInfo(true, true);
        if (context->getSettingsRef().log_profile_events)
            profile_counters = info.profile_counters;
        else
            profile_counters.swap(info.profile_counters);

        elem.type = QueryLogElementType::QUERY_FINISH;
        addStatusInfoToQueryLogElement(elem, info, query_ast, context);

        if (pulling_pipeline)
@@ -419,6 +425,7 @@ void logQueryFinish(
            {
                Progress p;
                p.incrementPiecewiseAtomically(Progress{ResultProgress{elem.result_rows, elem.result_bytes}});
                p.incrementRealTimeMicroseconds((*profile_counters)[ProfileEvents::RealTimeMicroseconds]);
                progress_callback(p);
            }

@@ -45,6 +45,12 @@ void ASTDeleteQuery::formatQueryImpl(const FormatSettings & settings, FormatStat

    formatOnCluster(settings);

    if (partition)
    {
        settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : "");
        partition->formatImpl(settings, state, frame);
    }

    settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : "");
    predicate->formatImpl(settings, state, frame);
}

@@ -19,6 +19,11 @@ public:
        return removeOnCluster<ASTDeleteQuery>(clone(), params.default_database);
    }

    /** Used in DELETE FROM queries.
     * The value or ID of the partition is stored here.
     */
    ASTPtr partition;

    ASTPtr predicate;

protected:

@@ -3,6 +3,7 @@
#include <Parsers/parseDatabaseAndTableName.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/ParserSetQuery.h>
#include <Parsers/ParserPartition.h>


namespace DB
@@ -15,11 +16,14 @@ bool ParserDeleteQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)

    ParserKeyword s_delete(Keyword::DELETE);
    ParserKeyword s_from(Keyword::FROM);
    ParserKeyword s_in_partition(Keyword::IN_PARTITION);
    ParserKeyword s_where(Keyword::WHERE);
    ParserExpression parser_exp_elem;
    ParserKeyword s_settings(Keyword::SETTINGS);
    ParserKeyword s_on{Keyword::ON};

    ParserPartition parser_partition;

    if (s_delete.ignore(pos, expected))
    {
        if (!s_from.ignore(pos, expected))
@@ -36,6 +40,12 @@ bool ParserDeleteQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
            query->cluster = cluster_str;
        }

        if (s_in_partition.ignore(pos, expected))
        {
            if (!parser_partition.parse(pos, query->partition, expected))
                return false;
        }

        if (!s_where.ignore(pos, expected))
            return false;

@@ -53,6 +63,9 @@ bool ParserDeleteQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
    else
        return false;

    if (query->partition)
        query->children.push_back(query->partition);

    if (query->predicate)
        query->children.push_back(query->predicate);

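Taken together, the interpreter, AST, and parser hunks above add an optional `IN PARTITION` clause to lightweight `DELETE` queries, which is rewritten internally to an `ALTER TABLE ... UPDATE _row_exists = 0 IN PARTITION ... WHERE ...` mutation. A hedged usage sketch, with an illustrative table assumed to be partitioned by `toYYYYMM(order_date)`:

```sql
-- Only rows in partition 202409 are considered by the lightweight delete
-- (table name, partition value, and predicate are illustrative).
DELETE FROM orders IN PARTITION 202409 WHERE status = 'cancelled';
```
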
@@ -504,8 +504,6 @@ void addMergingAggregatedStep(QueryPlan & query_plan,
     */

    auto keys = aggregation_analysis_result.aggregation_keys;
    if (!aggregation_analysis_result.grouping_sets_parameters_list.empty())
        keys.insert(keys.begin(), "__grouping_set");

    Aggregator::Params params(keys,
        aggregation_analysis_result.aggregate_descriptions,
@@ -530,6 +528,7 @@ void addMergingAggregatedStep(QueryPlan & query_plan,
    auto merging_aggregated = std::make_unique<MergingAggregatedStep>(
        query_plan.getCurrentDataStream(),
        params,
        aggregation_analysis_result.grouping_sets_parameters_list,
        query_analysis_result.aggregate_final,
        /// Grouping sets don't work with distributed_aggregation_memory_efficient enabled (#43989)
        settings.distributed_aggregation_memory_efficient && (is_remote_storage || parallel_replicas_from_merge_tree) && !query_analysis_result.aggregation_with_rollup_or_cube_or_grouping_sets,

@@ -1,5 +1,6 @@
#include <Processors/Executors/StreamingFormatExecutor.h>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <Processors/Formats/Impl/ValuesBlockInputFormat.h>

namespace DB
{
@@ -32,6 +33,13 @@ MutableColumns StreamingFormatExecutor::getResultColumns()
    return ret_columns;
}

void StreamingFormatExecutor::setQueryParameters(const NameToNameMap & parameters)
{
    /// Query parameters make sense only for format Values.
    if (auto * values_format = typeid_cast<ValuesBlockInputFormat *>(format.get()))
        values_format->setQueryParameters(parameters);
}

size_t StreamingFormatExecutor::execute(ReadBuffer & buffer)
{
    format->setReadBuffer(buffer);

@@ -39,6 +39,9 @@ public:
    /// Releases currently accumulated columns.
    MutableColumns getResultColumns();

    /// Sets query parameters for input format if applicable.
    void setQueryParameters(const NameToNameMap & parameters);

private:
    const Block header;
    const InputFormatPtr format;

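The new `setQueryParameters` hook forwards query parameters to the `Values` input format when that format sits behind the executor (used, for example, on the asynchronous-insert path). A hedged sketch of a parameterized insert that would rely on this; table, column, and parameter names are illustrative:

```sql
-- Define a query parameter in the session (clickhouse-client also accepts --param_user_id=42),
-- then reference it inside INSERT ... VALUES.
SET param_user_id = 42;
INSERT INTO events (user_id, action) VALUES ({user_id:UInt32}, 'click');
```
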
@@ -54,13 +54,8 @@ void checkFinalInferredType(
        type = default_type;
    }

    if (settings.schema_inference_make_columns_nullable)
    if (settings.schema_inference_make_columns_nullable == 1)
        type = makeNullableRecursively(type);
    /// In case when data for some column could contain nulls and regular values,
    /// resulting inferred type is Nullable.
    /// If input_format_null_as_default is enabled, we should remove Nullable type.
    else if (settings.null_as_default)
        type = removeNullable(type);
}

void ISchemaReader::transformTypesIfNeeded(DB::DataTypePtr & type, DB::DataTypePtr & new_type)

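The schema-inference hunk above, together with the Arrow reader changes that follow, compares `schema_inference_make_columns_nullable` against the explicit value 1 instead of treating every non-zero value the same, so only that explicit setting still forces every inferred column to be `Nullable`. A hedged sketch with an illustrative file name:

```sql
-- With the setting explicitly set to 1, all columns inferred from the file are wrapped in Nullable.
SET schema_inference_make_columns_nullable = 1;
DESCRIBE file('events.parquet');
```
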
@@ -204,8 +204,11 @@ NamesAndTypesList ArrowSchemaReader::readSchema()
    schema = file_reader->schema();

    auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
        *schema, stream ? "ArrowStream" : "Arrow", format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference);
    if (format_settings.schema_inference_make_columns_nullable)
        *schema,
        stream ? "ArrowStream" : "Arrow",
        format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference,
        format_settings.schema_inference_make_columns_nullable != 0);
    if (format_settings.schema_inference_make_columns_nullable == 1)
        return getNamesAndRecursivelyNullableTypes(header);
    return header.getNamesAndTypesList();
}

@@ -727,6 +727,7 @@ struct ReadColumnFromArrowColumnSettings
    FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior;
    bool allow_arrow_null_type;
    bool skip_columns_with_unsupported_types;
    bool allow_inferring_nullable_columns;
};

static ColumnWithTypeAndName readColumnFromArrowColumn(
@@ -1109,7 +1110,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
    bool is_map_nested_column,
    const ReadColumnFromArrowColumnSettings & settings)
{
    bool read_as_nullable_column = arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable());
    bool read_as_nullable_column = (arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable())) && settings.allow_inferring_nullable_columns;
    if (read_as_nullable_column &&
        arrow_column->type()->id() != arrow::Type::LIST &&
        arrow_column->type()->id() != arrow::Type::LARGE_LIST &&
@@ -1173,14 +1174,16 @@ static std::shared_ptr<arrow::ChunkedArray> createArrowColumn(const std::shared_
Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(
    const arrow::Schema & schema,
    const std::string & format_name,
    bool skip_columns_with_unsupported_types)
    bool skip_columns_with_unsupported_types,
    bool allow_inferring_nullable_columns)
{
    ReadColumnFromArrowColumnSettings settings
    {
        .format_name = format_name,
        .date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore,
        .allow_arrow_null_type = false,
        .skip_columns_with_unsupported_types = skip_columns_with_unsupported_types
        .skip_columns_with_unsupported_types = skip_columns_with_unsupported_types,
        .allow_inferring_nullable_columns = allow_inferring_nullable_columns,
    };

    ColumnsWithTypeAndName sample_columns;
@@ -1254,7 +1257,8 @@ Chunk ArrowColumnToCHColumn::arrowColumnsToCHChunk(const NameToArrowColumn & nam
        .format_name = format_name,
        .date_time_overflow_behavior = date_time_overflow_behavior,
        .allow_arrow_null_type = true,
        .skip_columns_with_unsupported_types = false
        .skip_columns_with_unsupported_types = false,
        .allow_inferring_nullable_columns = true
    };

    Columns columns;

@@ -34,7 +34,8 @@ public:
    static Block arrowSchemaToCHHeader(
        const arrow::Schema & schema,
        const std::string & format_name,
        bool skip_columns_with_unsupported_types = false);
        bool skip_columns_with_unsupported_types = false,
        bool allow_inferring_nullable_columns = true);

    struct DictionaryInfo
    {

@@ -15,8 +15,8 @@ namespace ErrorCodes
}

template <bool with_defaults>
BinaryRowInputFormat<with_defaults>::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_)
    : RowInputFormatWithNamesAndTypes(
BinaryRowInputFormat<with_defaults>::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, IRowInputFormat::Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_)
    : RowInputFormatWithNamesAndTypes<BinaryFormatReader<with_defaults>>(
        header,
        in_,
        params_,

@@ -10,13 +10,16 @@ namespace DB

class ReadBuffer;

template <bool>
class BinaryFormatReader;

/** A stream for inputting data in a binary line-by-line format.
  */
template <bool with_defaults = false>
class BinaryRowInputFormat final : public RowInputFormatWithNamesAndTypes
class BinaryRowInputFormat final : public RowInputFormatWithNamesAndTypes<BinaryFormatReader<with_defaults>>
{
public:
    BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_);
    BinaryRowInputFormat(ReadBuffer & in_, const Block & header, IRowInputFormat::Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_);

    String getName() const override { return "BinaryRowInputFormat"; }

@@ -61,7 +61,7 @@ CSVRowInputFormat::CSVRowInputFormat(
    bool with_names_,
    bool with_types_,
    const FormatSettings & format_settings_,
    std::unique_ptr<FormatWithNamesAndTypesReader> format_reader_)
    std::unique_ptr<CSVFormatReader> format_reader_)
    : RowInputFormatWithNamesAndTypes(
        header_,
        *in_,

@@ -1,7 +1,6 @@
#pragma once

#include <optional>
#include <unordered_map>

#include <Core/Block.h>
#include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
@@ -13,10 +12,12 @@
namespace DB
{

class CSVFormatReader;

/** A stream for inputting data in csv format.
  * Does not conform with https://tools.ietf.org/html/rfc4180 because it skips spaces and tabs between values.
  */
class CSVRowInputFormat : public RowInputFormatWithNamesAndTypes
class CSVRowInputFormat : public RowInputFormatWithNamesAndTypes<CSVFormatReader>
{
public:
    /** with_names - in the first line the header with column names
@@ -32,7 +33,7 @@ public:

protected:
    CSVRowInputFormat(const Block & header_, std::shared_ptr<PeekableReadBuffer> in_, const Params & params_,
        bool with_names_, bool with_types_, const FormatSettings & format_settings_, std::unique_ptr<FormatWithNamesAndTypesReader> format_reader_);
        bool with_names_, bool with_types_, const FormatSettings & format_settings_, std::unique_ptr<CSVFormatReader> format_reader_);

    CSVRowInputFormat(const Block & header_, std::shared_ptr<PeekableReadBuffer> in_buf_, const Params & params_,
        bool with_names_, bool with_types_, const FormatSettings & format_settings_);

@@ -9,7 +9,8 @@
namespace DB
{

class CustomSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes
class CustomSeparatedFormatReader;
class CustomSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes<CustomSeparatedFormatReader>
{
public:
    CustomSeparatedRowInputFormat(

@@ -11,7 +11,7 @@ namespace DB
{

class ReadBuffer;

class JSONCompactEachRowFormatReader;

/** A stream for reading data in a bunch of formats:
  * - JSONCompactEachRow
@@ -20,7 +20,7 @@ class ReadBuffer;
  * - JSONCompactStringsEachRowWithNamesAndTypes
  *
  */
class JSONCompactEachRowRowInputFormat final : public RowInputFormatWithNamesAndTypes
class JSONCompactEachRowRowInputFormat final : public RowInputFormatWithNamesAndTypes<JSONCompactEachRowFormatReader>
{
public:
    JSONCompactEachRowRowInputFormat(

@@ -14,7 +14,7 @@ namespace ErrorCodes

JSONCompactRowInputFormat::JSONCompactRowInputFormat(
    const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_)
    : RowInputFormatWithNamesAndTypes(
    : RowInputFormatWithNamesAndTypes<JSONCompactFormatReader>(
        header_, in_, params_, false, false, false, format_settings_, std::make_unique<JSONCompactFormatReader>(in_, format_settings_))
{
}