diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1388131286f..76933b4e2e4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -356,8 +356,32 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH + PVSCheck: + needs: DockerHubPush + if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} + runs-on: [self-hosted, func-tester] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'recursive' + - name: PVS Check + env: + TEMP_PATH: ${{runner.temp}}/pvs_check + REPO_COPY: ${{runner.temp}}/pvs_check/ClickHouse + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 pvs_check.py + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH FinishCheck: - needs: [StyleCheck, DockerHubPush, CheckLabels, BuilderReport, FastTest, FunctionalStatelessTestDebug, FunctionalStatefulTestDebug, DocsCheck, StressTestDebug, ASTFuzzerTestDebug, IntegrationTestsAsan] + needs: [StyleCheck, DockerHubPush, CheckLabels, BuilderReport, FastTest, FunctionalStatelessTestDebug, FunctionalStatefulTestDebug, DocsCheck, StressTestDebug, ASTFuzzerTestDebug, IntegrationTestsAsan, PVSCheck] runs-on: [self-hosted, style-checker] steps: - name: Check out repository code diff --git a/.gitmodules b/.gitmodules index e0404c1269d..8ad81b5094f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -76,7 +76,7 @@ url = https://github.com/ClickHouse-Extras/libcxxabi.git [submodule "contrib/snappy"] path = contrib/snappy - url = https://github.com/google/snappy + url = https://github.com/ClickHouse-Extras/snappy.git [submodule "contrib/cppkafka"] path = contrib/cppkafka url = https://github.com/mfontanini/cppkafka.git diff --git a/CHANGELOG.md b/CHANGELOG.md index adaaa0f1bc7..686f0072005 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,269 @@ +### ClickHouse release v21.11, 2021-11-09 + +#### Backward Incompatible Change + +* Change order of json_path and json arguments in SQL/JSON functions (to be consistent with the standard). Closes [#30449](https://github.com/ClickHouse/ClickHouse/issues/30449). [#30474](https://github.com/ClickHouse/ClickHouse/pull/30474) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove `MergeTree` table setting `write_final_mark`. It will be always `true`. [#30455](https://github.com/ClickHouse/ClickHouse/pull/30455) ([Kseniia Sumarokova](https://github.com/kssenii)). No actions required, all tables are compatible with the new version. +* Function `bayesAB` is removed. Please help to return this function back, refreshed. This closes [#26233](https://github.com/ClickHouse/ClickHouse/issues/26233). [#29934](https://github.com/ClickHouse/ClickHouse/pull/29934) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* This is relevant only if you already started using the experimental `clickhouse-keeper` support. Now ClickHouse Keeper snapshots compressed with `ZSTD` codec by default instead of custom ClickHouse LZ4 block compression. This behavior can be turned off with `compress_snapshots_with_zstd_format` coordination setting (must be equal on all quorum replicas). 
Backward incompatibility is quite rare and may happen only when a new node sends a snapshot (which happens in case of recovery) to an old node that is unable to read snapshots in ZSTD format. [#29417](https://github.com/ClickHouse/ClickHouse/pull/29417) ([alesapin](https://github.com/alesapin)).
+
+#### New Feature
+
+* New asynchronous INSERT mode allows accumulating inserted data and storing it in a single batch in the background. On the client it can be enabled by setting `async_insert` for `INSERT` queries with data inlined in the query or in a separate buffer (e.g. for `INSERT` queries via HTTP protocol). If `wait_for_async_insert` is true (the default), the client will wait until the data is flushed to the table. On the server side it is controlled by the settings `async_insert_threads`, `async_insert_max_data_size` and `async_insert_busy_timeout_ms`. Implements [#18282](https://github.com/ClickHouse/ClickHouse/issues/18282). [#27537](https://github.com/ClickHouse/ClickHouse/pull/27537) ([Anton Popov](https://github.com/CurtizJ)). [#20557](https://github.com/ClickHouse/ClickHouse/pull/20557) ([Ivan](https://github.com/abyss7)). Notes on performance: with asynchronous inserts you can do up to around 10 000 individual INSERT queries per second, so it is still recommended to insert in batches if you want to achieve performance of up to millions of inserted rows per second.
+* Add interactive mode for `clickhouse-local`. So, you can just run `clickhouse-local` to get a command-line ClickHouse interface without connecting to a server, and process data from files and external data sources. Also merge the code of `clickhouse-client` and `clickhouse-local` together. Closes [#7203](https://github.com/ClickHouse/ClickHouse/issues/7203). Closes [#25516](https://github.com/ClickHouse/ClickHouse/issues/25516). Closes [#22401](https://github.com/ClickHouse/ClickHouse/issues/22401). [#26231](https://github.com/ClickHouse/ClickHouse/pull/26231) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Added support for executable (scriptable) user defined functions. These are UDFs that can be written in any programming language. [#28803](https://github.com/ClickHouse/ClickHouse/pull/28803) ([Maksim Kita](https://github.com/kitaisreal)).
+* Allow predefined connections to external data sources. This makes it possible to avoid specifying credentials or addresses while using external data sources; they can be referenced by name instead. Closes [#28367](https://github.com/ClickHouse/ClickHouse/issues/28367). [#28577](https://github.com/ClickHouse/ClickHouse/pull/28577) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Added `INFORMATION_SCHEMA` database with `SCHEMATA`, `TABLES`, `VIEWS` and `COLUMNS` views over the corresponding tables in the `system` database. Closes [#9770](https://github.com/ClickHouse/ClickHouse/issues/9770). [#28691](https://github.com/ClickHouse/ClickHouse/pull/28691) ([tavplubix](https://github.com/tavplubix)).
+* Support `EXISTS (subquery)`. Closes [#6852](https://github.com/ClickHouse/ClickHouse/issues/6852). [#29731](https://github.com/ClickHouse/ClickHouse/pull/29731) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Session logging for audit. Logs all successful and failed login and logout events to a new `system.session_log` table. [#22415](https://github.com/ClickHouse/ClickHouse/pull/22415) ([Vasily Nemkov](https://github.com/Enmk)) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Support multidimensional cosine distance and euclidean distance functions; L1, L2, Lp, Linf distances and norms.
Scalar product on tuples and various arithmetic operators on tuples. This fully closes [#4509](https://github.com/ClickHouse/ClickHouse/issues/4509) and even more. [#27933](https://github.com/ClickHouse/ClickHouse/pull/27933) ([Alexey Boykov](https://github.com/mathalex)).
+* Add support for compression and decompression for `INTO OUTFILE` and `FROM INFILE` (with autodetection or with an additional optional parameter). [#27135](https://github.com/ClickHouse/ClickHouse/pull/27135) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
+* Add CORS (Cross Origin Resource Sharing) support with HTTP `OPTIONS` request. It means that now Grafana will work with serverless requests without kludges. Closes [#18693](https://github.com/ClickHouse/ClickHouse/issues/18693). [#29155](https://github.com/ClickHouse/ClickHouse/pull/29155) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
+* Queries with JOIN ON now support disjunctions (OR). [#21320](https://github.com/ClickHouse/ClickHouse/pull/21320) ([Ilya Golshtein](https://github.com/ilejn)).
+* Added function `tokens` that allows splitting a string into tokens using non-alphanumeric ASCII characters as separators. [#29981](https://github.com/ClickHouse/ClickHouse/pull/29981) ([Maksim Kita](https://github.com/kitaisreal)). Added function `ngrams` to extract ngrams from text. Closes [#29699](https://github.com/ClickHouse/ClickHouse/issues/29699). [#29738](https://github.com/ClickHouse/ClickHouse/pull/29738) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add functions for Unicode normalization: `normalizeUTF8NFC`, `normalizeUTF8NFD`, `normalizeUTF8NFKC`, `normalizeUTF8NFKD`. [#28633](https://github.com/ClickHouse/ClickHouse/pull/28633) ([darkkeks](https://github.com/darkkeks)).
+* Streaming consumption of application log files in ClickHouse with the `FileLog` table engine. It's like the `Kafka` or `RabbitMQ` engine but for append-only and rotated logs in the local filesystem. Closes [#6953](https://github.com/ClickHouse/ClickHouse/issues/6953). [#25969](https://github.com/ClickHouse/ClickHouse/pull/25969) ([flynn](https://github.com/ucasfl)) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add `CapnProto` output format, refactor `CapnProto` input format. [#29291](https://github.com/ClickHouse/ClickHouse/pull/29291) ([Kruglov Pavel](https://github.com/Avogar)).
+* Allow writing numbers in queries as binary literals. Example: `SELECT 0b001;`. [#29304](https://github.com/ClickHouse/ClickHouse/pull/29304) ([Maksim Kita](https://github.com/kitaisreal)).
+* Added `hashed_array` dictionary type. It saves memory when using dictionaries with multiple attributes. Closes [#30236](https://github.com/ClickHouse/ClickHouse/issues/30236). [#30242](https://github.com/ClickHouse/ClickHouse/pull/30242) ([Maksim Kita](https://github.com/kitaisreal)).
+* Added `JSONExtractKeys` function. [#30056](https://github.com/ClickHouse/ClickHouse/pull/30056) ([Vitaly](https://github.com/orloffv)).
+* Add a function `getOSKernelVersion` - it returns a string with the OS kernel version. [#29755](https://github.com/ClickHouse/ClickHouse/pull/29755) ([Memo](https://github.com/Joeywzr)).
+* Added `MD4` and `SHA384` functions. MD4 is an obsolete and insecure hash function; it can be used only in rare cases when MD4 is already being used in some legacy system and you need to get exactly the same result. [#29602](https://github.com/ClickHouse/ClickHouse/pull/29602) ([Nikita Tikhomirov](https://github.com/NSTikhomirov)).
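+
+A quick illustrative sketch (not part of the release notes) showing a few of the new functions listed above; the literal arguments are arbitrary examples:
+
+```sql
+SELECT 0b001;                               -- binary literal, returns 1
+SELECT tokens('ClickHouse 21.11 is out');   -- splits on non-alphanumeric ASCII characters
+SELECT JSONExtractKeys('{"a": 1, "b": 2}'); -- ['a', 'b']
+SELECT hex(MD4('abc')), hex(SHA384('abc')); -- hex-encoded digests
+```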
+* HSTS can be enabled for the ClickHouse HTTP server by setting `hsts_max_age` in the configuration file to a positive number. [#29516](https://github.com/ClickHouse/ClickHouse/pull/29516) ([凌涛](https://github.com/lingtaolf)).
+* Huawei OBS Storage support. Closes [#24294](https://github.com/ClickHouse/ClickHouse/issues/24294). [#29511](https://github.com/ClickHouse/ClickHouse/pull/29511) ([kevin wan](https://github.com/MaxWk)).
+* New function `mapContainsKeyLike` to check whether a map has a key that matches a simple regular expression. [#29471](https://github.com/ClickHouse/ClickHouse/pull/29471) ([凌涛](https://github.com/lingtaolf)). New function `mapExtractKeyLike` to get a map that keeps only the elements whose key matches the specified pattern. [#30793](https://github.com/ClickHouse/ClickHouse/pull/30793) ([凌涛](https://github.com/lingtaolf)).
+* Implemented `ALTER TABLE x MODIFY COMMENT`. [#29264](https://github.com/ClickHouse/ClickHouse/pull/29264) ([Vasily Nemkov](https://github.com/Enmk)).
+* Adds H3 inspection functions that are missing from ClickHouse but are available via the H3 API: https://h3geo.org/docs/api/inspection. [#29209](https://github.com/ClickHouse/ClickHouse/pull/29209) ([Bharat Nallan](https://github.com/bharatnc)).
+* Allow non-replicated ALTER TABLE FETCH and ATTACH in Replicated databases. [#29202](https://github.com/ClickHouse/ClickHouse/pull/29202) ([Kevin Michel](https://github.com/kmichel-aiven)).
+* Added a setting `output_format_csv_null_representation`: this is the same as `output_format_tsv_null_representation` but for CSV output. [#29123](https://github.com/ClickHouse/ClickHouse/pull/29123) ([PHO](https://github.com/depressed-pho)).
+* Added function `zookeeperSessionUptime()` which returns the uptime of the current ZooKeeper session in seconds. [#28983](https://github.com/ClickHouse/ClickHouse/pull/28983) ([tavplubix](https://github.com/tavplubix)).
+* Implements the `h3ToGeoBoundary` function. [#28952](https://github.com/ClickHouse/ClickHouse/pull/28952) ([Ivan Veselov](https://github.com/fuzzERot)).
+* Add aggregate function `exponentialMovingAverage` that can be used as a window function. This closes [#27511](https://github.com/ClickHouse/ClickHouse/issues/27511). [#28914](https://github.com/ClickHouse/ClickHouse/pull/28914) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Allow including subcolumns of table columns into the `DESCRIBE` query result (can be enabled by the setting `describe_include_subcolumns`). [#28905](https://github.com/ClickHouse/ClickHouse/pull/28905) ([Anton Popov](https://github.com/CurtizJ)).
+* `Executable`, `ExecutablePool` added option `send_chunk_header`. If this option is true, then the chunk row count followed by a line break is sent to the client before each chunk. [#28833](https://github.com/ClickHouse/ClickHouse/pull/28833) ([Maksim Kita](https://github.com/kitaisreal)).
+* `tokenbf_v1` and `ngrambf_v1` indexes support `Map` with a key of `String` or `FixedString` type. It enhances data skipping in queries with a map key filter. ```sql CREATE TABLE map_tokenbf ( row_id UInt32, map Map(String, String), INDEX map_tokenbf map TYPE ngrambf_v1(4,256,2,0) GRANULARITY 1 ) ENGINE = MergeTree() ORDER BY row_id ``` With the table above, the query `SELECT * FROM map_tokenbf WHERE map['K']='V'` will skip granules that don't contain the key `K`. Of course, how many rows are skipped depends on the `granularity` and `index_granularity` you set. [#28511](https://github.com/ClickHouse/ClickHouse/pull/28511) ([凌涛](https://github.com/lingtaolf)).
+* Send profile events from server to client. New packet type `ProfileEvents` was introduced. Closes [#26177](https://github.com/ClickHouse/ClickHouse/issues/26177). [#28364](https://github.com/ClickHouse/ClickHouse/pull/28364) ([Dmitry Novik](https://github.com/novikd)).
+* Bit shift operations for `FixedString` and `String` data types. This closes [#27763](https://github.com/ClickHouse/ClickHouse/issues/27763). [#28325](https://github.com/ClickHouse/ClickHouse/pull/28325) ([小路](https://github.com/nicelulu)).
+* Support dynamically adding / deleting tables to replication from PostgreSQL in the MaterializedPostgreSQL database engine. Support ALTER for database settings. Closes [#27573](https://github.com/ClickHouse/ClickHouse/issues/27573). [#28301](https://github.com/ClickHouse/ClickHouse/pull/28301) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Added function `accurateCastOrDefault(x, T)`. Closes [#21330](https://github.com/ClickHouse/ClickHouse/issues/21330). Author: @taiyang-li. [#23028](https://github.com/ClickHouse/ClickHouse/pull/23028) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add functions `toUUIDOrDefault`, `toUInt8/16/32/64/256OrDefault`, `toInt8/16/32/64/128/256OrDefault`, which let the user define a default value (not NULL) that is returned when string parsing fails. [#21330](https://github.com/ClickHouse/ClickHouse/pull/21330) ([taiyang-li](https://github.com/taiyang-li)).
+
+#### Performance Improvement
+
+* Background merges can be preempted by each other and they are scheduled with appropriate priorities. Now long-running merges won't prevent short merges from proceeding. This is needed for better scheduling and control of merge execution. It reduces the chance of getting the "too many parts" error. [#22381](https://github.com/ClickHouse/ClickHouse/issues/22381). [#25165](https://github.com/ClickHouse/ClickHouse/pull/25165) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). Added an ability to execute more merges and mutations than the number of threads in the background pool. Merges and mutations will be executed step by step according to their sizes (lower is more prioritized). The ratio of the number of tasks to the number of threads is controlled by the setting `background_merges_mutations_concurrency_ratio`, 2 by default. [#29140](https://github.com/ClickHouse/ClickHouse/pull/29140) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Allow using asynchronous reads for remote filesystems. Lower the number of seeks while reading from remote filesystems. This improves performance tremendously and makes the experimental `web` and `s3` disks work faster than EBS under certain conditions. [#29205](https://github.com/ClickHouse/ClickHouse/pull/29205) ([Kseniia Sumarokova](https://github.com/kssenii)). In the meantime, the `web` disk type (static dataset hosted on a web server) has graduated from experimental to production ready.
+* Queries with `INTO OUTFILE` in `clickhouse-client` will use multiple threads. Fix the issue with a flickering progress bar when using `INTO OUTFILE`. This closes [#30873](https://github.com/ClickHouse/ClickHouse/issues/30873). This closes [#30872](https://github.com/ClickHouse/ClickHouse/issues/30872). [#30886](https://github.com/ClickHouse/ClickHouse/pull/30886) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Reduce the amount of redundant compressed data read from disk for some types of `SELECT` queries (only for the `MergeTree` engine family). [#30111](https://github.com/ClickHouse/ClickHouse/pull/30111) ([alesapin](https://github.com/alesapin)).
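+
+A hedged usage sketch for the `INTO OUTFILE` improvements above (the query and file name are arbitrary; the `.gz` extension relies on the compression autodetection mentioned in the New Feature section):
+
+```sql
+SELECT number FROM system.numbers LIMIT 1000 INTO OUTFILE 'numbers.csv.gz' FORMAT CSV;
+```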
+* Remove some redundant `seek` calls while reading compressed blocks in MergeTree table engines family. [#29766](https://github.com/ClickHouse/ClickHouse/pull/29766) ([alesapin](https://github.com/alesapin)). +* Make `url` table function to process multiple URLs in parallel. This closes [#29670](https://github.com/ClickHouse/ClickHouse/issues/29670) and closes [#29671](https://github.com/ClickHouse/ClickHouse/issues/29671). [#29673](https://github.com/ClickHouse/ClickHouse/pull/29673) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Improve performance of aggregation in order of primary key (with enabled setting `optimize_aggregation_in_order`). [#30266](https://github.com/ClickHouse/ClickHouse/pull/30266) ([Anton Popov](https://github.com/CurtizJ)). +* Now clickhouse is using DNS cache while communicating with external S3. [#29999](https://github.com/ClickHouse/ClickHouse/pull/29999) ([alesapin](https://github.com/alesapin)). +* Add support for pushdown of `IS NULL`/`IS NOT NULL` to external databases (i.e. MySQL). [#29463](https://github.com/ClickHouse/ClickHouse/pull/29463) ([Azat Khuzhin](https://github.com/azat)). Transform `isNull`/`isNotNull` to `IS NULL`/`IS NOT NULL` (for external dbs, i.e. MySQL). [#29446](https://github.com/ClickHouse/ClickHouse/pull/29446) ([Azat Khuzhin](https://github.com/azat)). +* SELECT queries from Dictionary tables will use multiple threads. [#30500](https://github.com/ClickHouse/ClickHouse/pull/30500) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve performance for filtering (WHERE operation) of `Decimal` columns. [#30431](https://github.com/ClickHouse/ClickHouse/pull/30431) ([Jun Jin](https://github.com/vesslanjin)). +* Remove branchy code in filter operation with a better implementation with popcnt/ctz which have better performance. [#29881](https://github.com/ClickHouse/ClickHouse/pull/29881) ([Jun Jin](https://github.com/vesslanjin)). +* Improve filter bytemask generator (used for WHERE operator) function all in one with SSE/AVX2/AVX512 instructions. Note that by default ClickHouse is only using SSE, so it's only relevant for custom builds. [#30014](https://github.com/ClickHouse/ClickHouse/pull/30014) ([jasperzhu](https://github.com/jinjunzh)). [#30670](https://github.com/ClickHouse/ClickHouse/pull/30670) ([jasperzhu](https://github.com/jinjunzh)). +* Improve the performance of SUM aggregate function of Nullable floating point numbers. [#28906](https://github.com/ClickHouse/ClickHouse/pull/28906) ([Raúl Marín](https://github.com/Algunenano)). +* Speed up part loading process with multiple disks are in use. The idea is similar to https://github.com/ClickHouse/ClickHouse/pull/16423 . Prod env shows improvement: 24 min -> 16 min . [#28363](https://github.com/ClickHouse/ClickHouse/pull/28363) ([Amos Bird](https://github.com/amosbird)). +* Reduce default settings for S3 multipart upload part size to lower memory usage. [#28679](https://github.com/ClickHouse/ClickHouse/pull/28679) ([ianton-ru](https://github.com/ianton-ru)). +* Speed up `bitmapAnd` function. [#28332](https://github.com/ClickHouse/ClickHouse/pull/28332) ([dddounaiking](https://github.com/OodounaikingoO)). +* Removed sub-optimal mutation notifications in `StorageMergeTree` when merges are still going. [#27552](https://github.com/ClickHouse/ClickHouse/pull/27552) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Attempt to improve performance of string comparison. 
[#28767](https://github.com/ClickHouse/ClickHouse/pull/28767) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Primary key index and partition filter can work in tuple. [#29281](https://github.com/ClickHouse/ClickHouse/pull/29281) ([凌涛](https://github.com/lingtaolf)). +* If query has multiple quantile aggregate functions with the same arguments but different level parameter, they will be fused together and executed in one pass if the setting `optimize_syntax_fuse_functions` is enabled. [#26657](https://github.com/ClickHouse/ClickHouse/pull/26657) ([hexiaoting](https://github.com/hexiaoting)). +* Now min-max aggregation over the first expression of primary key is optimized by projection. This is for [#329](https://github.com/ClickHouse/ClickHouse/issues/329). [#29918](https://github.com/ClickHouse/ClickHouse/pull/29918) ([Amos Bird](https://github.com/amosbird)). + +#### Experimental Feature + +* Add ability to change nodes configuration (in `.xml` file) for ClickHouse Keeper. [#30372](https://github.com/ClickHouse/ClickHouse/pull/30372) ([alesapin](https://github.com/alesapin)). +* Add `sparkbar` aggregate function. This closes [#26175](https://github.com/ClickHouse/ClickHouse/issues/26175). [#27481](https://github.com/ClickHouse/ClickHouse/pull/27481) ([小路](https://github.com/nicelulu)). Note: there is one flaw in this function, the behaviour will be changed in future releases. + +#### Improvement + +* Allow user to change log levels without restart. [#29586](https://github.com/ClickHouse/ClickHouse/pull/29586) ([Nikolay Degterinsky](https://github.com/evillique)). +* Multiple improvements for SQL UDF. Queries for manipulation of SQL User Defined Functions now support ON CLUSTER clause. Example `CREATE FUNCTION test_function ON CLUSTER 'cluster' AS x -> x + 1;`. Closes [#30666](https://github.com/ClickHouse/ClickHouse/issues/30666). [#30734](https://github.com/ClickHouse/ClickHouse/pull/30734) ([Maksim Kita](https://github.com/kitaisreal)). Support `CREATE OR REPLACE`, `CREATE IF NOT EXISTS` syntaxes. [#30454](https://github.com/ClickHouse/ClickHouse/pull/30454) ([Maksim Kita](https://github.com/kitaisreal)). Added DROP IF EXISTS support. Example `DROP FUNCTION IF EXISTS test_function`. [#30437](https://github.com/ClickHouse/ClickHouse/pull/30437) ([Maksim Kita](https://github.com/kitaisreal)). Support lambdas. Example `CREATE FUNCTION lambda_function AS x -> arrayMap(element -> element * 2, x);`. [#30435](https://github.com/ClickHouse/ClickHouse/pull/30435) ([Maksim Kita](https://github.com/kitaisreal)). Support SQL user defined functions for `clickhouse-local`. [#30179](https://github.com/ClickHouse/ClickHouse/pull/30179) ([Maksim Kita](https://github.com/kitaisreal)). +* Enable per-query memory profiler (set to `memory_profiler_step` = 4MiB) globally. [#29455](https://github.com/ClickHouse/ClickHouse/pull/29455) ([Azat Khuzhin](https://github.com/azat)). +* Added columns `data_compressed_bytes`, `data_uncompressed_bytes`, `marks_bytes` into `system.data_skipping_indices`. Added columns `secondary_indices_compressed_bytes`, `secondary_indices_uncompressed_bytes`, `secondary_indices_marks_bytes` into `system.parts`. Closes [#29697](https://github.com/ClickHouse/ClickHouse/issues/29697). [#29896](https://github.com/ClickHouse/ClickHouse/pull/29896) ([Maksim Kita](https://github.com/kitaisreal)). +* Add `table` alias to system.tables and `database` alias to system.databases [#29677](https://github.com/ClickHouse/ClickHouse/issues/29677). 
[#29882](https://github.com/ClickHouse/ClickHouse/pull/29882) ([kevin wan](https://github.com/MaxWk)). +* Correctly resolve interdependencies between tables on server startup. Closes [#8004](https://github.com/ClickHouse/ClickHouse/issues/8004), closes [#15170](https://github.com/ClickHouse/ClickHouse/issues/15170). [#28373](https://github.com/ClickHouse/ClickHouse/pull/28373) ([tavplubix](https://github.com/tavplubix)). +* Avoid error "Division by zero" when denominator is Nullable in functions `divide`, `intDiv` and `modulo`. Closes [#22621](https://github.com/ClickHouse/ClickHouse/issues/22621). [#28352](https://github.com/ClickHouse/ClickHouse/pull/28352) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow to parse values of `Date` data type in text formats as `YYYYMMDD` in addition to `YYYY-MM-DD`. This closes [#30870](https://github.com/ClickHouse/ClickHouse/issues/30870). [#30871](https://github.com/ClickHouse/ClickHouse/pull/30871) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Web UI: render bars in table cells. [#29792](https://github.com/ClickHouse/ClickHouse/pull/29792) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* User can now create dictionaries with comments: `CREATE DICTIONARY ... COMMENT 'vaue'` ... [#29899](https://github.com/ClickHouse/ClickHouse/pull/29899) ([Vasily Nemkov](https://github.com/Enmk)). Users now can set comments to database in `CREATE DATABASE` statement ... [#29429](https://github.com/ClickHouse/ClickHouse/pull/29429) ([Vasily Nemkov](https://github.com/Enmk)). +* Introduce `compiled_expression_cache_elements_size` setting. If you will ever want to use this setting, you will already know what it does. [#30667](https://github.com/ClickHouse/ClickHouse/pull/30667) ([Maksim Kita](https://github.com/kitaisreal)). +* clickhouse-format now supports option `--query`. In previous versions you have to pass the query to stdin. [#29325](https://github.com/ClickHouse/ClickHouse/pull/29325) ([凌涛](https://github.com/lingtaolf)). +* Support `ALTER TABLE` for tables in `Memory` databases. Memory databases are used in `clickhouse-local`. [#30866](https://github.com/ClickHouse/ClickHouse/pull/30866) ([tavplubix](https://github.com/tavplubix)). +* Arrays of all serializable types are now supported by `arrayStringConcat`. [#30840](https://github.com/ClickHouse/ClickHouse/pull/30840) ([Nickita Taranov](https://github.com/nickitat)). +* ClickHouse now will account docker/cgroups limitations to get system memory amount. See [#25662](https://github.com/ClickHouse/ClickHouse/issues/25662). [#30574](https://github.com/ClickHouse/ClickHouse/pull/30574) ([Pavel Medvedev](https://github.com/pmed)). +* Fetched table structure for PostgreSQL database is more reliable now. [#30477](https://github.com/ClickHouse/ClickHouse/pull/30477) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Full support of positional arguments in GROUP BY and ORDER BY. [#30433](https://github.com/ClickHouse/ClickHouse/pull/30433) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow extracting non-string element as string using JSONExtractString. This is for [pull/25452#issuecomment-927123287](https://github.com/ClickHouse/ClickHouse/pull/25452#issuecomment-927123287). [#30426](https://github.com/ClickHouse/ClickHouse/pull/30426) ([Amos Bird](https://github.com/amosbird)). +* Added an ability to use FINAL clause in SELECT queries from `GraphiteMergeTree`. 
[#30360](https://github.com/ClickHouse/ClickHouse/pull/30360) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Minor improvements in replica cloning and enqueuing fetch for broken parts, that should avoid extremely rare hanging of `GET_PART` entries in replication queue. [#30346](https://github.com/ClickHouse/ClickHouse/pull/30346) ([tavplubix](https://github.com/tavplubix)). +* Allow symlinks to files in `user_files` directory for file table function. [#30309](https://github.com/ClickHouse/ClickHouse/pull/30309) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed comparison of `Date32` with `Date`, `DateTime`, `DateTime64` and `String`. [#30219](https://github.com/ClickHouse/ClickHouse/pull/30219) ([liang.huang](https://github.com/lhuang09287750)). +* Allow to remove `SAMPLE BY` expression from `MergeTree` tables (`ALTER TABLE REMOVE SAMPLE BY`). [#30180](https://github.com/ClickHouse/ClickHouse/pull/30180) ([Anton Popov](https://github.com/CurtizJ)). +* Now `Keeper` (as part of `clickhouse-server`) will start asynchronously if it can connect to some other node. [#30170](https://github.com/ClickHouse/ClickHouse/pull/30170) ([alesapin](https://github.com/alesapin)). +* Now `clickhouse-client` supports native multi-line editing. [#30143](https://github.com/ClickHouse/ClickHouse/pull/30143) ([Amos Bird](https://github.com/amosbird)). +* `polygon` dictionaries (reverse geocoding): added support for reading the dictionary content with SELECT query method if setting `store_polygon_key_column` = true. Closes [#30090](https://github.com/ClickHouse/ClickHouse/issues/30090). [#30142](https://github.com/ClickHouse/ClickHouse/pull/30142) ([Maksim Kita](https://github.com/kitaisreal)). +* Add ClickHouse logo to Play UI. [#29674](https://github.com/ClickHouse/ClickHouse/pull/29674) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Better exception message while reading column from Arrow-supported formats like `Arrow`, `ArrowStream`, `Parquet` and `ORC`. This closes [#29926](https://github.com/ClickHouse/ClickHouse/issues/29926). [#29927](https://github.com/ClickHouse/ClickHouse/pull/29927) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix data-race between flush and startup in `Buffer` tables. This can appear in tests. [#29930](https://github.com/ClickHouse/ClickHouse/pull/29930) ([Azat Khuzhin](https://github.com/azat)). +* Fix `lock-order-inversion` between `DROP TABLE` for `DatabaseMemory` and `LiveView`. Live View is an experimental feature. Memory database is used in clickhouse-local. [#29929](https://github.com/ClickHouse/ClickHouse/pull/29929) ([Azat Khuzhin](https://github.com/azat)). +* Fix lock-order-inversion between periodic dictionary reload and config reload. [#29928](https://github.com/ClickHouse/ClickHouse/pull/29928) ([Azat Khuzhin](https://github.com/azat)). +* Update zoneinfo files to 2021c. [#29925](https://github.com/ClickHouse/ClickHouse/pull/29925) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add ability to configure retries and delays between them for `clickhouse-copier`. [#29921](https://github.com/ClickHouse/ClickHouse/pull/29921) ([Azat Khuzhin](https://github.com/azat)). +* Add `shutdown_wait_unfinished_queries` server setting to allowing waiting for running queries up to `shutdown_wait_unfinished` time. This is for [#24451](https://github.com/ClickHouse/ClickHouse/issues/24451). [#29914](https://github.com/ClickHouse/ClickHouse/pull/29914) ([Amos Bird](https://github.com/amosbird)). 
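+
+Illustrative syntax for the `ALTER TABLE ... REMOVE SAMPLE BY` entry above (the table name is a hypothetical example):
+
+```sql
+ALTER TABLE hits REMOVE SAMPLE BY;
+```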
+* Add ability to trace peak memory usage (with new trace_type in `system.trace_log` - `MemoryPeak`). [#29858](https://github.com/ClickHouse/ClickHouse/pull/29858) ([Azat Khuzhin](https://github.com/azat)). +* PostgreSQL foreign tables: Added partitioned table prefix 'p' for the query for fetching replica identity index. [#29828](https://github.com/ClickHouse/ClickHouse/pull/29828) ([Shoh Jahon](https://github.com/Shohjahon)). +* Apply `max_untracked_memory`/`memory_profiler_step`/`memory_profiler_sample_probability` during mutate/merge to profile memory usage during merges. [#29681](https://github.com/ClickHouse/ClickHouse/pull/29681) ([Azat Khuzhin](https://github.com/azat)). +* Query obfuscator: `clickhouse-format --obfuscate` now works with more types of queries. [#29672](https://github.com/ClickHouse/ClickHouse/pull/29672) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed the issue: `clickhouse-format --obfuscate` cannot process queries with embedded dictionaries (functions `regionTo...`). [#29667](https://github.com/ClickHouse/ClickHouse/pull/29667) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect Nullable processing of JSON functions. This fixes [#29615](https://github.com/ClickHouse/ClickHouse/issues/29615) . Mark as improvement because https://github.com/ClickHouse/ClickHouse/pull/28012 is not released. [#29659](https://github.com/ClickHouse/ClickHouse/pull/29659) ([Amos Bird](https://github.com/amosbird)). +* Increase `listen_backlog` by default (to match default in newer linux kernel). [#29643](https://github.com/ClickHouse/ClickHouse/pull/29643) ([Azat Khuzhin](https://github.com/azat)). +* Reload dictionaries, models, user defined executable functions if servers config `dictionaries_config`, `models_config`, `user_defined_executable_functions_config` changes. Closes [#28142](https://github.com/ClickHouse/ClickHouse/issues/28142). [#29529](https://github.com/ClickHouse/ClickHouse/pull/29529) ([Maksim Kita](https://github.com/kitaisreal)). +* Get rid of pointless restriction on projection name. Now projection name can start with `tmp_`. [#29520](https://github.com/ClickHouse/ClickHouse/pull/29520) ([Amos Bird](https://github.com/amosbird)). +* Fixed `There is no query or query context has expired` error in mutations with nested subqueries. Do not allow subqueries in mutation if table is replicated and `allow_nondeterministic_mutations` setting is disabled. [#29495](https://github.com/ClickHouse/ClickHouse/pull/29495) ([tavplubix](https://github.com/tavplubix)). +* Apply config changes to `max_concurrent_queries` during runtime (no need to restart). [#29414](https://github.com/ClickHouse/ClickHouse/pull/29414) ([Raúl Marín](https://github.com/Algunenano)). +* Added setting `use_skip_indexes`. [#29405](https://github.com/ClickHouse/ClickHouse/pull/29405) ([Maksim Kita](https://github.com/kitaisreal)). +* Add support for `FREEZE`ing in-memory parts (for backups). [#29376](https://github.com/ClickHouse/ClickHouse/pull/29376) ([Mo Xuan](https://github.com/mo-avatar)). +* Pass through initial query_id for `clickhouse-benchmark` (previously if you run remote query via `clickhouse-benchmark`, queries on shards will not be linked to the initial query via `initial_query_id`). [#29364](https://github.com/ClickHouse/ClickHouse/pull/29364) ([Azat Khuzhin](https://github.com/azat)). +* Skip indexes `tokenbf_v1` and `ngrambf_v1`: added support for `Array` data type with key of `String` of `FixedString` type. 
[#29280](https://github.com/ClickHouse/ClickHouse/pull/29280) ([Maksim Kita](https://github.com/kitaisreal)). Skip indexes `tokenbf_v1` and `ngrambf_v1` added support for `Map` data type with key of `String` of `FixedString` type. Author @lingtaolf. [#29220](https://github.com/ClickHouse/ClickHouse/pull/29220) ([Maksim Kita](https://github.com/kitaisreal)). +* Function `has`: added support for `Map` data type. [#29267](https://github.com/ClickHouse/ClickHouse/pull/29267) ([Maksim Kita](https://github.com/kitaisreal)). +* Add `compress_logs` settings for clickhouse-keeper which allow to compress clickhouse-keeper logs (for replicated state machine) in `ZSTD` . Implements: [#26977](https://github.com/ClickHouse/ClickHouse/issues/26977). [#29223](https://github.com/ClickHouse/ClickHouse/pull/29223) ([alesapin](https://github.com/alesapin)). +* Add a setting `external_table_strict_query` - it will force passing the whole WHERE expression in queries to foreign databases even if it is incompatible. [#29206](https://github.com/ClickHouse/ClickHouse/pull/29206) ([Azat Khuzhin](https://github.com/azat)). +* Disable projections when `ARRAY JOIN` is used. In previous versions projection analysis may break aliases in array join. [#29139](https://github.com/ClickHouse/ClickHouse/pull/29139) ([Amos Bird](https://github.com/amosbird)). +* Support more types in `MsgPack` input/output format. [#29077](https://github.com/ClickHouse/ClickHouse/pull/29077) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow to input and output `LowCardinality` columns in `ORC` input/output format. [#29062](https://github.com/ClickHouse/ClickHouse/pull/29062) ([Kruglov Pavel](https://github.com/Avogar)). +* Select from `system.distributed_ddl_queue` might show incorrect values, it's fixed. [#29061](https://github.com/ClickHouse/ClickHouse/pull/29061) ([tavplubix](https://github.com/tavplubix)). +* Correct behaviour with unknown methods for HTTP connection. Solves [#29050](https://github.com/ClickHouse/ClickHouse/issues/29050). [#29057](https://github.com/ClickHouse/ClickHouse/pull/29057) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* `clickhouse-keeper`: Fix bug in `clickhouse-keeper-converter` which can lead to some data loss while restoring from ZooKeeper logs (not snapshot). [#29030](https://github.com/ClickHouse/ClickHouse/pull/29030) ([小路](https://github.com/nicelulu)). Fix bug in `clickhouse-keeper-converter` which can lead to incorrect ZooKeeper log deserialization. [#29071](https://github.com/ClickHouse/ClickHouse/pull/29071) ([小路](https://github.com/nicelulu)). +* Apply settings from `CREATE ... AS SELECT` queries (fixes: [#28810](https://github.com/ClickHouse/ClickHouse/issues/28810)). [#28962](https://github.com/ClickHouse/ClickHouse/pull/28962) ([Azat Khuzhin](https://github.com/azat)). +* Respect default database setting for ALTER TABLE ... ON CLUSTER ... REPLACE/MOVE PARTITION FROM/TO ... [#28955](https://github.com/ClickHouse/ClickHouse/pull/28955) ([anneji-dev](https://github.com/anneji-dev)). +* gRPC protocol: Allow change server-side compression from client. [#28953](https://github.com/ClickHouse/ClickHouse/pull/28953) ([Vitaly Baranov](https://github.com/vitlibar)). +* Skip "no data" exception when reading thermal sensors for asynchronous metrics. This closes [#28852](https://github.com/ClickHouse/ClickHouse/issues/28852). [#28882](https://github.com/ClickHouse/ClickHouse/pull/28882) ([alexey-milovidov](https://github.com/alexey-milovidov)). 
+* Fixed logical race condition that might cause `Dictionary not found` error for existing dictionary in rare cases. [#28853](https://github.com/ClickHouse/ClickHouse/pull/28853) ([tavplubix](https://github.com/tavplubix)). +* Relax nested function for If-combinator check (but forbid nested identical combinators). [#28828](https://github.com/ClickHouse/ClickHouse/pull/28828) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible uncaught exception during server termination. [#28761](https://github.com/ClickHouse/ClickHouse/pull/28761) ([Azat Khuzhin](https://github.com/azat)). +* Forbid cleaning of tmp directories that can be used by an active mutation/merge if mutation/merge is extraordinarily long. [#28760](https://github.com/ClickHouse/ClickHouse/pull/28760) ([Azat Khuzhin](https://github.com/azat)). +* Allow optimization `optimize_arithmetic_operations_in_aggregate_functions = 1` when alias is used. [#28746](https://github.com/ClickHouse/ClickHouse/pull/28746) ([Amos Bird](https://github.com/amosbird)). +* Implement `detach_not_byte_identical_parts` setting for `ReplicatedMergeTree`, that will detach instead of remove not byte-identical parts (after mege/mutate). [#28708](https://github.com/ClickHouse/ClickHouse/pull/28708) ([Azat Khuzhin](https://github.com/azat)). +* Implement `max_suspicious_broken_parts_bytes` setting for `MergeTree` (to limit total size of all broken parts, default is `1GiB`). [#28707](https://github.com/ClickHouse/ClickHouse/pull/28707) ([Azat Khuzhin](https://github.com/azat)). +* Enable expanding macros in `RabbitMQ` table settings. [#28683](https://github.com/ClickHouse/ClickHouse/pull/28683) ([Vitaly Baranov](https://github.com/vitlibar)). +* Restore the possibility to read data of a table using the `Log` engine in multiple threads. [#28125](https://github.com/ClickHouse/ClickHouse/pull/28125) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix misbehavior of NULL column handling in JSON functions. This fixes [#27930](https://github.com/ClickHouse/ClickHouse/issues/27930). [#28012](https://github.com/ClickHouse/ClickHouse/pull/28012) ([Amos Bird](https://github.com/amosbird)). +* Allow to set the size of Mark/Uncompressed cache for skip indices separately from columns. [#27961](https://github.com/ClickHouse/ClickHouse/pull/27961) ([Amos Bird](https://github.com/amosbird)). +* Allow to mix JOIN with `USING` with other JOIN types. [#23881](https://github.com/ClickHouse/ClickHouse/pull/23881) ([darkkeks](https://github.com/darkkeks)). +* Update aws-sdk submodule for throttling in Yandex Cloud S3. [#30646](https://github.com/ClickHouse/ClickHouse/pull/30646) ([ianton-ru](https://github.com/ianton-ru)). +* Fix releasing query ID and session ID at the end of query processing while handing gRPC call. [#29954](https://github.com/ClickHouse/ClickHouse/pull/29954) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix shutdown of `AccessControlManager` to fix flaky test. [#29951](https://github.com/ClickHouse/ClickHouse/pull/29951) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix failed assertion in reading from `HDFS`. Update libhdfs3 library to be able to run in tests in debug. Closes [#29251](https://github.com/ClickHouse/ClickHouse/issues/29251). Closes [#27814](https://github.com/ClickHouse/ClickHouse/issues/27814). [#29276](https://github.com/ClickHouse/ClickHouse/pull/29276) ([Kseniia Sumarokova](https://github.com/kssenii)). + + +#### Build/Testing/Packaging Improvement + +* Add support for FreeBSD builds for Aarch64 machines. 
[#29952](https://github.com/ClickHouse/ClickHouse/pull/29952) ([MikaelUrankar](https://github.com/MikaelUrankar)). +* Recursive submodules are no longer needed for ClickHouse. [#30315](https://github.com/ClickHouse/ClickHouse/pull/30315) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* ClickHouse can be statically built with Musl. This is added as experiment, it does not support building `odbc-bridge`, `library-bridge`, integration with CatBoost and some libraries. [#30248](https://github.com/ClickHouse/ClickHouse/pull/30248) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Enable `Protobuf`, `Arrow`, `ORC`, `Parquet` for `AArch64` and `Darwin` (macOS) builds. This closes [#29248](https://github.com/ClickHouse/ClickHouse/issues/29248). This closes [#28018](https://github.com/ClickHouse/ClickHouse/issues/28018). [#30015](https://github.com/ClickHouse/ClickHouse/pull/30015) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add cross-build for PowerPC (powerpc64le). This closes [#9589](https://github.com/ClickHouse/ClickHouse/issues/9589). Enable support for interaction with MySQL for AArch64 and PowerPC. This closes [#26301](https://github.com/ClickHouse/ClickHouse/issues/26301). [#30010](https://github.com/ClickHouse/ClickHouse/pull/30010) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Leave only required files in cross-compile toolchains. Include them as submodules (earlier they were downloaded as tarballs). [#29974](https://github.com/ClickHouse/ClickHouse/pull/29974) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Implemented structure-aware fuzzing approach in ClickHouse for select statement parser. [#30012](https://github.com/ClickHouse/ClickHouse/pull/30012) ([Paul](https://github.com/PaulCher)). +* Turning on experimental constexpr expressions evaluator for clang to speed up template code compilation. [#29668](https://github.com/ClickHouse/ClickHouse/pull/29668) ([myrrc](https://github.com/myrrc)). +* Add ability to compile using newer version fo glibc without using new symbols. [#29594](https://github.com/ClickHouse/ClickHouse/pull/29594) ([Azat Khuzhin](https://github.com/azat)). +* Reduce Debug build binary size by clang optimization option. [#28736](https://github.com/ClickHouse/ClickHouse/pull/28736) ([flynn](https://github.com/ucasfl)). +* Now all images for CI will be placed in the separate dockerhub repo. [#28656](https://github.com/ClickHouse/ClickHouse/pull/28656) ([alesapin](https://github.com/alesapin)). +* Improve support for build with clang-13. [#28046](https://github.com/ClickHouse/ClickHouse/pull/28046) ([Sergei Semin](https://github.com/syominsergey)). +* Add ability to print raw profile events to `clickhouse-client` (This can be useful for debugging and for testing). [#30064](https://github.com/ClickHouse/ClickHouse/pull/30064) ([Azat Khuzhin](https://github.com/azat)). +* Add time dependency for clickhouse-server unit (systemd and sysvinit init). [#28891](https://github.com/ClickHouse/ClickHouse/pull/28891) ([Azat Khuzhin](https://github.com/azat)). +* Reload stacktrace cache when symbol is reloaded. [#28137](https://github.com/ClickHouse/ClickHouse/pull/28137) ([Amos Bird](https://github.com/amosbird)). + +#### Bug Fix + +* Functions for case-insensitive search in UTF-8 strings like `positionCaseInsensitiveUTF8` and `countSubstringsCaseInsensitiveUTF8` might find substrings that actually does not match in very rare cases, it's fixed. 
[#30663](https://github.com/ClickHouse/ClickHouse/pull/30663) ([tavplubix](https://github.com/tavplubix)).
+* Fix reading from an empty file on an encrypted disk. [#30494](https://github.com/ClickHouse/ClickHouse/pull/30494) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix transformation of a chain of disjunctions to `IN` (controlled by the setting `optimize_min_equality_disjunction_chain_length`) in distributed queries with the setting `legacy_column_name_of_tuple_literal = 0`. [#28658](https://github.com/ClickHouse/ClickHouse/pull/28658) ([Anton Popov](https://github.com/CurtizJ)).
+* Allow using a materialized column as the sharding key in a distributed table even if `insert_allow_materialized_columns=0`. [#28637](https://github.com/ClickHouse/ClickHouse/pull/28637) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix `ORDER BY ... WITH FILL` when `TO` and `FROM` are set and there are no rows in the result set. [#30888](https://github.com/ClickHouse/ClickHouse/pull/30888) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix set index not used in AND/OR expressions when there are more than two operands. This fixes [#30416](https://github.com/ClickHouse/ClickHouse/issues/30416). [#30887](https://github.com/ClickHouse/ClickHouse/pull/30887) ([Amos Bird](https://github.com/amosbird)).
+* Fix crash when a projection with a hashing function is materialized. This fixes [#30861](https://github.com/ClickHouse/ClickHouse/issues/30861). The issue is similar to https://github.com/ClickHouse/ClickHouse/pull/28560 which is a lack of proper understanding of the invariant of the header's emptiness. [#30877](https://github.com/ClickHouse/ClickHouse/pull/30877) ([Amos Bird](https://github.com/amosbird)).
+* Fixed ambiguity when extracting the auxiliary ZooKeeper name from the ZooKeeper path in `ReplicatedMergeTree`. Previously the server might fail to start with `Unknown auxiliary ZooKeeper name` if the ZooKeeper path contained a colon. Fixes [#29052](https://github.com/ClickHouse/ClickHouse/issues/29052). Also, it was allowed to specify a ZooKeeper path that does not start with a slash; this is now deprecated and creation of new tables with such paths is not allowed. Slashes and colons in auxiliary ZooKeeper names are not allowed either. [#30822](https://github.com/ClickHouse/ClickHouse/pull/30822) ([tavplubix](https://github.com/tavplubix)).
+* Clean the temporary directory when `localBackup` fails for some reason. [#30797](https://github.com/ClickHouse/ClickHouse/pull/30797) ([ianton-ru](https://github.com/ianton-ru)).
+* Fixed a race condition between `REPLACE/MOVE PARTITION` and background merge in non-replicated `MergeTree` that might cause a part of moved/replaced data to remain in the partition. Fixes [#29327](https://github.com/ClickHouse/ClickHouse/issues/29327). [#30717](https://github.com/ClickHouse/ClickHouse/pull/30717) ([tavplubix](https://github.com/tavplubix)).
+* Fix PREWHERE with WHERE in case of an always-true PREWHERE. [#30668](https://github.com/ClickHouse/ClickHouse/pull/30668) ([Azat Khuzhin](https://github.com/azat)).
+* The limit push-down optimization could cause an error `Cannot find column`. Fixes [#30438](https://github.com/ClickHouse/ClickHouse/issues/30438). [#30562](https://github.com/ClickHouse/ClickHouse/pull/30562) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Add missing parentheses for `isNotNull`/`isNull` rewrites to `IS [NOT] NULL` (fixes queries that have something like `isNotNull(1)+isNotNull(2)`). [#30520](https://github.com/ClickHouse/ClickHouse/pull/30520) ([Azat Khuzhin](https://github.com/azat)).
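+
+For reference, a minimal sketch of the `ORDER BY ... WITH FILL FROM ... TO ...` syntax that the fix above concerns (the values are arbitrary):
+
+```sql
+SELECT number FROM numbers(3) ORDER BY number WITH FILL FROM 0 TO 10;
+```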
+* Fix deadlock on ALTER with scalar subquery to the same table, close [#30461](https://github.com/ClickHouse/ClickHouse/issues/30461). [#30492](https://github.com/ClickHouse/ClickHouse/pull/30492) ([Vladimir C](https://github.com/vdimir)). +* Fixed segfault which might happen if session expired during execution of REPLACE PARTITION. [#30432](https://github.com/ClickHouse/ClickHouse/pull/30432) ([tavplubix](https://github.com/tavplubix)). +* Queries with condition like `IN (subquery)` could return incorrect result in case if aggregate projection applied. Fixed creation of sets for projections. [#30310](https://github.com/ClickHouse/ClickHouse/pull/30310) ([Amos Bird](https://github.com/amosbird)). +* Fix column alias resolution of JOIN queries when projection is enabled. This fixes [#30146](https://github.com/ClickHouse/ClickHouse/issues/30146). [#30293](https://github.com/ClickHouse/ClickHouse/pull/30293) ([Amos Bird](https://github.com/amosbird)). +* Fix some deficiency in `replaceRegexpAll` function. [#30292](https://github.com/ClickHouse/ClickHouse/pull/30292) ([Memo](https://github.com/Joeywzr)). +* Fix ComplexKeyHashedDictionary, ComplexKeySparseHashedDictionary parsing `preallocate` option from layout config. [#30246](https://github.com/ClickHouse/ClickHouse/pull/30246) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix `[I]LIKE` function. Closes [#28661](https://github.com/ClickHouse/ClickHouse/issues/28661). [#30244](https://github.com/ClickHouse/ClickHouse/pull/30244) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix crash with shortcircuit and lowcardinality in multiIf. [#30243](https://github.com/ClickHouse/ClickHouse/pull/30243) ([Raúl Marín](https://github.com/Algunenano)). +* FlatDictionary, HashedDictionary fix bytes_allocated calculation for nullable attributes. [#30238](https://github.com/ClickHouse/ClickHouse/pull/30238) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow identifiers starting with numbers in multiple joins. [#30230](https://github.com/ClickHouse/ClickHouse/pull/30230) ([Vladimir C](https://github.com/vdimir)). +* Fix reading from `MergeTree` with `max_read_buffer_size = 0` (when the user wants to shoot himself in the foot) (can lead to exceptions `Can't adjust last granule`, `LOGICAL_ERROR`, or even data loss). [#30192](https://github.com/ClickHouse/ClickHouse/pull/30192) ([Azat Khuzhin](https://github.com/azat)). +* Fix `pread_fake_async`/`pread_threadpool` with `min_bytes_to_use_direct_io`. [#30191](https://github.com/ClickHouse/ClickHouse/pull/30191) ([Azat Khuzhin](https://github.com/azat)). +* Fix INSERT SELECT incorrectly fills MATERIALIZED column based of Nullable column. [#30189](https://github.com/ClickHouse/ClickHouse/pull/30189) ([Azat Khuzhin](https://github.com/azat)). +* Support nullable arguments in function `initializeAggregation`. [#30177](https://github.com/ClickHouse/ClickHouse/pull/30177) ([Anton Popov](https://github.com/CurtizJ)). +* Fix error `Port is already connected` for queries with `GLOBAL IN` and `WITH TOTALS`. Only for 21.9 and 21.10. [#30086](https://github.com/ClickHouse/ClickHouse/pull/30086) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix race between MOVE PARTITION and merges/mutations for MergeTree. [#30074](https://github.com/ClickHouse/ClickHouse/pull/30074) ([Azat Khuzhin](https://github.com/azat)). +* Dropped `Memory` database might reappear after server restart, it's fixed ([#29795](https://github.com/ClickHouse/ClickHouse/issues/29795)). 
Also added `force_remove_data_recursively_on_drop` setting as a workaround for `Directory not empty` error when dropping `Ordinary` database (because it's not possible to remove data leftovers manually in cloud environment). [#30054](https://github.com/ClickHouse/ClickHouse/pull/30054) ([tavplubix](https://github.com/tavplubix)). +* Fix crash of sample by `tuple()`, closes [#30004](https://github.com/ClickHouse/ClickHouse/issues/30004). [#30016](https://github.com/ClickHouse/ClickHouse/pull/30016) ([flynn](https://github.com/ucasfl)). +* try to close issue: [#29965](https://github.com/ClickHouse/ClickHouse/issues/29965). [#29976](https://github.com/ClickHouse/ClickHouse/pull/29976) ([hexiaoting](https://github.com/hexiaoting)). +* Fix possible data-race between `FileChecker` and `StorageLog`/`StorageStripeLog`. [#29959](https://github.com/ClickHouse/ClickHouse/pull/29959) ([Azat Khuzhin](https://github.com/azat)). +* Fix data-race between `LogSink::writeMarks()` and `LogSource` in `StorageLog`. [#29946](https://github.com/ClickHouse/ClickHouse/pull/29946) ([Azat Khuzhin](https://github.com/azat)). +* Fix potential resource leak of the concurrent query limit of merge tree tables introduced in https://github.com/ClickHouse/ClickHouse/pull/19544. [#29879](https://github.com/ClickHouse/ClickHouse/pull/29879) ([Amos Bird](https://github.com/amosbird)). +* Fix system tables recreation check (fails to detect changes in enum values). [#29857](https://github.com/ClickHouse/ClickHouse/pull/29857) ([Azat Khuzhin](https://github.com/azat)). +* MaterializedMySQL: Fix an issue where if the connection to MySQL was lost, only parts of a transaction could be processed. [#29837](https://github.com/ClickHouse/ClickHouse/pull/29837) ([Håvard Kvålen](https://github.com/havardk)). +* Avoid `Timeout exceeded: elapsed 18446744073.709553 seconds` error that might happen in extremely rare cases, presumably due to some bug in kernel. Fixes [#29154](https://github.com/ClickHouse/ClickHouse/issues/29154). [#29811](https://github.com/ClickHouse/ClickHouse/pull/29811) ([tavplubix](https://github.com/tavplubix)). +* Fix bad cast in `ATTACH TABLE ... FROM 'path'` query when non-string literal is used instead of path. It may lead to reading of uninitialized memory. [#29790](https://github.com/ClickHouse/ClickHouse/pull/29790) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix concurrent access to `LowCardinality` during `GROUP BY` (in combination with `Buffer` tables it may lead to troubles). [#29782](https://github.com/ClickHouse/ClickHouse/pull/29782) ([Azat Khuzhin](https://github.com/azat)). +* Fix incorrect `GROUP BY` (multiple rows with the same keys in result) in case of distributed query when shards had mixed versions `<= 21.3` and `>= 21.4`, `GROUP BY` key had several columns all with fixed size, and two-level aggregation was activated (see `group_by_two_level_threshold` and `group_by_two_level_threshold_bytes`). Fixes [#29580](https://github.com/ClickHouse/ClickHouse/issues/29580). [#29735](https://github.com/ClickHouse/ClickHouse/pull/29735) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed incorrect behaviour of setting `materialized_postgresql_tables_list` at server restart. Found in [#28529](https://github.com/ClickHouse/ClickHouse/issues/28529). [#29686](https://github.com/ClickHouse/ClickHouse/pull/29686) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Condition in filter predicate could be lost after push-down optimisation. 
[#29625](https://github.com/ClickHouse/ClickHouse/pull/29625) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix JIT expression compilation with aliases and short-circuit expression evaluation. Closes [#29403](https://github.com/ClickHouse/ClickHouse/issues/29403). [#29574](https://github.com/ClickHouse/ClickHouse/pull/29574) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix rare segfault in `ALTER MODIFY` query when using an incorrect table identifier in a `DEFAULT` expression like `x.y.z...`. Fixes [#29184](https://github.com/ClickHouse/ClickHouse/issues/29184). [#29573](https://github.com/ClickHouse/ClickHouse/pull/29573) ([alesapin](https://github.com/alesapin)).
+* Fix nullptr dereference for `GROUP BY WITH TOTALS HAVING` (when the column from `HAVING` wasn't selected). [#29553](https://github.com/ClickHouse/ClickHouse/pull/29553) ([Azat Khuzhin](https://github.com/azat)).
+* Avoid deadlocks when reading from and writing to `Join` table engine tables at the same time. [#29544](https://github.com/ClickHouse/ClickHouse/pull/29544) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix a bug in the `pathStartsWith` check caused by incorrect usage of `std::mismatch`: `The behavior is undefined if the second range is shorter than the first range.`. [#29531](https://github.com/ClickHouse/ClickHouse/pull/29531) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* In the ODBC bridge, add retries for the error `Invalid cursor state`. It is a retriable error. Closes [#29473](https://github.com/ClickHouse/ClickHouse/issues/29473). [#29518](https://github.com/ClickHouse/ClickHouse/pull/29518) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fixed incorrect table name parsing on loading of a `Lazy` database. Fixes [#29456](https://github.com/ClickHouse/ClickHouse/issues/29456). [#29476](https://github.com/ClickHouse/ClickHouse/pull/29476) ([tavplubix](https://github.com/tavplubix)).
+* Fix possible `Block structure mismatch` for subqueries with a pushed-down `HAVING` predicate. Fixes [#29010](https://github.com/ClickHouse/ClickHouse/issues/29010). [#29475](https://github.com/ClickHouse/ClickHouse/pull/29475) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix logical error `Cannot capture columns` in functions `greatest`/`least`. Closes [#29334](https://github.com/ClickHouse/ClickHouse/issues/29334). [#29454](https://github.com/ClickHouse/ClickHouse/pull/29454) ([Kruglov Pavel](https://github.com/Avogar)).
+* RocksDB table engine: fix race condition during multiple DB opening (and get back some tests that trigger the problem on CI). [#29393](https://github.com/ClickHouse/ClickHouse/pull/29393) ([Azat Khuzhin](https://github.com/azat)).
+* Fix replicated access storage not shutting down cleanly when misconfigured. [#29388](https://github.com/ClickHouse/ClickHouse/pull/29388) ([Kevin Michel](https://github.com/kmichel-aiven)).
+* Remove window function `nth_value` as it is not memory-safe. This closes [#29347](https://github.com/ClickHouse/ClickHouse/issues/29347). [#29348](https://github.com/ClickHouse/ClickHouse/pull/29348) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix vertical merges of projection parts. This fixes [#29253](https://github.com/ClickHouse/ClickHouse/issues/29253). This PR also fixes several projection merge/mutation issues introduced in https://github.com/ClickHouse/ClickHouse/pull/25165. [#29337](https://github.com/ClickHouse/ClickHouse/pull/29337) ([Amos Bird](https://github.com/amosbird)).
+* Fix hanging DDL queries on Replicated database while adding a new replica. [#29328](https://github.com/ClickHouse/ClickHouse/pull/29328) ([Kevin Michel](https://github.com/kmichel-aiven)).
+* Fix connection timeouts (`send_timeout`/`receive_timeout`). [#29282](https://github.com/ClickHouse/ClickHouse/pull/29282) ([Azat Khuzhin](https://github.com/azat)).
+* Fix possible `Table columns structure in ZooKeeper is different from local table structure` exception while recreating or creating new replicas of `ReplicatedMergeTree`, when one of the table columns has default expressions with case-insensitive functions. [#29266](https://github.com/ClickHouse/ClickHouse/pull/29266) ([Anton Popov](https://github.com/CurtizJ)).
+* Send a normal `Database doesn't exist` error (`UNKNOWN_DATABASE`) to the client (via TCP) instead of `Attempt to read after eof` (`ATTEMPT_TO_READ_AFTER_EOF`). [#29229](https://github.com/ClickHouse/ClickHouse/pull/29229) ([Azat Khuzhin](https://github.com/azat)).
+* Fix segfault while inserting into a column with type `LowCardinality(Nullable)` in Avro input format. [#29132](https://github.com/ClickHouse/ClickHouse/pull/29132) ([Kruglov Pavel](https://github.com/Avogar)).
+* Do not allow reusing previous credentials in case of inter-server secret (previously, an `INSERT` via Buffer/Kafka to a Distributed table with an interserver secret configured for that cluster could re-use the previously set user for that connection). [#29060](https://github.com/ClickHouse/ClickHouse/pull/29060) ([Azat Khuzhin](https://github.com/azat)).
+* Handle `any_join_distinct_right_table_keys` when joining with a dictionary, closes [#29007](https://github.com/ClickHouse/ClickHouse/issues/29007). [#29014](https://github.com/ClickHouse/ClickHouse/pull/29014) ([Vladimir C](https://github.com/vdimir)).
+* Fix "Not found column ... in block" error when joining on an alias column, closes [#26980](https://github.com/ClickHouse/ClickHouse/issues/26980). [#29008](https://github.com/ClickHouse/ClickHouse/pull/29008) ([Vladimir C](https://github.com/vdimir)).
+* Fix the number of threads used in `GLOBAL IN` subquery (it was executed in a single thread since the [#19414](https://github.com/ClickHouse/ClickHouse/issues/19414) bugfix). [#28997](https://github.com/ClickHouse/ClickHouse/pull/28997) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix bad optimizations of `ORDER BY` if it contains `WITH FILL`. This closes [#28908](https://github.com/ClickHouse/ClickHouse/issues/28908). This closes [#26049](https://github.com/ClickHouse/ClickHouse/issues/26049). [#28910](https://github.com/ClickHouse/ClickHouse/pull/28910) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix higher-order array functions (`SIGSEGV` for `arrayCompact`/`ILLEGAL_COLUMN` for `arrayDifference`/`arrayCumSumNonNegative`) with consts. [#28904](https://github.com/ClickHouse/ClickHouse/pull/28904) ([Azat Khuzhin](https://github.com/azat)).
+* Fix waiting for mutation with `mutations_sync=2`. [#28889](https://github.com/ClickHouse/ClickHouse/pull/28889) ([Azat Khuzhin](https://github.com/azat)).
+* Fix queries to external databases (e.g. MySQL) with multiple columns in `IN` (e.g. `(k,v) IN ((1, 2))`). [#28888](https://github.com/ClickHouse/ClickHouse/pull/28888) ([Azat Khuzhin](https://github.com/azat)).
+* Fix bug with `LowCardinality` in short-circuit function evaluation. Closes [#28884](https://github.com/ClickHouse/ClickHouse/issues/28884). [#28887](https://github.com/ClickHouse/ClickHouse/pull/28887) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Fix reading of subcolumns from compact parts. [#28873](https://github.com/ClickHouse/ClickHouse/pull/28873) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed a race condition between `DROP PART` and `REPLACE/MOVE PARTITION` that might cause replicas to diverge in rare cases. [#28864](https://github.com/ClickHouse/ClickHouse/pull/28864) ([tavplubix](https://github.com/tavplubix)). +* Fix expressions compilation with short circuit evaluation. [#28821](https://github.com/ClickHouse/ClickHouse/pull/28821) ([Azat Khuzhin](https://github.com/azat)). +* Fix extremely rare case when ReplicatedMergeTree replicas can diverge after hard reboot of all replicas. The error looks like `Part ... intersects (previous|next) part ...`. [#28817](https://github.com/ClickHouse/ClickHouse/pull/28817) ([alesapin](https://github.com/alesapin)). +* Better check for connection usability and also catch any exception in `RabbitMQ` shutdown just in case. [#28797](https://github.com/ClickHouse/ClickHouse/pull/28797) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix benign race condition in ReplicatedMergeTreeQueue. Shouldn't be visible for user, but can lead to subtle bugs. [#28734](https://github.com/ClickHouse/ClickHouse/pull/28734) ([alesapin](https://github.com/alesapin)). +* Fix possible crash for `SELECT` with partially created aggregate projection in case of exception. [#28700](https://github.com/ClickHouse/ClickHouse/pull/28700) ([Amos Bird](https://github.com/amosbird)). +* Fix the coredump in the creation of distributed tables, when the parameters passed in are wrong. [#28686](https://github.com/ClickHouse/ClickHouse/pull/28686) ([Zhiyong Wang](https://github.com/ljcui)). +* Add Settings.Names, Settings.Values aliases for system.processes table. [#28685](https://github.com/ClickHouse/ClickHouse/pull/28685) ([Vitaly](https://github.com/orloffv)). +* Support for S2 Geometry library: Fix the number of arguments required by `s2RectAdd` and `s2RectContains` functions. [#28663](https://github.com/ClickHouse/ClickHouse/pull/28663) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix invalid constant type conversion when Nullable or LowCardinality primary key is used. [#28636](https://github.com/ClickHouse/ClickHouse/pull/28636) ([Amos Bird](https://github.com/amosbird)). +* Fix "Column is not under aggregate function and not in GROUP BY" with PREWHERE (Fixes: [#28461](https://github.com/ClickHouse/ClickHouse/issues/28461)). [#28502](https://github.com/ClickHouse/ClickHouse/pull/28502) ([Azat Khuzhin](https://github.com/azat)). + + ### ClickHouse release v21.10, 2021-10-16 #### Backward Incompatible Change diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index dd6ead7d97f..f13110d7179 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54456) +SET(VERSION_REVISION 54457) SET(VERSION_MAJOR 21) -SET(VERSION_MINOR 11) +SET(VERSION_MINOR 12) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7) -SET(VERSION_DESCRIBE v21.11.1.1-prestable) -SET(VERSION_STRING 21.11.1.1) +SET(VERSION_GITHASH 503a418dedf0011e9040c3a1b6913e0b5488be4c) +SET(VERSION_DESCRIBE v21.12.1.1-prestable) +SET(VERSION_STRING 21.12.1.1) # end of autochange diff --git a/contrib/snappy b/contrib/snappy index 3f194acb57e..fb057edfed8 160000 --- a/contrib/snappy +++ b/contrib/snappy @@ -1 +1 @@ -Subproject commit 3f194acb57e0487531c96b97af61dcbd025a78a3 +Subproject commit fb057edfed820212076239fd32cb2ff23e9016bf diff --git a/debian/changelog b/debian/changelog index 460424bdb36..a2709485e44 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (21.11.1.1) unstable; urgency=low +clickhouse (21.12.1.1) unstable; urgency=low * Modified source code - -- clickhouse-release Thu, 09 Sep 2021 12:03:26 +0300 + -- clickhouse-release Tue, 02 Nov 2021 00:56:42 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 926014da9e6..6f9a957852e 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" -ARG version=21.11.1.* +ARG version=21.12.1.* RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 48b59d1e754..04842e7a3de 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" -ARG version=21.11.1.* +ARG version=21.12.1.* ARG gosu_ver=1.10 # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index fc8c2ebbe6e..76967da9f9a 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" -ARG version=21.11.1.* +ARG version=21.12.1.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 38595d47528..b6a06be2ac7 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -308,12 +308,7 @@ function get_profiles_watchdog function get_profiles { # Collect the profiles - clickhouse-client --port $LEFT_SERVER_PORT --query "set query_profiler_cpu_time_period_ns = 0" - clickhouse-client --port $LEFT_SERVER_PORT --query "set query_profiler_real_time_period_ns = 0" clickhouse-client --port $LEFT_SERVER_PORT --query "system flush logs" & - - clickhouse-client --port $RIGHT_SERVER_PORT --query "set query_profiler_cpu_time_period_ns = 0" - clickhouse-client --port $RIGHT_SERVER_PORT --query "set query_profiler_real_time_period_ns = 0" clickhouse-client --port $RIGHT_SERVER_PORT --query "system flush logs" & wait @@ -634,7 +629,7 @@ create view query_display_names as select * from create view partial_query_times as select * from file('analyze/partial-query-times.tsv', TSVWithNamesAndTypes, - 'test text, query_index int, time_stddev float, time_median float') + 'test text, query_index int, time_stddev float, time_median double') ; -- Report for partial queries that we could only run on the new server (e.g. 
diff --git a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml index cc5dc3795bb..292665c4f68 100644 --- a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml +++ b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml @@ -5,22 +5,19 @@ + :: true - -
- - - - system -
metric_log
- 7500 - 1000 - + + + + + + 1000000000 diff --git a/docs/en/engines/database-engines/materialized-postgresql.md b/docs/en/engines/database-engines/materialized-postgresql.md index 0f0ffaca343..6f5ebafdb61 100644 --- a/docs/en/engines/database-engines/materialized-postgresql.md +++ b/docs/en/engines/database-engines/materialized-postgresql.md @@ -23,15 +23,15 @@ ENGINE = MaterializedPostgreSQL('host:port', ['database' | database], 'user', 'p - `user` — PostgreSQL user. - `password` — User password. -## Dynamically adding new tables to replication +## Dynamically adding new tables to replication {#dynamically-adding-table-to-replication} ``` sql ATTACH TABLE postgres_database.new_table; ``` -It will work as well if there is a setting `materialized_postgresql_tables_list`. +When specifying a specific list of tables in the database using the setting [materialized_postgresql_tables_list](../../operations/settings/settings.md#materialized-postgresql-tables-list), it will be updated to the current state, taking into account the tables which were added by the `ATTACH TABLE` query. -## Dynamically removing tables from replication +## Dynamically removing tables from replication {#dynamically-removing-table-from-replication} ``` sql DETACH TABLE postgres_database.table_to_remove; @@ -58,7 +58,7 @@ SETTINGS materialized_postgresql_max_block_size = 65536, SELECT * FROM database1.table1; ``` -It is also possible to change settings at run time. +The settings can be changed, if necessary, using a DDL query. But it is impossible to change the setting `materialized_postgresql_tables_list`. To update the list of tables in this setting use the `ATTACH TABLE` query. ``` sql ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = ; diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index aeaf39e28cb..d08de080e6b 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -787,6 +787,8 @@ Moving data does not interfere with data replication. Therefore, different stora After the completion of background merges and mutations, old parts are removed only after a certain amount of time (`old_parts_lifetime`). During this time, they are not moved to other volumes or disks. Therefore, until the parts are finally removed, they are still taken into account for evaluation of the occupied disk space. +User can assign new big parts to different disks of a [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures) volume in a balanced way using the [min_bytes_to_rebalance_partition_over_jbod](../../../operations/settings/merge-tree-settings.md#min-bytes-to-rebalance-partition-over-jbod) setting. + ## Using S3 for Data Storage {#table_engine-mergetree-s3} `MergeTree` family table engines can store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`. diff --git a/docs/en/engines/table-engines/special/merge.md b/docs/en/engines/table-engines/special/merge.md index 19ce19fcc64..27f783a3cea 100644 --- a/docs/en/engines/table-engines/special/merge.md +++ b/docs/en/engines/table-engines/special/merge.md @@ -9,45 +9,57 @@ The `Merge` engine (not to be confused with `MergeTree`) does not store data its Reading is automatically parallelized. Writing to a table is not supported. When reading, the indexes of tables that are actually being read are used, if they exist. 
-The `Merge` engine accepts parameters: the database name and a regular expression for tables. - -## Examples {#examples} - -Example 1: +## Creating a Table {#creating-a-table} ``` sql -Merge(hits, '^WatchLog') + CREATE TABLE ... Engine=Merge(db_name, tables_regexp) ``` -Data will be read from the tables in the `hits` database that have names that match the regular expression ‘`^WatchLog`’. +**Engine Parameters** -Instead of the database name, you can use a constant expression that returns a string. For example, `currentDatabase()`. +- `db_name` — Possible values: + - database name, + - constant expression that returns a string with a database name, for example, `currentDatabase()`, + - `REGEXP(expression)`, where `expression` is a regular expression to match the DB names. + +- `tables_regexp` — A regular expression to match the table names in the specified DB or DBs. Regular expressions — [re2](https://github.com/google/re2) (supports a subset of PCRE), case-sensitive. -See the notes about escaping symbols in regular expressions in the “match” section. +See the notes about escaping symbols in regular expressions in the "match" section. -When selecting tables to read, the `Merge` table itself will not be selected, even if it matches the regex. This is to avoid loops. -It is possible to create two `Merge` tables that will endlessly try to read each others’ data, but this is not a good idea. +## Usage {#usage} + +When selecting tables to read, the `Merge` table itself is not selected, even if it matches the regex. This is to avoid loops. +It is possible to create two `Merge` tables that will endlessly try to read each others' data, but this is not a good idea. The typical way to use the `Merge` engine is for working with a large number of `TinyLog` tables as if with a single table. -Example 2: +## Examples {#examples} -Let’s say you have a old table (WatchLog_old) and decided to change partitioning without moving data to a new table (WatchLog_new) and you need to see data from both tables. +**Example 1** + +Consider two databases `ABC_corporate_site` and `ABC_store`. The `all_visitors` table will contain IDs from the tables `visitors` in both databases. ``` sql -CREATE TABLE WatchLog_old(date Date, UserId Int64, EventType String, Cnt UInt64) -ENGINE=MergeTree(date, (UserId, EventType), 8192); +CREATE TABLE all_visitors (id UInt32) ENGINE=Merge(REGEXP('ABC_*'), 'visitors'); +``` + +**Example 2** + +Let's say you have an old table `WatchLog_old` and decided to change partitioning without moving data to a new table `WatchLog_new`, and you need to see data from both tables. 
+ +``` sql +CREATE TABLE WatchLog_old(date Date, UserId Int64, EventType String, Cnt UInt64) + ENGINE=MergeTree(date, (UserId, EventType), 8192); INSERT INTO WatchLog_old VALUES ('2018-01-01', 1, 'hit', 3); -CREATE TABLE WatchLog_new(date Date, UserId Int64, EventType String, Cnt UInt64) -ENGINE=MergeTree PARTITION BY date ORDER BY (UserId, EventType) SETTINGS index_granularity=8192; +CREATE TABLE WatchLog_new(date Date, UserId Int64, EventType String, Cnt UInt64) + ENGINE=MergeTree PARTITION BY date ORDER BY (UserId, EventType) SETTINGS index_granularity=8192; INSERT INTO WatchLog_new VALUES ('2018-01-02', 2, 'hit', 3); CREATE TABLE WatchLog as WatchLog_old ENGINE=Merge(currentDatabase(), '^WatchLog'); -SELECT * -FROM WatchLog +SELECT * FROM WatchLog; ``` ``` text @@ -68,5 +80,4 @@ FROM WatchLog **See Also** - [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns) - -[Original article](https://clickhouse.com/docs/en/operations/table_engines/merge/) +- [merge](../../../sql-reference/table-functions/merge.md) table function diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md index b3233315db3..ff29fef7fe0 100644 --- a/docs/en/getting-started/example-datasets/nyc-taxi.md +++ b/docs/en/getting-started/example-datasets/nyc-taxi.md @@ -332,7 +332,7 @@ ORDER BY year, count(*) DESC The following server was used: -Two Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz, 16 physical kernels total,128 GiB RAM,8x6 TB HD on hardware RAID-5 +Two Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz, 16 physical cores total, 128 GiB RAM, 8x6 TB HD on hardware RAID-5 Execution time is the best of three runs. But starting from the second run, queries read data from the file system cache. No further caching occurs: the data is read out and processed in each run. 
diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index e67c521c4a7..cb09ce1bb2e 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -16,10 +16,13 @@ The supported formats are: | [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | | [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | | [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | +| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ | +| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ | | [Template](#format-template) | ✔ | ✔ | | [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ | | [CSV](#csv) | ✔ | ✔ | | [CSVWithNames](#csvwithnames) | ✔ | ✔ | +| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ | | [CustomSeparated](#format-customseparated) | ✔ | ✔ | | [Values](#data-format-values) | ✔ | ✔ | | [Vertical](#vertical) | ✗ | ✔ | @@ -33,8 +36,10 @@ The supported formats are: | [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | | [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ | | [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | +| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ | | [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ | | [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ | +| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ | | [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ | | [TSKV](#tskv) | ✔ | ✔ | | [Pretty](#pretty) | ✗ | ✔ | @@ -51,6 +56,7 @@ The supported formats are: | [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | | [ORC](#data-format-orc) | ✔ | ✔ | | [RowBinary](#rowbinary) | ✔ | ✔ | +| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ | | [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | | [Native](#native) | ✔ | ✔ | | [Null](#null) | ✗ | ✔ | @@ -124,10 +130,17 @@ Only a small set of symbols are escaped. You can easily stumble onto a string va Arrays are written as a list of comma-separated values in square brackets. Number items in the array are formatted as normally. `Date` and `DateTime` types are written in single quotes. Strings are written in single quotes with the same escaping rules as above. -[NULL](../sql-reference/syntax.md) is formatted as `\N`. +[NULL](../sql-reference/syntax.md) is formatted according to setting [format_tsv_null_representation](../operations/settings/settings.md#settings-format_tsv_null_representation) (default value is `\N`). + + +If setting [input_format_tsv_empty_as_default](../operations/settings/settings.md#settings-input_format_tsv_empty_as_default) is enabled, +empty input fields are replaced with default values. For complex default expressions [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#settings-input_format_defaults_for_omitted_fields) must be enabled too. Each element of [Nested](../sql-reference/data-types/nested-data-structures/nested.md) structures is represented as array. +In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to ENUM id. +If input data contains only ENUM ids, it's recommended to enable the setting [input_format_tsv_enum_as_number](../operations/settings/settings.md#settings-input_format_tsv_enum_as_number) to optimize ENUM parsing. 
+

For example:

``` sql
@@ -164,17 +177,35 @@ This format is also available under the name `TSVRaw`.
## TabSeparatedWithNames {#tabseparatedwithnames}

Differs from the `TabSeparated` format in that the column names are written in the first row.
-During parsing, the first row is expected to contain the column names. You can use column names to determine their position and to check their correctness.
+If setting [input_format_with_names_use_header](../operations/settings/settings.md#settings-input_format_with_names_use_header) is set to 1,
+the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input_format_skip_unknown_fields) is set to 1.
+Otherwise, the first row will be skipped.

This format is also available under the name `TSVWithNames`.

## TabSeparatedWithNamesAndTypes {#tabseparatedwithnamesandtypes}

Differs from the `TabSeparated` format in that the column names are written to the first row, while the column types are in the second row.
-During parsing, the first and second rows are completely ignored.
+The first row with names is processed the same way as in `TabSeparatedWithNames` format.
+If setting [input_format_with_types_use_header](../operations/settings/settings.md#settings-input_format_with_types_use_header) is set to 1,
+the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.

This format is also available under the name `TSVWithNamesAndTypes`.

+## TabSeparatedRawWithNames {#tabseparatedrawwithnames}
+
+Differs from `TabSeparatedWithNames` format in that the rows are written without escaping.
+When parsing with this format, tabs or linefeeds are not allowed in each field.
+
+This format is also available under the name `TSVRawWithNames`.
+
+## TabSeparatedRawWithNamesAndTypes {#tabseparatedrawwithnamesandtypes}
+
+Differs from `TabSeparatedWithNamesAndTypes` format in that the rows are written without escaping.
+When parsing with this format, tabs or linefeeds are not allowed in each field.
+
+This format is also available under the name `TSVRawWithNamesAndTypes`.
+
## Template {#format-template}

This format allows specifying a custom format string with placeholders for values with a specified escaping rule.
@@ -195,7 +226,7 @@ where `delimiter_i` is a delimiter between values (`$` symbol can be escaped as
- `Raw` (without escaping, similarly to `TSVRaw`)
- `None` (no escaping rule, see further)

-If an escaping rule is omitted, then `None` will be used. `XML` and `Raw` are suitable only for output.
+If an escaping rule is omitted, then `None` will be used. `XML` is suitable only for output.

So, for the following format string:

@@ -375,17 +406,23 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR

When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Rows can also be arranged without quotes. In this case, they are parsed up to the delimiter character or line feed (CR or LF). In violation of the RFC, when parsing rows without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) types are all supported.
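+
+A minimal sketch of these parsing rules (illustrative only; the `test.csv_quotes` table is a hypothetical name, not taken from this patch):
+
+``` sql
+CREATE TABLE test.csv_quotes (id UInt32, s String) ENGINE = Memory;
+-- double-quoted, single-quoted and unquoted values are all accepted;
+-- for the unquoted value, leading and trailing spaces and tabs are ignored
+INSERT INTO test.csv_quotes FORMAT CSV 1,"quoted, with comma"
+INSERT INTO test.csv_quotes FORMAT CSV 2,'single quoted'
+INSERT INTO test.csv_quotes FORMAT CSV 3,  unquoted
+```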
-Empty unquoted input values are replaced with default values for the respective columns, if
-[input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields)
-is enabled.
+If setting [input_format_csv_empty_as_default](../operations/settings/settings.md#settings-input_format_csv_empty_as_default) is enabled,
+empty unquoted input values are replaced with default values. For complex default expressions [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#settings-input_format_defaults_for_omitted_fields) must be enabled too.

-`NULL` is formatted as `\N` or `NULL` or an empty unquoted string (see settings [input_format_csv_unquoted_null_literal_as_null](../operations/settings/settings.md#settings-input_format_csv_unquoted_null_literal_as_null) and [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields)).
+`NULL` is formatted according to setting [format_csv_null_representation](../operations/settings/settings.md#settings-format_csv_null_representation) (default value is `\N`).
+
+In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to ENUM id.
+If input data contains only ENUM ids, it's recommended to enable the setting [input_format_csv_enum_as_number](../operations/settings/settings.md#settings-input_format_csv_enum_as_number) to optimize ENUM parsing.

The CSV format supports the output of totals and extremes the same way as `TabSeparated`.

## CSVWithNames {#csvwithnames}

-Also prints the header row, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
+Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
+
+## CSVWithNamesAndTypes {#csvwithnamesandtypes}
+
+Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).

## CustomSeparated {#format-customseparated}

@@ -657,10 +694,21 @@ Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yie
{"progress":{"read_rows":"3","read_bytes":"24","written_rows":"0","written_bytes":"0","total_rows_to_read":"3"}}
```

+## JSONCompactEachRowWithNames {#jsoncompacteachrowwithnames}
+
+Differs from `JSONCompactEachRow` format in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
+
## JSONCompactEachRowWithNamesAndTypes {#jsoncompacteachrowwithnamesandtypes}
+
+Differs from `JSONCompactEachRow` format in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
+
+## JSONCompactStringsEachRowWithNames {#jsoncompactstringseachrowwithnames}
+
+Differs from `JSONCompactStringsEachRow` in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
+
## JSONCompactStringsEachRowWithNamesAndTypes {#jsoncompactstringseachrowwithnamesandtypes}

-Differs from `JSONCompactEachRow`/`JSONCompactStringsEachRow` in that the column names and types are written as the first two rows.
+Differs from `JSONCompactStringsEachRow` in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). 
```json ["'hello'", "multiply(42, number)", "range(5)"] @@ -910,6 +958,13 @@ Array is represented as a varint length (unsigned [LEB128](https://en.wikipedia. For [NULL](../sql-reference/syntax.md#null-literal) support, an additional byte containing 1 or 0 is added before each [Nullable](../sql-reference/data-types/nullable.md) value. If 1, then the value is `NULL` and this byte is interpreted as a separate value. If 0, the value after the byte is not `NULL`. +## RowBinaryWithNames {#rowbinarywithnames} + +Similar to [RowBinary](#rowbinary), but with added header: + +- [LEB128](https://en.wikipedia.org/wiki/LEB128)-encoded number of columns (N) +- N `String`s specifying column names + ## RowBinaryWithNamesAndTypes {#rowbinarywithnamesandtypes} Similar to [RowBinary](#rowbinary), but with added header: diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 3006c08fce6..69df11f1840 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -121,7 +121,7 @@ toc_title: Adopters | Rspamd | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) | | RuSIEM | SIEM | Main Product | — | — | [Official Website](https://rusiem.com/en/products/architecture) | | S7 Airlines | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | -| Sber | Banking, Fintech, Retail, Cloud, Media | — | — | — | [Job advertisement, March 2021](https://career.habr.com/vacancies/1000073536) | +| Sber | Banking, Fintech, Retail, Cloud, Media | — | 128 servers | >1 PB | [Job advertisement, March 2021](https://career.habr.com/vacancies/1000073536) | | scireum GmbH | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | | Segment | Data processing | Main product | 9 * i3en.3xlarge nodes 7.5TB NVME SSDs, 96GB Memory, 12 vCPUs | — | [Slides, 2019](https://slides.com/abraithwaite/segment-clickhouse) | | sembot.io | Shopping Ads | — | — | — | A comment on LinkedIn, 2020 | diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 58c59ce9f79..81516140f84 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -21,7 +21,7 @@ By default, ClickHouse Keeper provides the same guarantees as ZooKeeper (lineari ClickHouse Keeper can be used as a standalone replacement for ZooKeeper or as an internal part of the ClickHouse server, but in both cases configuration is almost the same `.xml` file. The main ClickHouse Keeper configuration tag is ``. Keeper configuration has the following parameters: - `tcp_port` — Port for a client to connect (default for ZooKeeper is `2181`). -- `tcp_port_secure` — Secure port for a client to connect. +- `tcp_port_secure` — Secure port for an SSL connection between client and keeper-server. - `server_id` — Unique server id, each participant of the ClickHouse Keeper cluster must have a unique number (1, 2, 3, and so on). - `log_storage_path` — Path to coordination logs, better to store logs on the non-busy device (same for ZooKeeper). - `snapshot_storage_path` — Path to coordination snapshots. @@ -50,7 +50,11 @@ Internal coordination settings are located in `..` section and contain servers description. The only parameter for the whole quorum is `secure`, which enables encrypted connection for communication between quorum participants. 
The main parameters for each `<server>` are:
+Quorum configuration is located in the `<keeper_server>.<raft_configuration>` section and contains a description of the servers.
+
+The only parameter for the whole quorum is `secure`, which enables encrypted connection for communication between quorum participants. The parameter can be set to `true` if an SSL connection is required for internal communication between nodes, or left unspecified otherwise.
+
+The main parameters for each `<server>` are:

- `id` — Server identifier in a quorum.
- `hostname` — Hostname where this server is placed.
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index 6ec0d122e6a..803d10312f3 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -370,7 +370,7 @@ Opens `https://tabix.io/` when accessing `http://localhost: http_port`.
]]> ``` -## hsts_max_age +## hsts_max_age {#hsts-max-age} Expired time for HSTS in seconds. The default value is 0 means clickhouse disabled HSTS. If you set a positive number, the HSTS will be enabled and the max-age is the number you set. diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index a3a258234e1..16ec55f026a 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -328,3 +328,18 @@ Possible values: Default value: `true`. By default, the ClickHouse server checks at table creation the data type of a column for sampling or sampling expression. If you already have tables with incorrect sampling expression and do not want the server to raise an exception during startup, set `check_sample_column_is_correct` to `false`. + +## min_bytes_to_rebalance_partition_over_jbod {#min-bytes-to-rebalance-partition-over-jbod} + +Sets minimal amount of bytes to enable balancing when distributing new big parts over volume disks [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures). + +Possible values: + +- Positive integer. +- 0 — Balancing is disabled. + +Default value: `0`. + +**Usage** + +The value of the `min_bytes_to_rebalance_partition_over_jbod` setting should be less than the value of the [max_bytes_to_merge_at_max_space_in_pool](../../operations/settings/merge-tree-settings.md#max-bytes-to-merge-at-max-space-in-pool) setting. Otherwise, ClickHouse throws an exception. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index ff47aa96502..aa98589d6f3 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -399,7 +399,7 @@ Default value: 1. ## input_format_defaults_for_omitted_fields {#session_settings-input_format_defaults_for_omitted_fields} -When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md#jsoneachrow), [CSV](../../interfaces/formats.md#csv) and [TabSeparated](../../interfaces/formats.md#tabseparated) formats. +When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md#jsoneachrow), [CSV](../../interfaces/formats.md#csv), [TabSeparated](../../interfaces/formats.md#tabseparated) formats and formats with `WithNames`/`WithNamesAndTypes` suffixes. !!! note "Note" When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance. @@ -417,14 +417,20 @@ When enabled, replace empty input fields in TSV with default values. For complex Disabled by default. +## input_format_csv_empty_as_default {#settings-input-format-csv-empty-as-default} + +When enabled, replace empty input fields in CSV with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. + +Enabled by default. + ## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number} -Enables or disables parsing enum values as enum ids for TSV input format. +When enabled, always treat enum values as enum ids for TSV input format. It's recommended to enable this setting if data contains only enum ids to optimize enum parsing. Possible values: -- 0 — Enum values are parsed as values. 
-- 1 — Enum values are parsed as enum IDs. +- 0 — Enum values are parsed as values or as enum IDs. +- 1 — Enum values are parsed only as enum IDs. Default value: 0. @@ -438,10 +444,39 @@ CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first' When the `input_format_tsv_enum_as_number` setting is enabled: +Query: + ```sql SET input_format_tsv_enum_as_number = 1; INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; -INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 1; +SELECT * FROM table_with_enum_column_for_tsv_insert; +``` + +Result: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +``` + +Query: + +```sql +SET input_format_tsv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first'; +``` + +throws an exception. + +When the `input_format_tsv_enum_as_number` setting is disabled: + +Query: + +```sql +SET input_format_tsv_enum_as_number = 0; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first'; SELECT * FROM table_with_enum_column_for_tsv_insert; ``` @@ -456,15 +491,6 @@ Result: └─────┴────────┘ ``` -When the `input_format_tsv_enum_as_number` setting is disabled, the `INSERT` query: - -```sql -SET input_format_tsv_enum_as_number = 0; -INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; -``` - -throws an exception. - ## input_format_null_as_default {#settings-input-format-null-as-default} Enables or disables the initialization of [NULL](../../sql-reference/syntax.md#null-literal) fields with [default values](../../sql-reference/statements/create/table.md#create-default-values), if data type of these fields is not [nullable](../../sql-reference/data-types/nullable.md#data_type-nullable). @@ -540,8 +566,40 @@ To improve insert performance, we recommend disabling this check if you are sure Supported formats: -- [CSVWithNames](../../interfaces/formats.md#csvwithnames) -- [TabSeparatedWithNames](../../interfaces/formats.md#tabseparatedwithnames) +- [CSVWithNames](../../interfaces/formats.md#csvwithnames) +- [CSVWithNames](../../interfaces/formats.md#csvwithnamesandtypes) +- [TabSeparatedWithNames](../../interfaces/formats.md#tabseparatedwithnames) +- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md#tabseparatedwithnamesandtypes) +- [JSONCompactEachRowWithNames](../../interfaces/formats.md#jsoncompacteachrowwithnames) +- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompacteachrowwithnamesandtypes) +- [JSONCompactStringsEachRowWithNames](../../interfaces/formats.md#jsoncompactstringseachrowwithnames) +- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompactstringseachrowwithnamesandtypes) +- [RowBinaryWithNames](../../interfaces/formats.md#rowbinarywithnames-rowbinarywithnames) +- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md#rowbinarywithnamesandtypes-rowbinarywithnamesandtypes) + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + +## input_format_with_types_use_header {#settings-input-format-with-types-use-header} + +Controls whether format parser should check if data types from the input data match data types from the target table. 
+ +Supported formats: + +- [CSVWithNames](../../interfaces/formats.md#csvwithnames) +- [CSVWithNames](../../interfaces/formats.md#csvwithnamesandtypes) +- [TabSeparatedWithNames](../../interfaces/formats.md#tabseparatedwithnames) +- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md#tabseparatedwithnamesandtypes) +- [JSONCompactEachRowWithNames](../../interfaces/formats.md#jsoncompacteachrowwithnames) +- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompacteachrowwithnamesandtypes) +- [JSONCompactStringsEachRowWithNames](../../interfaces/formats.md#jsoncompactstringseachrowwithnames) +- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompactstringseachrowwithnamesandtypes) +- [RowBinaryWithNames](../../interfaces/formats.md#rowbinarywithnames-rowbinarywithnames) +- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md#rowbinarywithnamesandtypes-rowbinarywithnamesandtypes) Possible values: @@ -954,6 +1012,16 @@ Example: log_query_views=1 ``` +## log_formatted_queries {#settings-log-formatted-queries} + +Allows to log formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table. + +Possible values: + +- 0 — Formatted queries are not logged in the system table. +- 1 — Formatted queries are logged in the system table. + +Default value: `0`. ## log_comment {#settings-log-comment} @@ -1397,6 +1465,32 @@ Minimum count of executing same expression before it is get compiled. Default value: `3`. +## compile_aggregate_expressions {#compile_aggregate_expressions} + +Enables or disables JIT-compilation of aggregate functions to native code. Enabling this setting can improve the performance. + +Possible values: + +- 0 — Aggregation is done without JIT compilation. +- 1 — Aggregation is done using JIT compilation. + +Default value: `1`. + +**See Also** + +- [min_count_to_compile_aggregate_expression](#min_count_to_compile_aggregate_expression) + +## min_count_to_compile_aggregate_expression {#min_count_to_compile_aggregate_expression} + +The minimum number of identical aggregate expressions to start JIT-compilation. Works only if the [compile_aggregate_expressions](#compile_aggregate_expressions) setting is enabled. + +Possible values: + +- Positive integer. +- 0 — Identical aggregate expressions are always JIT-compiled. + +Default value: `3`. + ## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers} Controls quoting of 64-bit or bigger [integers](../../sql-reference/data-types/int-uint.md) (like `UInt64` or `Int128`) when they are output in a [JSON](../../interfaces/formats.md#json) format. @@ -1512,18 +1606,14 @@ When `output_format_json_quote_denormals = 1`, the query returns: The character is interpreted as a delimiter in the CSV data. By default, the delimiter is `,`. -## input_format_csv_unquoted_null_literal_as_null {#settings-input_format_csv_unquoted_null_literal_as_null} - -For CSV input format enables or disables parsing of unquoted `NULL` as literal (synonym for `\N`). - ## input_format_csv_enum_as_number {#settings-input_format_csv_enum_as_number} -Enables or disables parsing enum values as enum ids for CSV input format. +When enabled, always treat enum values as enum ids for CSV input format. It's recommended to enable this setting if data contains only enum ids to optimize enum parsing. Possible values: -- 0 — Enum values are parsed as values. -- 1 — Enum values are parsed as enum IDs. 
+- 0 — Enum values are parsed as values or as enum IDs. +- 1 — Enum values are parsed only as enum IDs. Default value: 0. @@ -1537,29 +1627,52 @@ CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first' When the `input_format_csv_enum_as_number` setting is enabled: +Query: + ```sql SET input_format_csv_enum_as_number = 1; -INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2 +``` + +Result: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +``` + +Query: + +```sql +SET input_format_csv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first' +``` + +throws an exception. + +When the `input_format_csv_enum_as_number` setting is disabled: + +Query: + +```sql +SET input_format_csv_enum_as_number = 0; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2 +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first' SELECT * FROM table_with_enum_column_for_csv_insert; ``` Result: ```text -┌──Id─┬─Value─────┐ -│ 102 │ second │ -└─────┴───────────┘ +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +┌──Id─┬─Value─┐ +│ 103 │ first │ +└─────┴───────┘ ``` -When the `input_format_csv_enum_as_number` setting is disabled, the `INSERT` query: - -```sql -SET input_format_csv_enum_as_number = 0; -INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2; -``` - -throws an exception. - ## output_format_csv_crlf_end_of_line {#settings-output-format-csv-crlf-end-of-line} Use DOS/Windows-style line separator (CRLF) in CSV instead of Unix style (LF). @@ -2876,9 +2989,9 @@ Possible values: Default value: `1`. -## output_format_csv_null_representation {#output_format_csv_null_representation} +## format_csv_null_representation {#format_csv_null_representation} -Defines the representation of `NULL` for [CSV](../../interfaces/formats.md#csv) output format. User can set any string as a value, for example, `My NULL`. +Defines the representation of `NULL` for [CSV](../../interfaces/formats.md#csv) output and input formats. User can set any string as a value, for example, `My NULL`. Default value: `\N`. @@ -2901,7 +3014,7 @@ Result Query ```sql -SET output_format_csv_null_representation = 'My NULL'; +SET format_csv_null_representation = 'My NULL'; SELECT * FROM csv_custom_null FORMAT CSV; ``` @@ -2913,9 +3026,9 @@ My NULL My NULL ``` -## output_format_tsv_null_representation {#output_format_tsv_null_representation} +## format_tsv_null_representation {#format_tsv_null_representation} -Defines the representation of `NULL` for [TSV](../../interfaces/formats.md#tabseparated) output format. User can set any string as a value, for example, `My NULL`. +Defines the representation of `NULL` for [TSV](../../interfaces/formats.md#tabseparated) output and input formats. User can set any string as a value, for example, `My NULL`. Default value: `\N`. 
@@ -2938,7 +3051,7 @@ Result Query ```sql -SET output_format_tsv_null_representation = 'My NULL'; +SET format_tsv_null_representation = 'My NULL'; SELECT * FROM tsv_custom_null FORMAT TSV; ``` diff --git a/docs/en/operations/system-tables/columns.md b/docs/en/operations/system-tables/columns.md index de878cce793..5ba38ab3e67 100644 --- a/docs/en/operations/system-tables/columns.md +++ b/docs/en/operations/system-tables/columns.md @@ -24,6 +24,11 @@ Columns: - `is_in_primary_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the primary key expression. - `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sampling key expression. - `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Compression codec name. +- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned. +- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned. +- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned. +- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned. +- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned. 
**Example** @@ -34,10 +39,11 @@ SELECT * FROM system.columns LIMIT 2 FORMAT Vertical; ```text Row 1: ────── -database: system -table: aggregate_function_combinators -name: name +database: INFORMATION_SCHEMA +table: COLUMNS +name: table_catalog type: String +position: 1 default_kind: default_expression: data_compressed_bytes: 0 @@ -49,13 +55,19 @@ is_in_sorting_key: 0 is_in_primary_key: 0 is_in_sampling_key: 0 compression_codec: +character_octet_length: ᴺᵁᴸᴸ +numeric_precision: ᴺᵁᴸᴸ +numeric_precision_radix: ᴺᵁᴸᴸ +numeric_scale: ᴺᵁᴸᴸ +datetime_precision: ᴺᵁᴸᴸ Row 2: ────── -database: system -table: aggregate_function_combinators -name: is_internal -type: UInt8 +database: INFORMATION_SCHEMA +table: COLUMNS +name: table_schema +type: String +position: 2 default_kind: default_expression: data_compressed_bytes: 0 @@ -67,6 +79,11 @@ is_in_sorting_key: 0 is_in_primary_key: 0 is_in_sampling_key: 0 compression_codec: +character_octet_length: ᴺᵁᴸᴸ +numeric_precision: ᴺᵁᴸᴸ +numeric_precision_radix: ᴺᵁᴸᴸ +numeric_scale: ᴺᵁᴸᴸ +datetime_precision: ᴺᵁᴸᴸ ``` The `system.columns` table contains the following columns (the column type is shown in brackets): diff --git a/docs/en/operations/system-tables/databases.md b/docs/en/operations/system-tables/databases.md index c9fc8786333..45eebf2ae85 100644 --- a/docs/en/operations/system-tables/databases.md +++ b/docs/en/operations/system-tables/databases.md @@ -9,6 +9,7 @@ Columns: - `data_path` ([String](../../sql-reference/data-types/string.md)) — Data path. - `metadata_path` ([String](../../sql-reference/data-types/enum.md)) — Metadata path. - `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Database UUID. +- `comment` ([String](../../sql-reference/data-types/enum.md)) — Database comment. The `name` column from this system table is used for implementing the `SHOW DATABASES` query. @@ -17,22 +18,20 @@ The `name` column from this system table is used for implementing the `SHOW DATA Create a database. ``` sql -CREATE DATABASE test +CREATE DATABASE test; ``` Check all of the available databases to the user. 
``` sql -SELECT * FROM system.databases +SELECT * FROM system.databases; ``` ``` text -┌─name───────────────────────────┬─engine─┬─data_path──────────────────┬─metadata_path───────────────────────────────────────────────────────┬─────────────────────────────────uuid─┐ -│ _temporary_and_external_tables │ Memory │ /var/lib/clickhouse/ │ │ 00000000-0000-0000-0000-000000000000 │ -│ default │ Atomic │ /var/lib/clickhouse/store/ │ /var/lib/clickhouse/store/d31/d317b4bd-3595-4386-81ee-c2334694128a/ │ d317b4bd-3595-4386-81ee-c2334694128a │ -│ test │ Atomic │ /var/lib/clickhouse/store/ │ /var/lib/clickhouse/store/39b/39bf0cc5-4c06-4717-87fe-c75ff3bd8ebb/ │ 39bf0cc5-4c06-4717-87fe-c75ff3bd8ebb │ -│ system │ Atomic │ /var/lib/clickhouse/store/ │ /var/lib/clickhouse/store/1d1/1d1c869d-e465-4b1b-a51f-be033436ebf9/ │ 1d1c869d-e465-4b1b-a51f-be033436ebf9 │ -└────────────────────────────────┴────────┴────────────────────────────┴─────────────────────────────────────────────────────────────────────┴──────────────────────────────────────┘ +┌─name───────────────┬─engine─┬─data_path──────────────────┬─metadata_path───────────────────────────────────────────────────────┬─uuid─────────────────────────────────┬─comment─┐ +│ INFORMATION_SCHEMA │ Memory │ /var/lib/clickhouse/ │ │ 00000000-0000-0000-0000-000000000000 │ │ +│ default │ Atomic │ /var/lib/clickhouse/store/ │ /var/lib/clickhouse/store/d31/d317b4bd-3595-4386-81ee-c2334694128a/ │ 24363899-31d7-42a0-a436-389931d752a0 │ │ +│ information_schema │ Memory │ /var/lib/clickhouse/ │ │ 00000000-0000-0000-0000-000000000000 │ │ +│ system │ Atomic │ /var/lib/clickhouse/store/ │ /var/lib/clickhouse/store/1d1/1d1c869d-e465-4b1b-a51f-be033436ebf9/ │ 03e9f3d1-cc88-4a49-83e9-f3d1cc881a49 │ │ +└────────────────────┴────────┴────────────────────────────┴─────────────────────────────────────────────────────────────────────┴──────────────────────────────────────┴─────────┘ ``` - -[Original article](https://clickhouse.com/docs/en/operations/system-tables/databases) diff --git a/docs/en/operations/system-tables/information_schema.md b/docs/en/operations/system-tables/information_schema.md new file mode 100644 index 00000000000..df5b012f2b6 --- /dev/null +++ b/docs/en/operations/system-tables/information_schema.md @@ -0,0 +1,210 @@ +# INFORMATION_SCHEMA {#information-schema} + +`INFORMATION_SCHEMA` (`information_schema`) is a system database that contains views. Using these views, you can get information about the metadata of database objects. These views read data from the columns of the [system.columns](../../operations/system-tables/columns.md), [system.databases](../../operations/system-tables/databases.md) and [system.tables](../../operations/system-tables/tables.md) system tables. + +The structure and composition of system tables may change in different versions of the product, but the support of the `information_schema` makes it possible to change the structure of system tables without changing the method of access to metadata. Metadata requests do not depend on the DBMS used. 
+ +``` sql +SHOW TABLES FROM INFORMATION_SCHEMA; +``` + +``` text +┌─name─────┐ +│ COLUMNS │ +│ SCHEMATA │ +│ TABLES │ +│ VIEWS │ +└──────────┘ +``` + +`INFORMATION_SCHEMA` contains the following views: + +- [COLUMNS](#columns) +- [SCHEMATA](#schemata) +- [TABLES](#tables) +- [VIEWS](#views) + +## COLUMNS {#columns} + +Contains columns read from the [system.columns](../../operations/system-tables/columns.md) system table and columns that are not supported in ClickHouse or do not make sense (always `NULL`), but must be by the standard. + +Columns: + +- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. +- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. +- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `column_name` ([String](../../sql-reference/data-types/string.md)) — Column name. +- `ordinal_position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1. +- `column_default` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined. +- `is_nullable` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column type is `Nullable`. +- `data_type` ([String](../../sql-reference/data-types/string.md)) — Column type. +- `character_maximum_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned. +- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned. +- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned. +- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned. +- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned. +- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned. 
+- `character_set_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `character_set_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `character_set_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `collation_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `collation_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `collation_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `domain_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `domain_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `domain_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. + +**Example** + +Query: + +``` sql +SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (table_schema=currentDatabase() OR table_schema='') AND table_name NOT LIKE '%inner%' LIMIT 1 FORMAT Vertical; +``` + +Result: + +``` text +Row 1: +────── +table_catalog: default +table_schema: default +table_name: describe_example +column_name: id +ordinal_position: 1 +column_default: +is_nullable: 0 +data_type: UInt64 +character_maximum_length: ᴺᵁᴸᴸ +character_octet_length: ᴺᵁᴸᴸ +numeric_precision: 64 +numeric_precision_radix: 2 +numeric_scale: 0 +datetime_precision: ᴺᵁᴸᴸ +character_set_catalog: ᴺᵁᴸᴸ +character_set_schema: ᴺᵁᴸᴸ +character_set_name: ᴺᵁᴸᴸ +collation_catalog: ᴺᵁᴸᴸ +collation_schema: ᴺᵁᴸᴸ +collation_name: ᴺᵁᴸᴸ +domain_catalog: ᴺᵁᴸᴸ +domain_schema: ᴺᵁᴸᴸ +domain_name: ᴺᵁᴸᴸ +``` + +## SCHEMATA {#schemata} + +Contains columns read from the [system.databases](../../operations/system-tables/databases.md) system table and columns that are not supported in ClickHouse or do not make sense (always `NULL`), but must be by the standard. + +Columns: + +- `catalog_name` ([String](../../sql-reference/data-types/string.md)) — The name of the database. +- `schema_name` ([String](../../sql-reference/data-types/string.md)) — The name of the database. +- `schema_owner` ([String](../../sql-reference/data-types/string.md)) — Schema owner name, always `'default'`. +- `default_character_set_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `default_character_set_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `default_character_set_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `sql_path` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. 
+ +**Example** + +Query: + +``` sql +SELECT * FROM information_schema.schemata WHERE schema_name ILIKE 'information_schema' LIMIT 1 FORMAT Vertical; +``` + +Result: + +``` text +Row 1: +────── +catalog_name: INFORMATION_SCHEMA +schema_name: INFORMATION_SCHEMA +schema_owner: default +default_character_set_catalog: ᴺᵁᴸᴸ +default_character_set_schema: ᴺᵁᴸᴸ +default_character_set_name: ᴺᵁᴸᴸ +sql_path: ᴺᵁᴸᴸ +``` + +## TABLES {#tables} + +Contains columns read from the [system.tables](../../operations/system-tables/tables.md) system table. + +Columns: + +- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. +- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. +- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `table_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Table type. Possible values: + - `BASE TABLE` + - `VIEW` + - `FOREIGN TABLE` + - `LOCAL TEMPORARY` + - `SYSTEM VIEW` + +**Example** + +Query: + +``` sql +SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE (table_schema = currentDatabase() OR table_schema = '') AND table_name NOT LIKE '%inner%' LIMIT 1 FORMAT Vertical; +``` + +Result: + +``` text +Row 1: +────── +table_catalog: default +table_schema: default +table_name: describe_example +table_type: BASE TABLE +``` + +## VIEWS {#views} + +Contains columns read from the [system.tables](../../operations/system-tables/tables.md) system table, when the table engine [View](../../engines/table-engines/special/view.md) is used. + +Columns: + +- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. +- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. +- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `view_definition` ([String](../../sql-reference/data-types/string.md)) — `SELECT` query for view. +- `check_option` ([String](../../sql-reference/data-types/string.md)) — `NONE`, no checking. +- `is_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the view is not updated. +- `is_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — Shows whether the created view is [materialized](../../sql-reference/statements/create/view/#materialized). Possible values: + - `NO` — The created view is not materialized. + - `YES` — The created view is materialized. +- `is_trigger_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the trigger is not updated. +- `is_trigger_deletable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the trigger is not deleted. +- `is_trigger_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, no data is inserted into the trigger. 
+ +**Example** + +Query: + +``` sql +CREATE VIEW v (n Nullable(Int32), f Float64) AS SELECT n, f FROM t; +CREATE MATERIALIZED VIEW mv ENGINE = Null AS SELECT * FROM system.one; +SELECT * FROM information_schema.views WHERE table_schema = currentDatabase() LIMIT 1 FORMAT Vertical; +``` + +Result: + +``` text +Row 1: +────── +table_catalog: default +table_schema: default +table_name: mv +view_definition: SELECT * FROM system.one +check_option: NONE +is_updatable: NO +is_insertable_into: YES +is_trigger_updatable: NO +is_trigger_deletable: NO +is_trigger_insertable_into: NO +``` diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index 0cadc975bd4..e3aab04f7dd 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -26,6 +26,8 @@ Each query creates one or two rows in the `query_log` table, depending on the st You can use the [log_queries_probability](../../operations/settings/settings.md#log-queries-probability) setting to reduce the number of queries, registered in the `query_log` table. +You can use the [log_formatted_queries](../../operations/settings/settings.md#settings-log-formatted-queries) setting to log formatted queries to the `formatted_query` column. + Columns: - `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of an event that occurred when executing the query. Values: @@ -48,6 +50,7 @@ Columns: - `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query. - `current_database` ([String](../../sql-reference/data-types/string.md)) — Name of the current database. - `query` ([String](../../sql-reference/data-types/string.md)) — Query string. +- `formatted_query` ([String](../../sql-reference/data-types/string.md)) — Formatted query string. - `normalized_query_hash` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Identical hash value without the values of literals for similar queries. - `query_kind` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Type of the query. - `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the databases present in the query. 
@@ -114,68 +117,68 @@ SELECT * FROM system.query_log WHERE type = 'QueryFinish' ORDER BY query_start_t Row 1: ────── type: QueryFinish -event_date: 2021-07-28 -event_time: 2021-07-28 13:46:56 -event_time_microseconds: 2021-07-28 13:46:56.719791 -query_start_time: 2021-07-28 13:46:56 -query_start_time_microseconds: 2021-07-28 13:46:56.704542 -query_duration_ms: 14 -read_rows: 8393 -read_bytes: 374325 +event_date: 2021-11-03 +event_time: 2021-11-03 16:13:54 +event_time_microseconds: 2021-11-03 16:13:54.953024 +query_start_time: 2021-11-03 16:13:54 +query_start_time_microseconds: 2021-11-03 16:13:54.952325 +query_duration_ms: 0 +read_rows: 69 +read_bytes: 6187 written_rows: 0 written_bytes: 0 -result_rows: 4201 -result_bytes: 153024 -memory_usage: 4714038 +result_rows: 69 +result_bytes: 48256 +memory_usage: 0 current_database: default -query: SELECT DISTINCT arrayJoin(extractAll(name, '[\\w_]{2,}')) AS res FROM (SELECT name FROM system.functions UNION ALL SELECT name FROM system.table_engines UNION ALL SELECT name FROM system.formats UNION ALL SELECT name FROM system.table_functions UNION ALL SELECT name FROM system.data_type_families UNION ALL SELECT name FROM system.merge_tree_settings UNION ALL SELECT name FROM system.settings UNION ALL SELECT cluster FROM system.clusters UNION ALL SELECT macro FROM system.macros UNION ALL SELECT policy_name FROM system.storage_policies UNION ALL SELECT concat(func.name, comb.name) FROM system.functions AS func CROSS JOIN system.aggregate_function_combinators AS comb WHERE is_aggregate UNION ALL SELECT name FROM system.databases LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.tables LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.dictionaries LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.columns LIMIT 10000) WHERE notEmpty(res) -normalized_query_hash: 6666026786019643712 -query_kind: Select -databases: ['system'] -tables: ['system.aggregate_function_combinators','system.clusters','system.columns','system.data_type_families','system.databases','system.dictionaries','system.formats','system.functions','system.macros','system.merge_tree_settings','system.settings','system.storage_policies','system.table_engines','system.table_functions','system.tables'] -columns: ['system.aggregate_function_combinators.name','system.clusters.cluster','system.columns.name','system.data_type_families.name','system.databases.name','system.dictionaries.name','system.formats.name','system.functions.is_aggregate','system.functions.name','system.macros.macro','system.merge_tree_settings.name','system.settings.name','system.storage_policies.policy_name','system.table_engines.name','system.table_functions.name','system.tables.name'] +query: DESCRIBE TABLE system.query_log +formatted_query: +normalized_query_hash: 8274064835331539124 +query_kind: +databases: [] +tables: [] +columns: [] projections: [] +views: [] exception_code: 0 exception: stack_trace: is_initial_query: 1 user: default -query_id: a3361f6e-a1fd-4d54-9f6f-f93a08bab0bf +query_id: 7c28bbbb-753b-4eba-98b1-efcbe2b9bdf6 address: ::ffff:127.0.0.1 -port: 51006 +port: 40452 initial_user: default -initial_query_id: a3361f6e-a1fd-4d54-9f6f-f93a08bab0bf +initial_query_id: 7c28bbbb-753b-4eba-98b1-efcbe2b9bdf6 initial_address: ::ffff:127.0.0.1 -initial_port: 51006 -initial_query_start_time: 2021-07-28 13:46:56 -initial_query_start_time_microseconds: 2021-07-28 13:46:56.704542 +initial_port: 40452 +initial_query_start_time: 2021-11-03 16:13:54 +initial_query_start_time_microseconds: 2021-11-03 16:13:54.952325 
interface: 1 -os_user: -client_hostname: -client_name: ClickHouse client +os_user: sevirov +client_hostname: clickhouse.ru-central1.internal +client_name: ClickHouse client_revision: 54449 client_version_major: 21 -client_version_minor: 8 -client_version_patch: 0 +client_version_minor: 10 +client_version_patch: 1 http_method: 0 http_user_agent: http_referer: forwarded_for: quota_key: -revision: 54453 +revision: 54456 log_comment: -thread_ids: [5058,22097,22110,22094] -ProfileEvents.Names: ['Query','SelectQuery','ArenaAllocChunks','ArenaAllocBytes','FunctionExecute','NetworkSendElapsedMicroseconds','SelectedRows','SelectedBytes','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SystemTimeMicroseconds','SoftPageFaults','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds','OSWriteBytes','OSWriteChars'] -ProfileEvents.Values: [1,1,39,352256,64,360,8393,374325,412,440,34480,13108,4723,671,19,17828,8192,10240] -Settings.Names: ['load_balancing','max_memory_usage'] -Settings.Values: ['random','10000000000'] +thread_ids: [30776,31174] +ProfileEvents: {'Query':1,'NetworkSendElapsedMicroseconds':59,'NetworkSendBytes':2643,'SelectedRows':69,'SelectedBytes':6187,'ContextLock':9,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':817,'UserTimeMicroseconds':427,'SystemTimeMicroseconds':212,'OSCPUVirtualTimeMicroseconds':639,'OSReadChars':894,'OSWriteChars':319} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} used_aggregate_functions: [] used_aggregate_function_combinators: [] used_database_engines: [] -used_data_type_families: ['UInt64','UInt8','Nullable','String','date'] +used_data_type_families: [] used_dictionaries: [] used_formats: [] -used_functions: ['concat','notEmpty','extractAll'] +used_functions: [] used_storages: [] used_table_functions: [] ``` @@ -183,6 +186,3 @@ used_table_functions: [] **See Also** - [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread. -- [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log) — This table contains information about each view executed during a query. - -[Original article](https://clickhouse.com/docs/en/operations/system-tables/query_log) diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md index f7da8a39856..0ccf69bc048 100644 --- a/docs/en/operations/system-tables/tables.md +++ b/docs/en/operations/system-tables/tables.md @@ -30,6 +30,8 @@ Columns: - `engine_full` ([String](../../sql-reference/data-types/string.md)) - Parameters of the table engine. +- `as_select` ([String](../../sql-reference/data-types/string.md)) - `SELECT` query for view. + - `partition_key` ([String](../../sql-reference/data-types/string.md)) - The partition key expression specified in the table. - `sorting_key` ([String](../../sql-reference/data-types/string.md)) - The sorting key expression specified in the table. @@ -56,6 +58,7 @@ Columns: - `comment` ([String](../../sql-reference/data-types/string.md)) - The comment for the table. +- `has_own_data` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the table itself stores some data on disk or only accesses some other source. The `system.tables` table is used in `SHOW TABLES` query implementation. 
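The new columns can be inspected directly; the query below is only an illustrative sketch (the rows returned depend on the tables that exist in the current database):

``` sql
SELECT name, engine, has_own_data, as_select
FROM system.tables
WHERE database = currentDatabase()
LIMIT 5;
```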
@@ -80,6 +83,7 @@ dependencies_database: [] dependencies_table: [] create_table_query: CREATE TABLE base.t1 (`n` UInt64) ENGINE = MergeTree ORDER BY n SETTINGS index_granularity = 8192 engine_full: MergeTree ORDER BY n SETTINGS index_granularity = 8192 +as_select: SELECT database AS table_catalog partition_key: sorting_key: n primary_key: n @@ -90,6 +94,7 @@ total_bytes: 99 lifetime_rows: ᴺᵁᴸᴸ lifetime_bytes: ᴺᵁᴸᴸ comment: +has_own_data: 0 Row 2: ────── @@ -105,6 +110,7 @@ dependencies_database: [] dependencies_table: [] create_table_query: CREATE TABLE default.`53r93yleapyears` (`id` Int8, `febdays` Int8) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192 engine_full: MergeTree ORDER BY id SETTINGS index_granularity = 8192 +as_select: SELECT name AS catalog_name partition_key: sorting_key: id primary_key: id @@ -115,6 +121,5 @@ total_bytes: 155 lifetime_rows: ᴺᵁᴸᴸ lifetime_bytes: ᴺᵁᴸᴸ comment: +has_own_data: 0 ``` - -[Original article](https://clickhouse.com/docs/en/operations/system-tables/tables) diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index 54c66bb8d13..cc27250b00d 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -1,3 +1,4 @@ +--- toc_priority: 58 toc_title: Usage Recommendations --- @@ -71,8 +72,8 @@ For HDD, enable the write cache. ## File System {#file-system} Ext4 is the most reliable option. Set the mount options `noatime`. -XFS is also suitable, but it hasn’t been as thoroughly tested with ClickHouse. -Most other file systems should also work fine. File systems with delayed allocation work better. +XFS should be avoided. It works mostly fine but there are some reports about lower performance. +Most other file systems should also work fine. ## Linux Kernel {#linux-kernel} diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md index 3fc5121ebcc..44615628eef 100644 --- a/docs/en/sql-reference/aggregate-functions/combinators.md +++ b/docs/en/sql-reference/aggregate-functions/combinators.md @@ -25,6 +25,12 @@ Example 2: `uniqArray(arr)` – Counts the number of unique elements in all ‘a -If and -Array can be combined. However, ‘Array’ must come first, then ‘If’. Examples: `uniqArrayIf(arr, cond)`, `quantilesTimingArrayIf(level1, level2)(arr, cond)`. Due to this order, the ‘cond’ argument won’t be an array. +## -Map {#agg-functions-combinator-map} + +The -Map suffix can be appended to any aggregate function. This will create an aggregate function which gets Map type as an argument, and aggregates values of each key of the map separately using the specified aggregate function. The result is also of a Map type. + +Examples: `sumMap(map(1,1))`, `avgMap(map('a', 1))`. + ## -SimpleState {#agg-functions-combinator-simplestate} If you apply this combinator, the aggregate function returns the same value but with a different type. This is a [SimpleAggregateFunction(...)](../../sql-reference/data-types/simpleaggregatefunction.md) that can be stored in a table to work with [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) tables. diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 93bf2746c3c..b8ec276c7f9 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -213,7 +213,7 @@ SELECT splitByNonAlpha(' 1! a, b. 
'); ## arrayStringConcat(arr\[, separator\]) {#arraystringconcatarr-separator} -Concatenates the strings (values of type String or Nullable(String)) listed in the array with the separator. ’separator’ is an optional parameter: a constant string, set to an empty string by default. +Concatenates string representations of values listed in the array with the separator. `separator` is an optional parameter: a constant string, set to an empty string by default. Returns the string. ## alphaTokens(s) {#alphatokenss} diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index e1a76d2c0ae..12737624ecb 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -155,7 +155,7 @@ ALTER TABLE visits CLEAR COLUMN hour in PARTITION 201902 ## FREEZE PARTITION {#alter_freeze-partition} ``` sql -ALTER TABLE table_name FREEZE [PARTITION partition_expr] +ALTER TABLE table_name FREEZE [PARTITION partition_expr] [WITH NAME 'backup_name'] ``` This query creates a local backup of a specified partition. If the `PARTITION` clause is omitted, the query creates the backup of all partitions at once. @@ -169,6 +169,7 @@ At the time of execution, for a data snapshot, the query creates hardlinks to a - `/var/lib/clickhouse/` is the working ClickHouse directory specified in the config. - `N` is the incremental number of the backup. +- if the `WITH NAME` parameter is specified, then the value of the `'backup_name'` parameter is used instead of the incremental number. !!! note "Note" If you use [a set of disks for data storage in a table](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes), the `shadow/N` directory appears on every disk, storing data parts that matched by the `PARTITION` expression. diff --git a/docs/en/sql-reference/statements/create/database.md b/docs/en/sql-reference/statements/create/database.md index 3c6f73d54db..787bbc02346 100644 --- a/docs/en/sql-reference/statements/create/database.md +++ b/docs/en/sql-reference/statements/create/database.md @@ -8,7 +8,7 @@ toc_title: DATABASE Creates a new database. ``` sql -CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] [ENGINE = engine(...)] +CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] [ENGINE = engine(...)] [COMMENT 'Comment'] ``` ## Clauses {#clauses} @@ -26,4 +26,33 @@ ClickHouse creates the `db_name` database on all the servers of a specified clus ### ENGINE {#engine} -[MySQL](../../../engines/database-engines/mysql.md) allows you to retrieve data from the remote MySQL server. By default, ClickHouse uses its own [database engine](../../../engines/database-engines/index.md). There’s also a [lazy](../../../engines/database-engines/lazy.md) engine. +By default, ClickHouse uses its own [Atomic](../../../engines/database-engines/atomic.md) database engine. There are also [Lazy](../../../engines/database-engines/lazy.md), [MySQL](../../../engines/database-engines/mysql.md), [PostgreSQL](../../../engines/database-engines/postgresql.md), [MaterializedMySQL](../../../engines/database-engines/materialized-mysql.md), [MaterializedPostgreSQL](../../../engines/database-engines/materialized-postgresql.md), [Replicated](../../../engines/database-engines/replicated.md), [SQLite](../../../engines/database-engines/sqlite.md). + +### COMMENT {#comment} + +You can add a comment to the database when creating it. + +The comment is supported for all database engines.
+ +**Syntax** + +``` sql +CREATE DATABASE db_name ENGINE = engine(...) COMMENT 'Comment' +``` + +**Example** + +Query: + +``` sql +CREATE DATABASE db_comment ENGINE = Memory COMMENT 'The temporary database'; +SELECT name, comment FROM system.databases WHERE name = 'db_comment'; +``` + +Result: + +```text +┌─name───────┬─comment────────────────┐ +│ db_comment │ The temporary database │ +└────────────┴────────────────────────┘ +``` diff --git a/docs/en/sql-reference/table-functions/merge.md b/docs/en/sql-reference/table-functions/merge.md index a5c74b71069..c89f0f4cc5a 100644 --- a/docs/en/sql-reference/table-functions/merge.md +++ b/docs/en/sql-reference/table-functions/merge.md @@ -5,7 +5,23 @@ toc_title: merge # merge {#merge} -`merge(db_name, 'tables_regexp')` – Creates a temporary Merge table. For more information, see the section “Table engines, Merge”. +Creates a temporary [Merge](../../engines/table-engines/special/merge.md) table. The table structure is taken from the first table encountered that matches the regular expression. -The table structure is taken from the first table encountered that matches the regular expression. +**Syntax** + +```sql +merge('db_name', 'tables_regexp') +``` +**Arguments** + +- `db_name` — Possible values: + - database name, + - constant expression that returns a string with a database name, for example, `currentDatabase()`, + - `REGEXP(expression)`, where `expression` is a regular expression to match the DB names. + +- `tables_regexp` — A regular expression to match the table names in the specified DB or DBs. + +**See Also** + +- [Merge](../../engines/table-engines/special/merge.md) table engine diff --git a/docs/ru/engines/database-engines/materialized-postgresql.md b/docs/ru/engines/database-engines/materialized-postgresql.md index 10f86543e73..554996062d5 100644 --- a/docs/ru/engines/database-engines/materialized-postgresql.md +++ b/docs/ru/engines/database-engines/materialized-postgresql.md @@ -23,6 +23,20 @@ ENGINE = MaterializedPostgreSQL('host:port', ['database' | database], 'user', 'p - `user` — пользователь PostgreSQL. - `password` — пароль пользователя. +## Динамическое добавление новых таблиц в репликацию {#dynamically-adding-table-to-replication} + +``` sql +ATTACH TABLE postgres_database.new_table; +``` + +При указании конкретного списка таблиц в базе с помощью настройки [materialized_postgresql_tables_list](../../operations/settings/settings.md#materialized-postgresql-tables-list), он будет обновлен (в `.sql` метаданных) на актуальный с учетом таблиц, добавленных с помощью запроса `ATTACH TABLE`. + +## Динамическое удаление таблиц из репликации {#dynamically-removing-table-from-replication} + +``` sql +DETACH TABLE postgres_database.table_to_remove; +``` + ## Настройки {#settings} - [materialized_postgresql_max_block_size](../../operations/settings/settings.md#materialized-postgresql-max-block-size) @@ -44,6 +58,12 @@ SETTINGS materialized_postgresql_max_block_size = 65536, SELECT * FROM database1.table1; ``` +Настройки можно при необходимости изменить с помощью DDL запроса. Однако с помощью него нельзя изменить настройку `materialized_postgresql_tables_list`, для обновления списка таблиц в данной настройке нужно использовать запрос `ATTACH TABLE`. + +``` sql +ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = ; +``` + ## Требования {#requirements} 1. 
Настройка [wal_level](https://postgrespro.ru/docs/postgrespro/10/runtime-config-wal) должна иметь значение `logical`, параметр `max_replication_slots` должен быть равен по меньшей мере `2` в конфигурационном файле в PostgreSQL. diff --git a/docs/ru/engines/table-engines/integrations/rabbitmq.md b/docs/ru/engines/table-engines/integrations/rabbitmq.md index ef8a58c4c82..7322f23fe0e 100644 --- a/docs/ru/engines/table-engines/integrations/rabbitmq.md +++ b/docs/ru/engines/table-engines/integrations/rabbitmq.md @@ -30,6 +30,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [rabbitmq_skip_broken_messages = N,] [rabbitmq_max_block_size = N,] [rabbitmq_flush_interval_ms = N] + [rabbitmq_queue_settings_list = 'x-dead-letter-exchange=my-dlx,x-max-length=10,x-overflow=reject-publish'] ``` Обязательные параметры: @@ -51,6 +52,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] - `rabbitmq_skip_broken_messages` – максимальное количество некорректных сообщений в блоке. Если `rabbitmq_skip_broken_messages = N`, то движок отбрасывает `N` сообщений, которые не получилось обработать. Одно сообщение в точности соответствует одной записи (строке). Значение по умолчанию – 0. - `rabbitmq_max_block_size` - `rabbitmq_flush_interval_ms` +- `rabbitmq_queue_settings_list` - позволяет самостоятельно установить настройки RabbitMQ при создании очереди. Доступные настройки: `x-max-length`, `x-max-length-bytes`, `x-message-ttl`, `x-expires`, `x-priority`, `x-max-priority`, `x-overflow`, `x-dead-letter-exchange`, `x-queue-type`. Настрока `durable` для очереди ставится автоматически. Настройки форматов данных также могут быть добавлены в списке RabbitMQ настроек. diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index 07e67ad1b85..4448372c522 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -775,6 +775,8 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' После выполнения фоновых слияний или мутаций старые куски не удаляются сразу, а через некоторое время (табличная настройка `old_parts_lifetime`). Также они не перемещаются на другие тома или диски, поэтому до момента удаления они продолжают учитываться при подсчёте занятого дискового пространства. +Пользователь может сбалансированно распределять новые большие куски данных по разным дискам тома [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures), используя настройку [min_bytes_to_rebalance_partition_over_jbod](../../../operations/settings/merge-tree-settings.md#min-bytes-to-rebalance-partition-over-jbod). + ## Использование сервиса S3 для хранения данных {#table_engine-mergetree-s3} Таблицы семейства `MergeTree` могут хранить данные в сервисе [S3](https://aws.amazon.com/s3/) при использовании диска типа `s3`. diff --git a/docs/ru/engines/table-engines/special/merge.md b/docs/ru/engines/table-engines/special/merge.md index 714b087c201..47f8e871e47 100644 --- a/docs/ru/engines/table-engines/special/merge.md +++ b/docs/ru/engines/table-engines/special/merge.md @@ -7,43 +7,56 @@ toc_title: Merge Движок `Merge` (не путайте с движком `MergeTree`) не хранит данные самостоятельно, а позволяет читать одновременно из произвольного количества других таблиц. Чтение автоматически распараллеливается. Запись в таблицу не поддерживается. При чтении будут использованы индексы тех таблиц, из которых реально идёт чтение, если они существуют. 
-Движок `Merge` принимает параметры: имя базы данных и регулярное выражение для таблиц. -Пример: +## Создание таблицы {#creating-a-table} ``` sql -Merge(hits, '^WatchLog') + CREATE TABLE ... Engine=Merge(db_name, tables_regexp) ``` -Данные будут читаться из таблиц в базе `hits`, имена которых соответствуют регулярному выражению ‘`^WatchLog`’. +**Параметры движка** -Вместо имени базы данных может использоваться константное выражение, возвращающее строку. Например, `currentDatabase()`. +- `db_name` — Возможные варианты: + - имя БД, + - выражение, возвращающее строку с именем БД, например, `currentDatabase()`, + - `REGEXP(expression)`, где `expression` — регулярное выражение для отбора БД. + +- `tables_regexp` — регулярное выражение для имен таблиц в указанной БД или нескольких БД. + +## Использование {#usage} Регулярные выражения — [re2](https://github.com/google/re2) (поддерживает подмножество PCRE), регистрозависимые. Смотрите замечание об экранировании в регулярных выражениях в разделе «match». -При выборе таблиц для чтения, сама `Merge`-таблица не будет выбрана, даже если попадает под регулярное выражение, чтобы не возникло циклов. -Впрочем, вы можете создать две `Merge`-таблицы, которые будут пытаться бесконечно читать данные друг друга, но делать этого не нужно. +При выборе таблиц для чтения сама `Merge`-таблица не будет выбрана, даже если попадает под регулярное выражение, чтобы не возникло циклов. +Впрочем, вы можете создать две `Merge`-таблицы, которые будут пытаться бесконечно читать данные друг друга, но делать этого не рекомендуется. -Типичный способ использования движка `Merge` — работа с большим количеством таблиц типа `TinyLog`, как с одной. +Типичный способ использования движка `Merge` — работа с большим количеством таблиц типа `TinyLog` как с одной. -Пример 2: +**Пример 1** + +Пусть есть две БД `ABC_corporate_site` и `ABC_store`. Таблица `all_visitors` будет содержать ID из таблиц `visitors` в обеих БД. + +``` sql +CREATE TABLE all_visitors (id UInt32) ENGINE=Merge(REGEXP('ABC_*'), 'visitors'); +``` + +**Пример 2** Пусть есть старая таблица `WatchLog_old`. Необходимо изменить партиционирование без перемещения данных в новую таблицу `WatchLog_new`. При этом в выборке должны участвовать данные обеих таблиц. ``` sql CREATE TABLE WatchLog_old(date Date, UserId Int64, EventType String, Cnt UInt64) -ENGINE=MergeTree(date, (UserId, EventType), 8192); + ENGINE=MergeTree(date, (UserId, EventType), 8192); INSERT INTO WatchLog_old VALUES ('2018-01-01', 1, 'hit', 3); CREATE TABLE WatchLog_new(date Date, UserId Int64, EventType String, Cnt UInt64) -ENGINE=MergeTree PARTITION BY date ORDER BY (UserId, EventType) SETTINGS index_granularity=8192; + ENGINE=MergeTree PARTITION BY date ORDER BY (UserId, EventType) SETTINGS index_granularity=8192; INSERT INTO WatchLog_new VALUES ('2018-01-02', 2, 'hit', 3); CREATE TABLE WatchLog as WatchLog_old ENGINE=Merge(currentDatabase(), '^WatchLog'); -SELECT * -FROM WatchLog +SELECT * FROM WatchLog; ``` ``` text @@ -61,7 +74,7 @@ FROM WatchLog В секции `WHERE/PREWHERE` можно установить константное условие на столбец `_table` (например, `WHERE _table='xyz'`). В этом случае операции чтения выполняются только для тех таблиц, для которых выполняется условие на значение `_table`, таким образом, столбец `_table` работает как индекс. -**Смотрите также** +**См. 
также** - [Виртуальные столбцы](index.md#table_engines-virtual_columns) - +- Табличная функция [merge](../../../sql-reference/table-functions/merge.md) diff --git a/docs/ru/introduction/distinctive-features.md b/docs/ru/introduction/distinctive-features.md index dedb1412dbf..93e6a8d8e91 100644 --- a/docs/ru/introduction/distinctive-features.md +++ b/docs/ru/introduction/distinctive-features.md @@ -30,11 +30,13 @@ toc_title: "Отличительные возможности ClickHouse" Почти все перечисленные ранее столбцовые СУБД не поддерживают распределённую обработку запроса. В ClickHouse данные могут быть расположены на разных шардах. Каждый шард может представлять собой группу реплик, которые используются для отказоустойчивости. Запрос будет выполнен на всех шардах параллельно. Это делается прозрачно для пользователя. -## Поддержка SQL {#podderzhka-sql} +## Поддержка SQL {#sql-support} -ClickHouse поддерживает декларативный язык запросов на основе SQL и во многих случаях совпадающий с SQL стандартом. -Поддерживаются GROUP BY, ORDER BY, подзапросы в секциях FROM, IN, JOIN, а также скалярные подзапросы. -Зависимые подзапросы и оконные функции не поддерживаются. +ClickHouse поддерживает [декларативный язык запросов на основе SQL](../sql-reference/index.md) и во [многих случаях](../sql-reference/ansi.md) совпадающий с SQL стандартом. + +Поддерживаются [GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), подзапросы в секциях [FROM](../sql-reference/statements/select/from.md), [IN](../sql-reference/operators/in.md), [JOIN](../sql-reference/statements/select/join.md), [функции window](../sql-reference/window-functions/index.md), а также скалярные подзапросы. + +Зависимые подзапросы не поддерживаются, но могут стать доступными в будущем. ## Векторный движок {#vektornyi-dvizhok} diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index c8e3dadae97..ef2a99ebffc 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -368,6 +368,16 @@ ClickHouse проверяет условия для `min_part_size` и `min_part ``` +## hsts_max_age {#hsts-max-age} + +Срок действия HSTS в секундах. Значение по умолчанию `0` (HSTS выключен). Для включения HSTS задайте положительное число. Срок действия HSTS будет равен введенному числу. + +**Пример** + +```xml +600000 +``` + ## include_from {#server_configuration_parameters-include_from} Путь к файлу с подстановками. diff --git a/docs/ru/operations/settings/merge-tree-settings.md b/docs/ru/operations/settings/merge-tree-settings.md index 08ea9979426..117ed7d633b 100644 --- a/docs/ru/operations/settings/merge-tree-settings.md +++ b/docs/ru/operations/settings/merge-tree-settings.md @@ -327,3 +327,18 @@ Eсли суммарное число активных кусков во все Значение по умолчанию: `true`. По умолчанию сервер ClickHouse при создании таблицы проверяет тип данных столбца для сэмплирования или выражения сэмплирования. Если уже существуют таблицы с некорректным выражением сэмплирования, то чтобы не возникало исключение при запуске сервера, установите `check_sample_column_is_correct` в значение `false`. 
+ +## min_bytes_to_rebalance_partition_over_jbod {#min-bytes-to-rebalance-partition-over-jbod} + +Устанавливает минимальное количество байтов для обеспечения балансировки при распределении новых больших кусков данных по дискам тома [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures). + +Возможные значения: + +- Положительное целое число. +- 0 — балансировка отключена. + +Значение по умолчанию: `0`. + +**Использование** + +Значение настройки `min_bytes_to_rebalance_partition_over_jbod` должно быть меньше значения настройки [max_bytes_to_merge_at_max_space_in_pool](../../operations/settings/merge-tree-settings.md#max-bytes-to-merge-at-max-space-in-pool). Иначе ClickHouse сгенерирует исключение. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index e5efa657620..255e743b11d 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -922,6 +922,17 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING' log_query_threads=1 ``` +## log_formatted_queries {#settings-log-formatted-queries} + +Позволяет регистрировать отформатированные запросы в системной таблице [system.query_log](../../operations/system-tables/query_log.md). + +Возможные значения: + +- 0 — отформатированные запросы не регистрируются в системной таблице. +- 1 — отформатированные запросы регистрируются в системной таблице. + +Значение по умолчанию: `0`. + ## log_comment {#settings-log-comment} Задаёт значение поля `log_comment` таблицы [system.query_log](../system-tables/query_log.md) и текст комментария в логе сервера. @@ -1361,6 +1372,32 @@ load_balancing = round_robin Значение по умолчанию: `3`. +## compile_aggregate_expressions {#compile_aggregate_expressions} + +Включает или отключает компиляцию агрегатных функций в нативный код во время выполнения запроса. Включение этой настройки может улучшить производительность выполнения запросов. + +Возможные значения: + +- 0 — агрегатные функции не компилируются в нативный код. +- 1 — агрегатные функции компилируются в нативный код в процессе выполнения запроса. + +Значение по умолчанию: `1`. + +**См. также** + +- [min_count_to_compile_aggregate_expression](#min_count_to_compile_aggregate_expression) + +## min_count_to_compile_aggregate_expression {#min_count_to_compile_aggregate_expression} + +Минимальное количество вызовов агрегатной функции с одинаковым выражением, при котором функция будет компилироваться в нативный код в ходе выполнения запроса. Работает только если включена настройка [compile_aggregate_expressions](#compile_aggregate_expressions). + +Возможные значения: + +- Целое положительное число. +- 0 — агрегатные функциии всегда компилируются в ходе выполнения запроса. + +Значение по умолчанию: `3`. + ## input_format_skip_unknown_fields {#input-format-skip-unknown-fields} Если значение равно true, то при выполнении INSERT входные данные из столбцов с неизвестными именами будут пропущены. В противном случае эта ситуация создаст исключение. diff --git a/docs/ru/operations/system-tables/columns.md b/docs/ru/operations/system-tables/columns.md index a896360b3f9..b5108386ce8 100644 --- a/docs/ru/operations/system-tables/columns.md +++ b/docs/ru/operations/system-tables/columns.md @@ -24,6 +24,11 @@ Cтолбцы: - `is_in_primary_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, показывающий включение столбца в первичный ключ. - `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, показывающий включение столбца в ключ выборки. 
- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — имя кодека сжатия. +- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальная длина в байтах для двоичных данных, символьных данных или текстовых данных и изображений. В ClickHouse имеет смысл только для типа данных `FixedString`. Иначе возвращается значение `NULL`. +- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — точность приблизительных числовых данных, точных числовых данных, целочисленных данных или денежных данных. В ClickHouse это разрядность для целочисленных типов и десятичная точность для типов `Decimal`. Иначе возвращается значение `NULL`. +- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — основание системы счисления точности приблизительных числовых данных, точных числовых данных, целочисленных данных или денежных данных. В ClickHouse значение столбца равно 2 для целочисленных типов и 10 — для типов `Decimal`. Иначе возвращается значение `NULL`. +- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — масштаб приблизительных числовых данных, точных числовых данных, целочисленных данных или денежных данных. В ClickHouse имеет смысл только для типов `Decimal`. Иначе возвращается значение `NULL`. +- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — десятичная точность для данных типа `DateTime64`. Для других типов данных возвращается значение `NULL`. **Пример** @@ -34,10 +39,11 @@ SELECT * FROM system.columns LIMIT 2 FORMAT Vertical; ```text Row 1: ────── -database: system -table: aggregate_function_combinators -name: name +database: INFORMATION_SCHEMA +table: COLUMNS +name: table_catalog type: String +position: 1 default_kind: default_expression: data_compressed_bytes: 0 @@ -49,13 +55,19 @@ is_in_sorting_key: 0 is_in_primary_key: 0 is_in_sampling_key: 0 compression_codec: +character_octet_length: ᴺᵁᴸᴸ +numeric_precision: ᴺᵁᴸᴸ +numeric_precision_radix: ᴺᵁᴸᴸ +numeric_scale: ᴺᵁᴸᴸ +datetime_precision: ᴺᵁᴸᴸ Row 2: ────── -database: system -table: aggregate_function_combinators -name: is_internal -type: UInt8 +database: INFORMATION_SCHEMA +table: COLUMNS +name: table_schema +type: String +position: 2 default_kind: default_expression: data_compressed_bytes: 0 @@ -67,4 +79,9 @@ is_in_sorting_key: 0 is_in_primary_key: 0 is_in_sampling_key: 0 compression_codec: +character_octet_length: ᴺᵁᴸᴸ +numeric_precision: ᴺᵁᴸᴸ +numeric_precision_radix: ᴺᵁᴸᴸ +numeric_scale: ᴺᵁᴸᴸ +datetime_precision: ᴺᵁᴸᴸ ``` diff --git a/docs/ru/operations/system-tables/data_skipping_indices.md b/docs/ru/operations/system-tables/data_skipping_indices.md index 39e13ed1d5a..d57d62cf08b 100644 --- a/docs/ru/operations/system-tables/data_skipping_indices.md +++ b/docs/ru/operations/system-tables/data_skipping_indices.md @@ -10,6 +10,9 @@ - `type` ([String](../../sql-reference/data-types/string.md)) — тип индекса. - `expr` ([String](../../sql-reference/data-types/string.md)) — выражение, используемое для вычисления индекса. - `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — количество гранул в блоке данных. 
+- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер сжатых данных в байтах. +- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер несжатых данных в байтах. +- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер засечек в байтах. **Пример** @@ -26,6 +29,9 @@ name: clicks_idx type: minmax expr: clicks granularity: 1 +data_compressed_bytes: 58 +data_uncompressed_bytes: 6 +marks: 48 Row 2: ────── @@ -35,4 +41,7 @@ name: contacts_null_idx type: minmax expr: assumeNotNull(contacts_null) granularity: 1 +data_compressed_bytes: 58 +data_uncompressed_bytes: 6 +marks: 48 ``` diff --git a/docs/ru/operations/system-tables/information_schema.md b/docs/ru/operations/system-tables/information_schema.md new file mode 100644 index 00000000000..b61418931bd --- /dev/null +++ b/docs/ru/operations/system-tables/information_schema.md @@ -0,0 +1,210 @@ +# INFORMATION_SCHEMA {#information-schema} + +`INFORMATION_SCHEMA` (`information_schema`) — это системная база данных, содержащая представления. Используя эти представления, вы можете получить информацию о метаданных объектов базы данных. Эти представления считывают данные из столбцов системных таблиц [system.columns](../../operations/system-tables/columns.md), [system.databases](../../operations/system-tables/databases.md) и [system.tables](../../operations/system-tables/tables.md). + +Структура и состав системных таблиц могут меняться в разных версиях СУБД ClickHouse, но поддержка `information_schema` позволяет изменять структуру системных таблиц без изменения способа доступа к метаданным. Запросы метаданных не зависят от используемой СУБД. + +``` sql +SHOW TABLES FROM INFORMATION_SCHEMA; +``` + +``` text +┌─name─────┐ +│ COLUMNS │ +│ SCHEMATA │ +│ TABLES │ +│ VIEWS │ +└──────────┘ +``` + +`INFORMATION_SCHEMA` содержит следующие представления: + +- [COLUMNS](#columns) +- [SCHEMATA](#schemata) +- [TABLES](#tables) +- [VIEWS](#views) + +## COLUMNS {#columns} + +Содержит столбцы, которые считываются из системной таблицы [system.columns](../../operations/system-tables/columns.md), и столбцы, которые не поддерживаются в ClickHouse или не имеют смысла (всегда имеют значение `NULL`), но должны быть по стандарту. + +Столбцы: + +- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — имя базы данных, в которой находится таблица. +- `table_schema` ([String](../../sql-reference/data-types/string.md)) — имя базы данных, в которой находится таблица. +- `table_name` ([String](../../sql-reference/data-types/string.md)) — имя таблицы. +- `column_name` ([String](../../sql-reference/data-types/string.md)) — имя столбца. +- `ordinal_position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — порядковый номер столбца в таблице (нумерация начинается с 1). +- `column_default` ([String](../../sql-reference/data-types/string.md)) — выражение для значения по умолчанию или пустая строка. +- `is_nullable` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, показывающий является ли столбец типа `Nullable`. +- `data_type` ([String](../../sql-reference/data-types/string.md)) — тип столбца. +- `character_maximum_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальная длина в байтах для двоичных данных, символьных данных или текстовых данных и изображений. В ClickHouse имеет смысл только для типа данных `FixedString`. Иначе возвращается значение `NULL`. 
+- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — максимальная длина в байтах для двоичных данных, символьных данных или текстовых данных и изображений. В ClickHouse имеет смысл только для типа данных `FixedString`. Иначе возвращается значение `NULL`. +- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — точность приблизительных числовых данных, точных числовых данных, целочисленных данных или денежных данных. В ClickHouse это разрядность для целочисленных типов и десятичная точность для типов `Decimal`. Иначе возвращается значение `NULL`. +- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — основание системы счисления точности приблизительных числовых данных, точных числовых данных, целочисленных данных или денежных данных. В ClickHouse значение столбца равно 2 для целочисленных типов и 10 — для типов `Decimal`. Иначе возвращается значение `NULL`. +- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — масштаб приблизительных числовых данных, точных числовых данных, целочисленных данных или денежных данных. В ClickHouse имеет смысл только для типов `Decimal`. Иначе возвращается значение `NULL`. +- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — десятичная точность для данных типа `DateTime64`. Для других типов данных возвращается значение `NULL`. +- `character_set_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, не поддерживается. +- `character_set_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, не поддерживается. +- `character_set_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, не поддерживается. +- `collation_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, не поддерживается. +- `collation_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, не поддерживается. +- `collation_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, не поддерживается. +- `domain_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, не поддерживается. +- `domain_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, не поддерживается. +- `domain_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, не поддерживается. 
+ +**Пример** + +Запрос: + +``` sql +SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (table_schema=currentDatabase() OR table_schema='') AND table_name NOT LIKE '%inner%' LIMIT 1 FORMAT Vertical; +``` + +Результат: + +``` text +Row 1: +────── +table_catalog: default +table_schema: default +table_name: describe_example +column_name: id +ordinal_position: 1 +column_default: +is_nullable: 0 +data_type: UInt64 +character_maximum_length: ᴺᵁᴸᴸ +character_octet_length: ᴺᵁᴸᴸ +numeric_precision: 64 +numeric_precision_radix: 2 +numeric_scale: 0 +datetime_precision: ᴺᵁᴸᴸ +character_set_catalog: ᴺᵁᴸᴸ +character_set_schema: ᴺᵁᴸᴸ +character_set_name: ᴺᵁᴸᴸ +collation_catalog: ᴺᵁᴸᴸ +collation_schema: ᴺᵁᴸᴸ +collation_name: ᴺᵁᴸᴸ +domain_catalog: ᴺᵁᴸᴸ +domain_schema: ᴺᵁᴸᴸ +domain_name: ᴺᵁᴸᴸ +``` + +## SCHEMATA {#schemata} + +Содержит столбцы, которые считываются из системной таблицы [system.databases](../../operations/system-tables/databases.md), и столбцы, которые не поддерживаются в ClickHouse или не имеют смысла (всегда имеют значение `NULL`), но должны быть по стандарту. + +Столбцы: + +- `catalog_name` ([String](../../sql-reference/data-types/string.md)) — имя базы данных. +- `schema_name` ([String](../../sql-reference/data-types/string.md)) — имя базы данных. +- `schema_owner` ([String](../../sql-reference/data-types/string.md)) — имя владельца схемы, всегда `'default'`. +- `default_character_set_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, не поддерживается. +- `default_character_set_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, не поддерживается. +- `default_character_set_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, не поддерживается. +- `sql_path` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, не поддерживается. + +**Пример** + +Запрос: + +``` sql +SELECT * FROM information_schema.schemata WHERE schema_name ILIKE 'information_schema' LIMIT 1 FORMAT Vertical; +``` + +Результат: + +``` text +Row 1: +────── +catalog_name: INFORMATION_SCHEMA +schema_name: INFORMATION_SCHEMA +schema_owner: default +default_character_set_catalog: ᴺᵁᴸᴸ +default_character_set_schema: ᴺᵁᴸᴸ +default_character_set_name: ᴺᵁᴸᴸ +sql_path: ᴺᵁᴸᴸ +``` + +## TABLES {#tables} + +Содержит столбцы, которые считываются из системной таблицы [system.tables](../../operations/system-tables/tables.md). + +Столбцы: + +- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — имя базы данных, в которой находится таблица. +- `table_schema` ([String](../../sql-reference/data-types/string.md)) — имя базы данных, в которой находится таблица. +- `table_name` ([String](../../sql-reference/data-types/string.md)) — имя таблицы. +- `table_type` ([Enum8](../../sql-reference/data-types/enum.md)) — тип таблицы. 
Возможные значения: + - `BASE TABLE` + - `VIEW` + - `FOREIGN TABLE` + - `LOCAL TEMPORARY` + - `SYSTEM VIEW` + +**Пример** + +Запрос: + +``` sql +SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE (table_schema = currentDatabase() OR table_schema = '') AND table_name NOT LIKE '%inner%' LIMIT 1 FORMAT Vertical; +``` + +Результат: + +``` text +Row 1: +────── +table_catalog: default +table_schema: default +table_name: describe_example +table_type: BASE TABLE +``` + +## VIEWS {#views} + +Содержит столбцы, которые считываются из системной таблицы [system.tables](../../operations/system-tables/tables.md), если использован движок [View](../../engines/table-engines/special/view.md). + +Столбцы: + +- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — имя базы данных, в которой находится таблица. +- `table_schema` ([String](../../sql-reference/data-types/string.md)) — имя базы данных, в которой находится таблица. +- `table_name` ([String](../../sql-reference/data-types/string.md)) — имя таблицы. +- `view_definition` ([String](../../sql-reference/data-types/string.md)) — `SELECT` запрос для представления. +- `check_option` ([String](../../sql-reference/data-types/string.md)) — `NONE`, нет проверки. +- `is_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, представление не обновляется. +- `is_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — показывает является ли представление [материализованным](../../sql-reference/statements/create/view/#materialized). Возможные значения: + - `NO` — создано обычное представление. + - `YES` — создано материализованное представление. +- `is_trigger_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, триггер не обновляется. +- `is_trigger_deletable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, триггер не удаляется. +- `is_trigger_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, данные не вставляются в триггер. + +**Пример** + +Запрос: + +``` sql +CREATE VIEW v (n Nullable(Int32), f Float64) AS SELECT n, f FROM t; +CREATE MATERIALIZED VIEW mv ENGINE = Null AS SELECT * FROM system.one; +SELECT * FROM information_schema.views WHERE table_schema = currentDatabase() LIMIT 1 FORMAT Vertical; +``` + +Результат: + +``` text +Row 1: +────── +table_catalog: default +table_schema: default +table_name: mv +view_definition: SELECT * FROM system.one +check_option: NONE +is_updatable: NO +is_insertable_into: YES +is_trigger_updatable: NO +is_trigger_deletable: NO +is_trigger_insertable_into: NO +``` diff --git a/docs/ru/operations/system-tables/parts.md b/docs/ru/operations/system-tables/parts.md index c73e1566a95..dd3d945daf5 100644 --- a/docs/ru/operations/system-tables/parts.md +++ b/docs/ru/operations/system-tables/parts.md @@ -38,6 +38,12 @@ - `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – размер файла с засечками. +- `secondary_indices_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – общий размер сжатых данных для вторичных индексов в куске данных. Вспомогательные файлы (например, файлы с засечками) не включены. + +- `secondary_indices_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – общий размер несжатых данных для вторичных индексов в куске данных. Вспомогательные файлы (например, файлы с засечками) не включены. + +- `secondary_indices_marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – размер файла с засечками для вторичных индексов. 
+ - `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – время модификации директории с куском данных. Обычно соответствует времени создания куска. - `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – время, когда кусок стал неактивным. @@ -119,6 +125,9 @@ rows: 6 bytes_on_disk: 310 data_compressed_bytes: 157 data_uncompressed_bytes: 91 +secondary_indices_compressed_bytes: 58 +secondary_indices_uncompressed_bytes: 6 +secondary_indices_marks_bytes: 48 marks_bytes: 144 modification_time: 2020-06-18 13:01:49 remove_time: 0000-00-00 00:00:00 diff --git a/docs/ru/operations/system-tables/query_log.md b/docs/ru/operations/system-tables/query_log.md index f7709d6f3da..644cee853cc 100644 --- a/docs/ru/operations/system-tables/query_log.md +++ b/docs/ru/operations/system-tables/query_log.md @@ -26,6 +26,8 @@ ClickHouse не удаляет данные из таблица автомати Чтобы уменьшить количество запросов, регистрирующихся в таблице `query_log`, вы можете использовать настройку [log_queries_probability](../../operations/settings/settings.md#log-queries-probability). +Чтобы регистрировать отформатированные запросы в столбце `formatted_query`, вы можете использовать настройку [log_formatted_queries](../../operations/settings/settings.md#settings-log-formatted-queries). + Столбцы: - `type` ([Enum8](../../sql-reference/data-types/enum.md)) — тип события, произошедшего при выполнении запроса. Значения: @@ -48,6 +50,7 @@ ClickHouse не удаляет данные из таблица автомати - `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — потребление RAM запросом. - `current_database` ([String](../../sql-reference/data-types/string.md)) — имя текущей базы данных. - `query` ([String](../../sql-reference/data-types/string.md)) — текст запроса. +- `formatted_query` ([String](../../sql-reference/data-types/string.md)) — текст отформатированного запроса. - `normalized_query_hash` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — идентичная хэш-сумма без значений литералов для аналогичных запросов. - `query_kind` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — тип запроса. - `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — имена баз данных, присутствующих в запросе. 
@@ -113,74 +116,72 @@ SELECT * FROM system.query_log WHERE type = 'QueryFinish' ORDER BY query_start_t Row 1: ────── type: QueryFinish -event_date: 2021-07-28 -event_time: 2021-07-28 13:46:56 -event_time_microseconds: 2021-07-28 13:46:56.719791 -query_start_time: 2021-07-28 13:46:56 -query_start_time_microseconds: 2021-07-28 13:46:56.704542 -query_duration_ms: 14 -read_rows: 8393 -read_bytes: 374325 +event_date: 2021-11-03 +event_time: 2021-11-03 16:13:54 +event_time_microseconds: 2021-11-03 16:13:54.953024 +query_start_time: 2021-11-03 16:13:54 +query_start_time_microseconds: 2021-11-03 16:13:54.952325 +query_duration_ms: 0 +read_rows: 69 +read_bytes: 6187 written_rows: 0 written_bytes: 0 -result_rows: 4201 -result_bytes: 153024 -memory_usage: 4714038 +result_rows: 69 +result_bytes: 48256 +memory_usage: 0 current_database: default -query: SELECT DISTINCT arrayJoin(extractAll(name, '[\\w_]{2,}')) AS res FROM (SELECT name FROM system.functions UNION ALL SELECT name FROM system.table_engines UNION ALL SELECT name FROM system.formats UNION ALL SELECT name FROM system.table_functions UNION ALL SELECT name FROM system.data_type_families UNION ALL SELECT name FROM system.merge_tree_settings UNION ALL SELECT name FROM system.settings UNION ALL SELECT cluster FROM system.clusters UNION ALL SELECT macro FROM system.macros UNION ALL SELECT policy_name FROM system.storage_policies UNION ALL SELECT concat(func.name, comb.name) FROM system.functions AS func CROSS JOIN system.aggregate_function_combinators AS comb WHERE is_aggregate UNION ALL SELECT name FROM system.databases LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.tables LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.dictionaries LIMIT 10000 UNION ALL SELECT DISTINCT name FROM system.columns LIMIT 10000) WHERE notEmpty(res) -normalized_query_hash: 6666026786019643712 -query_kind: Select -databases: ['system'] -tables: ['system.aggregate_function_combinators','system.clusters','system.columns','system.data_type_families','system.databases','system.dictionaries','system.formats','system.functions','system.macros','system.merge_tree_settings','system.settings','system.storage_policies','system.table_engines','system.table_functions','system.tables'] -columns: ['system.aggregate_function_combinators.name','system.clusters.cluster','system.columns.name','system.data_type_families.name','system.databases.name','system.dictionaries.name','system.formats.name','system.functions.is_aggregate','system.functions.name','system.macros.macro','system.merge_tree_settings.name','system.settings.name','system.storage_policies.policy_name','system.table_engines.name','system.table_functions.name','system.tables.name'] +query: DESCRIBE TABLE system.query_log +formatted_query: +normalized_query_hash: 8274064835331539124 +query_kind: +databases: [] +tables: [] +columns: [] projections: [] +views: [] exception_code: 0 exception: stack_trace: is_initial_query: 1 user: default -query_id: a3361f6e-a1fd-4d54-9f6f-f93a08bab0bf +query_id: 7c28bbbb-753b-4eba-98b1-efcbe2b9bdf6 address: ::ffff:127.0.0.1 -port: 51006 +port: 40452 initial_user: default -initial_query_id: a3361f6e-a1fd-4d54-9f6f-f93a08bab0bf +initial_query_id: 7c28bbbb-753b-4eba-98b1-efcbe2b9bdf6 initial_address: ::ffff:127.0.0.1 -initial_port: 51006 -initial_query_start_time: 2021-07-28 13:46:56 -initial_query_start_time_microseconds: 2021-07-28 13:46:56.704542 +initial_port: 40452 +initial_query_start_time: 2021-11-03 16:13:54 +initial_query_start_time_microseconds: 2021-11-03 16:13:54.952325 
interface: 1 -os_user: -client_hostname: -client_name: ClickHouse client +os_user: sevirov +client_hostname: clickhouse.ru-central1.internal +client_name: ClickHouse client_revision: 54449 client_version_major: 21 -client_version_minor: 8 -client_version_patch: 0 +client_version_minor: 10 +client_version_patch: 1 http_method: 0 http_user_agent: http_referer: forwarded_for: quota_key: -revision: 54453 +revision: 54456 log_comment: -thread_ids: [5058,22097,22110,22094] -ProfileEvents.Names: ['Query','SelectQuery','ArenaAllocChunks','ArenaAllocBytes','FunctionExecute','NetworkSendElapsedMicroseconds','SelectedRows','SelectedBytes','ContextLock','RWLockAcquiredReadLocks','RealTimeMicroseconds','UserTimeMicroseconds','SystemTimeMicroseconds','SoftPageFaults','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds','OSWriteBytes','OSWriteChars'] -ProfileEvents.Values: [1,1,39,352256,64,360,8393,374325,412,440,34480,13108,4723,671,19,17828,8192,10240] -Settings.Names: ['load_balancing','max_memory_usage'] -Settings.Values: ['random','10000000000'] +thread_ids: [30776,31174] +ProfileEvents: {'Query':1,'NetworkSendElapsedMicroseconds':59,'NetworkSendBytes':2643,'SelectedRows':69,'SelectedBytes':6187,'ContextLock':9,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':817,'UserTimeMicroseconds':427,'SystemTimeMicroseconds':212,'OSCPUVirtualTimeMicroseconds':639,'OSReadChars':894,'OSWriteChars':319} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} used_aggregate_functions: [] used_aggregate_function_combinators: [] used_database_engines: [] -used_data_type_families: ['UInt64','UInt8','Nullable','String','date'] +used_data_type_families: [] used_dictionaries: [] used_formats: [] -used_functions: ['concat','notEmpty','extractAll'] +used_functions: [] used_storages: [] used_table_functions: [] ``` -**Смотрите также** +**См. также** - [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — в этой таблице содержится информация о цепочке каждого выполненного запроса. - -[Оригинальная статья](https://clickhouse.com/docs/ru/operations/system_tables/query_log) diff --git a/docs/ru/operations/system-tables/tables.md b/docs/ru/operations/system-tables/tables.md index 03ad174780f..bf47051442e 100644 --- a/docs/ru/operations/system-tables/tables.md +++ b/docs/ru/operations/system-tables/tables.md @@ -9,20 +9,37 @@ Столбцы: - `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных, в которой находится таблица. + - `name` ([String](../../sql-reference/data-types/string.md)) — имя таблицы. + - `engine` ([String](../../sql-reference/data-types/string.md)) — движок таблицы (без параметров). + - `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, указывающий на то, временная это таблица или нет. + - `data_path` ([String](../../sql-reference/data-types/string.md)) — путь к данным таблицы в файловой системе. + - `metadata_path` ([String](../../sql-reference/data-types/string.md)) — путь к табличным метаданным в файловой системе. + - `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время последней модификации табличных метаданных. + - `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — зависимости базы данных. 
+ - `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — табличные зависимости (таблицы [MaterializedView](../../engines/table-engines/special/materializedview.md), созданные на базе текущей таблицы). + - `create_table_query` ([String](../../sql-reference/data-types/string.md)) — запрос, при помощи которого создавалась таблица. + - `engine_full` ([String](../../sql-reference/data-types/string.md)) — параметры табличного движка. + +- `as_select` ([String](../../sql-reference/data-types/string.md)) - `SELECT` запрос для представления. + - `partition_key` ([String](../../sql-reference/data-types/string.md)) — ключ партиционирования таблицы. + - `sorting_key` ([String](../../sql-reference/data-types/string.md)) — ключ сортировки таблицы. + - `primary_key` ([String](../../sql-reference/data-types/string.md)) - первичный ключ таблицы. + - `sampling_key` ([String](../../sql-reference/data-types/string.md)) — ключ сэмплирования таблицы. + - `storage_policy` ([String](../../sql-reference/data-types/string.md)) - политика хранения данных: - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) @@ -41,6 +58,8 @@ - `comment` ([String](../../sql-reference/data-types/string.md)) — комментарий к таблице. +- `has_own_data` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, показывающий хранит ли таблица сама какие-то данные на диске или только обращается к какому-то другому источнику. + Таблица `system.tables` используется при выполнении запроса `SHOW TABLES`. **Пример** @@ -64,6 +83,7 @@ dependencies_database: [] dependencies_table: [] create_table_query: CREATE TABLE base.t1 (`n` UInt64) ENGINE = MergeTree ORDER BY n SETTINGS index_granularity = 8192 engine_full: MergeTree ORDER BY n SETTINGS index_granularity = 8192 +as_select: SELECT database AS table_catalog partition_key: sorting_key: n primary_key: n @@ -74,6 +94,7 @@ total_bytes: 99 lifetime_rows: ᴺᵁᴸᴸ lifetime_bytes: ᴺᵁᴸᴸ comment: +has_own_data: 0 Row 2: ────── @@ -89,6 +110,7 @@ dependencies_database: [] dependencies_table: [] create_table_query: CREATE TABLE default.`53r93yleapyears` (`id` Int8, `febdays` Int8) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192 engine_full: MergeTree ORDER BY id SETTINGS index_granularity = 8192 +as_select: SELECT name AS catalog_name partition_key: sorting_key: id primary_key: id @@ -99,4 +121,5 @@ total_bytes: 155 lifetime_rows: ᴺᵁᴸᴸ lifetime_bytes: ᴺᵁᴸᴸ comment: +has_own_data: 0 ``` diff --git a/docs/ru/sql-reference/functions/geo/h3.md b/docs/ru/sql-reference/functions/geo/h3.md index db96f0caa1d..8f7b98f0a45 100644 --- a/docs/ru/sql-reference/functions/geo/h3.md +++ b/docs/ru/sql-reference/functions/geo/h3.md @@ -227,6 +227,42 @@ SELECT h3ToGeo(644325524701193974) coordinates; └───────────────────────────────────────┘ ``` +## h3ToGeoBoundary {#h3togeoboundary} + +Возвращает массив пар `(lon, lat)`, который соответствует границе указанного H3 индекса. + +**Синтаксис** + +``` sql +h3ToGeoBoundary(h3Index) +``` + +**Аргументы** + +- `h3Index` — H3 индекс. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Возвращаемые значения** + +- Массив пар '(lon, lat)'. +Тип: [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). 
+ + +**Пример** + +Запрос: + +``` sql +SELECT h3ToGeoBoundary(644325524701193974) AS coordinates; +``` + +Результат: + +``` text +┌─h3ToGeoBoundary(599686042433355775)────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ [(37.2713558667319,-121.91508032705622),(37.353926450852256,-121.8622232890249),(37.42834118609435,-121.92354999630156),(37.42012867767779,-122.03773496427027),(37.33755608435299,-122.090428929044),(37.26319797461824,-122.02910130919001)] │ +└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + ## h3kRing {#h3kring} Возвращает [H3](#h3index)-индексы шестигранников в радиусе `k` от данного в произвольном порядке. diff --git a/docs/ru/sql-reference/functions/rounding-functions.md b/docs/ru/sql-reference/functions/rounding-functions.md index 1eede1ea57c..bcd47973a21 100644 --- a/docs/ru/sql-reference/functions/rounding-functions.md +++ b/docs/ru/sql-reference/functions/rounding-functions.md @@ -173,7 +173,7 @@ roundBankers(4.5) = 4 roundBankers(3.55, 1) = 3.6 roundBankers(3.65, 1) = 3.6 roundBankers(10.35, 1) = 10.4 -roundBankers(10.755, 2) = 11,76 +roundBankers(10.755, 2) = 10,76 ``` **Смотрите также** diff --git a/docs/ru/sql-reference/functions/splitting-merging-functions.md b/docs/ru/sql-reference/functions/splitting-merging-functions.md index 595ad22ac46..b3f91077dfa 100644 --- a/docs/ru/sql-reference/functions/splitting-merging-functions.md +++ b/docs/ru/sql-reference/functions/splitting-merging-functions.md @@ -212,8 +212,8 @@ SELECT splitByNonAlpha(' 1! a, b. '); ## arrayStringConcat(arr\[, separator\]) {#arraystringconcatarr-separator} -Склеивает строки, перечисленные в массиве, с разделителем separator. -separator - необязательный параметр, константная строка, по умолчанию равен пустой строке. +Склеивает строковые представления элементов массива с разделителем `separator`. +`separator` - необязательный параметр, константная строка, по умолчанию равен пустой строке. Возвращается строка. ## alphaTokens(s) {#alphatokenss} diff --git a/docs/ru/sql-reference/statements/alter/partition.md b/docs/ru/sql-reference/statements/alter/partition.md index f875103a498..7cc6d5b31c1 100644 --- a/docs/ru/sql-reference/statements/alter/partition.md +++ b/docs/ru/sql-reference/statements/alter/partition.md @@ -165,7 +165,7 @@ ALTER TABLE table_name CLEAR INDEX index_name IN PARTITION partition_expr ## FREEZE PARTITION {#alter_freeze-partition} ``` sql -ALTER TABLE table_name FREEZE [PARTITION partition_expr] +ALTER TABLE table_name FREEZE [PARTITION partition_expr] [WITH NAME 'backup_name'] ``` Создаёт резервную копию для заданной партиции. Если выражение `PARTITION` опущено, резервные копии будут созданы для всех партиций. @@ -179,6 +179,7 @@ ALTER TABLE table_name FREEZE [PARTITION partition_expr] - `/var/lib/clickhouse/` — рабочая директория ClickHouse, заданная в конфигурационном файле; - `N` — инкрементальный номер резервной копии. +- если задан параметр `WITH NAME`, то вместо инкрементального номера используется значение параметра `'backup_name'`. !!! 
note "Примечание" При использовании [нескольких дисков для хранения данных таблицы](../../statements/alter/index.md#table_engine-mergetree-multiple-volumes) директория `shadow/N` появляется на каждом из дисков, на которых были куски, попавшие под выражение `PARTITION`. diff --git a/docs/ru/sql-reference/statements/create/database.md b/docs/ru/sql-reference/statements/create/database.md index 7d19f3e8f17..b697f4caada 100644 --- a/docs/ru/sql-reference/statements/create/database.md +++ b/docs/ru/sql-reference/statements/create/database.md @@ -8,27 +8,51 @@ toc_title: "База данных" Создает базу данных. ``` sql -CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] [ENGINE = engine(...)] +CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] [ENGINE = engine(...)] [COMMENT 'Comment'] ``` -### Секции {#sektsii} +## Секции {#clauses} -- `IF NOT EXISTS` +### IF NOT EXISTS {#if-not-exists} - Если база данных с именем `db_name` уже существует, то ClickHouse не создаёт базу данных и: - - Не генерирует исключение, если секция указана. - - Генерирует исключение, если секция не указана. +Если база данных с именем `db_name` уже существует, то ClickHouse не создает базу данных и: -- `ON CLUSTER` +- Не генерирует исключение, если секция указана. +- Генерирует исключение, если секция не указана. - ClickHouse создаёт базу данных `db_name` на всех серверах указанного кластера. +### ON CLUSTER {#on-cluster} -- `ENGINE` +ClickHouse создаёт базу данных с именем `db_name` на всех серверах указанного кластера. Более подробную информацию смотрите в разделе [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md). - - MySQL +### ENGINE {#engine} - Позволяет получать данные с удаленного сервера MySQL. +По умолчанию ClickHouse использует собственный движок баз данных [Atomic](../../../engines/database-engines/atomic.md). Есть также движки баз данных [Lazy](../../../engines/database-engines/lazy.md), [MySQL](../../../engines/database-engines/mysql.md), [PostgresSQL](../../../engines/database-engines/postgresql.md), [MaterializedMySQL](../../../engines/database-engines/materialized-mysql.md), [MaterializedPostgreSQL](../../../engines/database-engines/materialized-postgresql.md), [Replicated](../../../engines/database-engines/replicated.md), [SQLite](../../../engines/database-engines/sqlite.md). - По умолчанию ClickHouse использует собственный движок баз данных. +### COMMENT {#comment} - +Вы можете добавить комментарий к базе данных при ее создании. + +Комментарий поддерживается для всех движков баз данных. + +**Синтаксис** + +``` sql +CREATE DATABASE db_name ENGINE = engine(...) COMMENT 'Comment' +``` + +**Пример** + +Запрос: + +``` sql +CREATE DATABASE db_comment ENGINE = Memory COMMENT 'The temporary database'; +SELECT name, comment FROM system.databases WHERE name = 'db_comment'; +``` + +Результат: + +```text +┌─name───────┬─comment────────────────┐ +│ db_comment │ The temporary database │ +└────────────┴────────────────────────┘ +``` diff --git a/docs/ru/sql-reference/table-functions/merge.md b/docs/ru/sql-reference/table-functions/merge.md index 5b33f458468..24246103d64 100644 --- a/docs/ru/sql-reference/table-functions/merge.md +++ b/docs/ru/sql-reference/table-functions/merge.md @@ -5,7 +5,22 @@ toc_title: merge # merge {#merge} -`merge(db_name, 'tables_regexp')` - создаёт временную таблицу типа Merge. Подробнее смотрите раздел «Движки таблиц, Merge». +Cоздаёт временную таблицу типа [Merge](../../engines/table-engines/special/merge.md). 
Структура таблицы берётся из первой попавшейся таблицы, подходящей под регулярное выражение. -Структура таблицы берётся из первой попавшейся таблицы, подходящей под регулярное выражение. +**Синтаксис** +```sql +merge('db_name', 'tables_regexp') +``` +**Аргументы** + +- `db_name` — Возможные варианты: + - имя БД, + - выражение, возвращающее строку с именем БД, например, `currentDatabase()`, + - `REGEXP(expression)`, где `expression` — регулярное выражение для отбора БД. + +- `tables_regexp` — регулярное выражение для имен таблиц в указанной БД или нескольких БД. + +**См. также** + +- Табличный движок [Merge](../../engines/table-engines/special/merge.md) diff --git a/docs/tools/build.py b/docs/tools/build.py index aa440ecb5dc..08329c33271 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -117,6 +117,9 @@ def build_for_lang(lang, args): ) ) + # Clean to be safe if last build finished abnormally + single_page.remove_temporary_files(lang, args) + raw_config['nav'] = nav.build_docs_nav(lang, args) cfg = config.load_config(**raw_config) diff --git a/docs/tools/single_page.py b/docs/tools/single_page.py index 0e82a1acb87..2c0ddebf3c7 100644 --- a/docs/tools/single_page.py +++ b/docs/tools/single_page.py @@ -12,6 +12,7 @@ import test import util import website +TEMPORARY_FILE_NAME = 'single.md' def recursive_values(item): if isinstance(item, dict): @@ -101,6 +102,14 @@ def concatenate(lang, docs_path, single_page_file, nav): single_page_file.flush() +def get_temporary_file_name(lang, args): + return os.path.join(args.docs_dir, lang, TEMPORARY_FILE_NAME) + +def remove_temporary_files(lang, args): + single_md_path = get_temporary_file_name(lang, args) + if os.path.exists(single_md_path): + os.unlink(single_md_path) + def build_single_page_version(lang, args, nav, cfg): logging.info(f'Building single page version for {lang}') @@ -109,7 +118,7 @@ def build_single_page_version(lang, args, nav, cfg): extra['single_page'] = True extra['is_amp'] = False - single_md_path = os.path.join(args.docs_dir, lang, 'single.md') + single_md_path = get_temporary_file_name(lang, args) with open(single_md_path, 'w') as single_md: concatenate(lang, args.docs_dir, single_md, nav) @@ -226,5 +235,4 @@ def build_single_page_version(lang, args, nav, cfg): logging.info(f'Finished building single page version for {lang}') - if os.path.exists(single_md_path): - os.unlink(single_md_path) + remove_temporary_files(lang, args) diff --git a/docs/zh/operations/settings/permissions-for-queries.md b/docs/zh/operations/settings/permissions-for-queries.md index a72500b76d5..93e439ae206 100644 --- a/docs/zh/operations/settings/permissions-for-queries.md +++ b/docs/zh/operations/settings/permissions-for-queries.md @@ -9,52 +9,51 @@ toc_title: "\u67E5\u8BE2\u6743\u9650" ClickHouse中的查询可以分为几种类型: -1. 读取数据查询: `SELECT`, `SHOW`, `DESCRIBE`, `EXISTS`. -2. 写入数据查询: `INSERT`, `OPTIMIZE`. -3. 更改设置查询: `SET`, `USE`. -4. [DDL](https://en.wikipedia.org/wiki/Data_definition_language) 查询: `CREATE`, `ALTER`, `RENAME`, `ATTACH`, `DETACH`, `DROP` `TRUNCATE`. -5. `KILL QUERY`. +1. 读取数据的查询: `SELECT` , `SHOW` , `DESCRIBE` , `EXISTS` 。 +2. 写入数据的查询: `INSERT` , `OPTIMIZE` 。 +3. 更改设置的查询: `SET` , `USE` 。 +4. [DDL](https://zh.wikipedia.org/zh-cn/数据定义语言) 查询: `CREATE` , `ALTER` , `RENAME` , `ATTACH` , `DETACH` , `DROP` , `TRUNCATE` 。 +5. `KILL QUERY` 。 以下设置按查询类型规范用户权限: -- [只读](#settings_readonly) — Restricts permissions for all types of queries except DDL queries. -- [allow_ddl](#settings_allow_ddl) — Restricts permissions for DDL queries. 
+- [readonly](#settings_readonly) — 对除 DDL 查询以外的所有类型限制权限。 +- [allow_ddl](#settings_allow_ddl) — 对 DDL 查询限制权限。 `KILL QUERY` 可以与任何设置进行。 -## 只读 {#settings_readonly} +## readonly {#settings_readonly} 限制读取数据、写入数据和更改设置查询的权限。 -查看查询如何划分为多种类型 [以上](#permissions_for_queries). +查看查询如何划分为 **[上述](#permissions_for_queries)** 的多种类型。 可能的值: -- 0 — All queries are allowed. -- 1 — Only read data queries are allowed. -- 2 — Read data and change settings queries are allowed. +- 0 — 所有查询都被允许。 +- 1 — 只有读取数据的查询被允许。 +- 2 — 读取数据以及变更设置的查询被允许。 -设置后 `readonly = 1`,用户无法更改 `readonly` 和 `allow_ddl` 当前会话中的设置。 +设置为 `readonly = 1` 后,用户无法在当前会话中更改 `readonly` 和 `allow_ddl` 设置。 -使用时 `GET` 方法中的 [HTTP接口](../../interfaces/http.md), `readonly = 1` 自动设置。 要修改数据,请使用 `POST` 方法。 +当使用 [HTTP接口](../../interfaces/http.md) 中的 `GET` 方法时,将自动设置为 `readonly = 1` 。 要修改数据,请使用 `POST` 方法。 -设置 `readonly = 1` 禁止用户更改所有设置。 有一种方法可以禁止用户 -从只更改特定设置,有关详细信息,请参阅 [对设置的限制](constraints-on-settings.md). +设置 `readonly = 1` 将禁止用户的更改任何设置。有一种方法可以只禁止用户更改特定的设置,有关详细信息,请参阅 [对设置的限制](constraints-on-settings.md)。 默认值:0 ## allow_ddl {#settings_allow_ddl} -允许或拒绝 [DDL](https://en.wikipedia.org/wiki/Data_definition_language) 查询。 +允许或拒绝 [DDL](https://zh.wikipedia.org/zh-cn/数据定义语言) 查询。 -查看查询如何划分为多种类型 [以上](#permissions_for_queries). +从 [上文](#permissions_for_queries) 查看查询是如何被划分为多种类型的。 可能的值: -- 0 — DDL queries are not allowed. -- 1 — DDL queries are allowed. +- 0 — DDL 查询不被允许。 +- 1 — DDL 查询被允许。 -你不能执行 `SET allow_ddl = 1` 如果 `allow_ddl = 0` 对于当前会话。 +如果对当前会话 `allow_ddl = 0` ,你就不能执行 `SET allow_ddl = 1` 。 默认值:1 diff --git a/docs/zh/operations/system-tables/crash-log.md b/docs/zh/operations/system-tables/crash-log.md deleted file mode 120000 index d1aa67601bc..00000000000 --- a/docs/zh/operations/system-tables/crash-log.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/crash-log.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/crash-log.md b/docs/zh/operations/system-tables/crash-log.md new file mode 100644 index 00000000000..bd5c97937de --- /dev/null +++ b/docs/zh/operations/system-tables/crash-log.md @@ -0,0 +1,48 @@ +# system.crash_log {#system-tables_crash_log} + +包含有关致命错误堆栈跟踪的信息.该表默认不存在于数据库中, 仅在发生致命错误时才创建. + +列信息: + +- `event_date` ([Datetime](../../sql-reference/data-types/datetime.md)) — 事件日期. +- `event_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — 事件时间. +- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 以纳秒为单位的事件时间戳. +- `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — 信号编号. +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 线程ID. +- `query_id` ([String](../../sql-reference/data-types/string.md)) — 查询ID. +- `trace` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 崩溃时的堆栈跟踪.每个元素都是 ClickHouse 服务器进程内的一个虚拟内存地址. +- `trace_full` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 崩溃时的堆栈跟踪.每个元素在 ClickHouse 服务器进程中包含一个被调用的方法. +- `version` ([String](../../sql-reference/data-types/string.md)) — ClickHouse 服务器版本. +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse 服务器订正版本. +- `build_id` ([String](../../sql-reference/data-types/string.md)) — 编译器生成的 BuildID. 
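The `trace` column stores raw virtual addresses. A minimal sketch of turning them into symbol names, assuming the introspection functions are enabled for the session:

``` sql
-- Sketch only: symbolize the raw addresses in `trace`;
-- requires allow_introspection_functions to be enabled.
SET allow_introspection_functions = 1;

SELECT
    signal,
    arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS symbolized_trace
FROM system.crash_log
ORDER BY event_time DESC
LIMIT 1;
```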
+ +**案例** + +查询: + +``` sql +SELECT * FROM system.crash_log ORDER BY event_time DESC LIMIT 1; +``` + +结果 (部分): + +``` text +Row 1: +────── +event_date: 2020-10-14 +event_time: 2020-10-14 15:47:40 +timestamp_ns: 1602679660271312710 +signal: 11 +thread_id: 23624 +query_id: 428aab7c-8f5c-44e9-9607-d16b44467e69 +trace: [188531193,...] +trace_full: ['3. DB::(anonymous namespace)::FunctionFormatReadableTimeDelta::executeImpl(std::__1::vector >&, std::__1::vector > const&, unsigned long, unsigned long) const @ 0xb3cc1f9 in /home/username/work/ClickHouse/build/programs/clickhouse',...] +version: ClickHouse 20.11.1.1 +revision: 54442 +build_id: +``` + +**另请参阅** +- [trace_log](../../operations/system-tables/trace_log.md) 系统表 + +[原始文章](https://clickhouse.com/docs/en/operations/system-tables/crash-log) diff --git a/docs/zh/operations/system-tables/current-roles.md b/docs/zh/operations/system-tables/current-roles.md deleted file mode 120000 index b9f1f9d6e9d..00000000000 --- a/docs/zh/operations/system-tables/current-roles.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/current-roles.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/current-roles.md b/docs/zh/operations/system-tables/current-roles.md new file mode 100644 index 00000000000..8b9ddec2e0b --- /dev/null +++ b/docs/zh/operations/system-tables/current-roles.md @@ -0,0 +1,11 @@ +# system.current_roles {#system_tables-current_roles} + +包含当前用户的激活角色. `SET ROLE` 修改该表的内容. + +列信息: + + - `role_name` ([String](../../sql-reference/data-types/string.md))) — 角色名称. + - `with_admin_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 显示 `current_role` 是否是具有 `ADMIN OPTION` 权限的角色的标志. + - `is_default` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 显示 `current_role` 是否为默认角色的标志. + + [原始文章](https://clickhouse.com/docs/en/operations/system-tables/current-roles) diff --git a/docs/zh/operations/system-tables/data_skipping_indices.md b/docs/zh/operations/system-tables/data_skipping_indices.md deleted file mode 120000 index 2820987bf69..00000000000 --- a/docs/zh/operations/system-tables/data_skipping_indices.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/data_skipping_indices.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/data_skipping_indices.md b/docs/zh/operations/system-tables/data_skipping_indices.md new file mode 100644 index 00000000000..3278a7f830f --- /dev/null +++ b/docs/zh/operations/system-tables/data_skipping_indices.md @@ -0,0 +1,47 @@ +# system.data_skipping_indices {#system-data-skipping-indices} + +包含有关所有表中现有数据跳过索引的信息. + +列信息: + +- `database` ([String](../../sql-reference/data-types/string.md)) — 数据库名称. +- `table` ([String](../../sql-reference/data-types/string.md)) — 数据表名称. +- `name` ([String](../../sql-reference/data-types/string.md)) — 索引名称. +- `type` ([String](../../sql-reference/data-types/string.md)) — 索引类型. +- `expr` ([String](../../sql-reference/data-types/string.md)) — 索引计算表达式. +- `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 块中颗粒的数量. +- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 压缩数据的大小, 以字节为单位. +- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 解压缩数据的大小, 以字节为单位. +- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 标记的大小, 以字节为单位. 
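Rows in this table come from skipping indices declared on MergeTree tables. A minimal sketch of such a declaration, with illustrative names that match the sample output below:

``` sql
-- Sketch only: a MergeTree table with a minmax skipping index;
-- once created, the index shows up as a row in system.data_skipping_indices.
CREATE TABLE user_actions
(
    user_id UInt64,
    clicks  UInt32,
    INDEX clicks_idx clicks TYPE minmax GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY user_id;
```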
+ +**示例** + +```sql +SELECT * FROM system.data_skipping_indices LIMIT 2 FORMAT Vertical; +``` + +```text +Row 1: +────── +database: default +table: user_actions +name: clicks_idx +type: minmax +expr: clicks +granularity: 1 +data_compressed_bytes: 58 +data_uncompressed_bytes: 6 +marks: 48 + +Row 2: +────── +database: default +table: users +name: contacts_null_idx +type: minmax +expr: assumeNotNull(contacts_null) +granularity: 1 +data_compressed_bytes: 58 +data_uncompressed_bytes: 6 +marks: 48 +``` diff --git a/docs/zh/operations/system-tables/distributed_ddl_queue.md b/docs/zh/operations/system-tables/distributed_ddl_queue.md deleted file mode 120000 index a9cadc74af2..00000000000 --- a/docs/zh/operations/system-tables/distributed_ddl_queue.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/distributed_ddl_queue.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/distributed_ddl_queue.md b/docs/zh/operations/system-tables/distributed_ddl_queue.md new file mode 100644 index 00000000000..5237673d5d0 --- /dev/null +++ b/docs/zh/operations/system-tables/distributed_ddl_queue.md @@ -0,0 +1,64 @@ +# system.distributed_ddl_queue {#system_tables-distributed_ddl_queue} + +包含有关在集群上执行的[分布式ddl查询(集群环境)](../../sql-reference/distributed-ddl.md)的信息. + +列信息: + +- `entry` ([String](../../sql-reference/data-types/string.md)) — 查询ID. +- `host_name` ([String](../../sql-reference/data-types/string.md)) — 主机名称. +- `host_address` ([String](../../sql-reference/data-types/string.md)) — 主机名解析到的IP地址. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — 主机端口. +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — 查询状态. +- `cluster` ([String](../../sql-reference/data-types/string.md)) — 群集名称. +- `query` ([String](../../sql-reference/data-types/string.md)) — 执行查询. +- `initiator` ([String](../../sql-reference/data-types/string.md)) — 执行查询的节点. +- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 查询开始时间. +- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 查询结束时间. +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 查询执行时间(毫秒). +- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — 来自于[ZooKeeper](../../operations/tips.md#zookeeper)的异常代码. + +**示例** + +``` sql +SELECT * +FROM system.distributed_ddl_queue +WHERE cluster = 'test_cluster' +LIMIT 2 +FORMAT Vertical + +Query id: f544e72a-6641-43f1-836b-24baa1c9632a + +Row 1: +────── +entry: query-0000000000 +host_name: clickhouse01 +host_address: 172.23.0.11 +port: 9000 +status: Finished +cluster: test_cluster +query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster +initiator: clickhouse01:9000 +query_start_time: 2020-12-30 13:07:51 +query_finish_time: 2020-12-30 13:07:51 +query_duration_ms: 6 +exception_code: ZOK + +Row 2: +────── +entry: query-0000000000 +host_name: clickhouse02 +host_address: 172.23.0.12 +port: 9000 +status: Finished +cluster: test_cluster +query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster +initiator: clickhouse01:9000 +query_start_time: 2020-12-30 13:07:51 +query_finish_time: 2020-12-30 13:07:51 +query_duration_ms: 6 +exception_code: ZOK + +2 rows in set. Elapsed: 0.025 sec. 
+``` + +[原始文章](https://clickhouse.com/docs/en/operations/system_tables/distributed_ddl_queuedistributed_ddl_queue.md) diff --git a/docs/zh/operations/system-tables/distribution_queue.md b/docs/zh/operations/system-tables/distribution_queue.md deleted file mode 120000 index 3831b85000c..00000000000 --- a/docs/zh/operations/system-tables/distribution_queue.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/distribution_queue.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/distribution_queue.md b/docs/zh/operations/system-tables/distribution_queue.md new file mode 100644 index 00000000000..2317e0677a7 --- /dev/null +++ b/docs/zh/operations/system-tables/distribution_queue.md @@ -0,0 +1,50 @@ +# system.distribution_queue {#system_tables-distribution_queue} + +包含关于队列中要发送到分片的本地文件的信息. 这些本地文件包含通过以异步模式将新数据插入到Distributed表中而创建的新部分. + +列信息: + +- `database` ([String](../../sql-reference/data-types/string.md)) — 数据库名称. + +- `table` ([String](../../sql-reference/data-types/string.md)) — 表名称. + +- `data_path` ([String](../../sql-reference/data-types/string.md)) — 存放本地文件的文件夹的路径. + +- `is_blocked` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag表示是否阻止向服务器发送本地文件. + +- `error_count` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 错误总数. + +- `data_files` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 文件夹中的本地文件数. + +- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 本地文件中压缩数据的大小, 以字节为单位. + +- `broken_data_files` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 被标记为损坏的文件数量(由于错误). + +- `broken_data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 破碎文件中压缩数据的大小, 以字节为单位. + +- `last_exception` ([String](../../sql-reference/data-types/string.md)) — 关于最近发生的错误的文本信息(如果有的话). + +**示例** + +``` sql +SELECT * FROM system.distribution_queue LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +database: default +table: dist +data_path: ./store/268/268bc070-3aad-4b1a-9cf2-4987580161af/default@127%2E0%2E0%2E2:9000/ +is_blocked: 1 +error_count: 0 +data_files: 1 +data_compressed_bytes: 499 +last_exception: +``` + +**另请参阅** + +- [分布式表引擎](../../engines/table-engines/special/distributed.md) + +[原始文章](https://clickhouse.com/docs/en/operations/system_tables/distribution_queue) diff --git a/docs/zh/operations/system-tables/enabled-roles.md b/docs/zh/operations/system-tables/enabled-roles.md deleted file mode 120000 index 04ffee25343..00000000000 --- a/docs/zh/operations/system-tables/enabled-roles.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/enabled-roles.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/enabled-roles.md b/docs/zh/operations/system-tables/enabled-roles.md new file mode 100644 index 00000000000..f02c8ec1e65 --- /dev/null +++ b/docs/zh/operations/system-tables/enabled-roles.md @@ -0,0 +1,12 @@ +# system.enabled_roles {#system_tables-enabled_roles} + +包含当前所有活动角色, 包括当前用户的当前角色和当前角色的已授予角色. + +列信息: + +- `role_name` ([String](../../sql-reference/data-types/string.md))) — 角色名称. +- `with_admin_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 显示 `enabled_role` 是否为具有 `ADMIN OPTION` 权限的角色的标志. +- `is_current` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 显示 `enabled_role` 是否是当前用户的当前角色的标志. +- `is_default` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 显示 `enabled_role` 是否为默认角色的标志. 
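A minimal usage sketch; the role name here is illustrative:

``` sql
-- Sketch only: `accountant` is an illustrative role name.
CREATE ROLE IF NOT EXISTS accountant;
GRANT accountant TO CURRENT_USER;
SET ROLE accountant;

SELECT role_name, with_admin_option, is_current, is_default
FROM system.enabled_roles;
```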
+ +[原始文章](https://clickhouse.com/docs/en/operations/system-tables/enabled-roles) diff --git a/docs/zh/operations/system-tables/errors.md b/docs/zh/operations/system-tables/errors.md deleted file mode 120000 index 2bb8c1dff0d..00000000000 --- a/docs/zh/operations/system-tables/errors.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/errors.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/errors.md b/docs/zh/operations/system-tables/errors.md new file mode 100644 index 00000000000..4d7baafb865 --- /dev/null +++ b/docs/zh/operations/system-tables/errors.md @@ -0,0 +1,35 @@ +# system.errors {#system_tables-errors} + +包含错误代码和它们被触发的次数. + +列信息: + +- `name` ([String](../../sql-reference/data-types/string.md)) — 错误名称 (`errorCodeToName`). +- `code` ([Int32](../../sql-reference/data-types/int-uint.md)) — 错误码. +- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 发生此错误的次数. +- `last_error_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 上一次错误发生的时间. +- `last_error_message` ([String](../../sql-reference/data-types/string.md)) — 最后一个错误的消息. +- `last_error_trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — 一个[堆栈跟踪](https://en.wikipedia.org/wiki/Stack_trace), 它表示存储被调用方法的物理地址列表. +- `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 远程异常(即在一个分布式查询期间接收的). + +**示例** + +``` sql +SELECT name, code, value +FROM system.errors +WHERE value > 0 +ORDER BY code ASC +LIMIT 1 + +┌─name─────────────┬─code─┬─value─┐ +│ CANNOT_OPEN_FILE │ 76 │ 1 │ +└──────────────────┴──────┴───────┘ +``` + +``` sql +WITH arrayMap(x -> demangle(addressToSymbol(x)), last_error_trace) AS all +SELECT name, arrayStringConcat(all, '\n') AS res +FROM system.errors +LIMIT 1 +SETTINGS allow_introspection_functions=1\G +``` diff --git a/docs/zh/operations/system-tables/grants.md b/docs/zh/operations/system-tables/grants.md deleted file mode 120000 index ec7f50b118e..00000000000 --- a/docs/zh/operations/system-tables/grants.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/grants.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/grants.md b/docs/zh/operations/system-tables/grants.md new file mode 100644 index 00000000000..8795f5ebf19 --- /dev/null +++ b/docs/zh/operations/system-tables/grants.md @@ -0,0 +1,24 @@ +# system.grants {#system_tables-grants} + +授予ClickHouse用户帐户的权限. + +列信息: +- `user_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — 用户名称. + +- `role_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — 分配给用户帐号的角色. + +- `access_type` ([Enum8](../../sql-reference/data-types/enum.md)) — ClickHouse用户帐号的接入参数. + +- `database` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — 数据库名称. + +- `table` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — 表名称. + +- `column` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — 被授予访问权限的列的名称. + +- `is_partial_revoke` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 逻辑值. 它显示了某些特权是否被取消. 可能的值: +- `0` — 该行描述了部分撤销. +- `1` — 这一行描述了一个授权. + +- `grant_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 被授予`WITH GRANT OPTION` 权限, 参见 [GRANT](../../sql-reference/statements/grant.md#grant-privigele-syntax). 
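A sketch of inspecting what has been granted to a particular account; the account name is illustrative:

``` sql
-- Sketch only: `some_user` is an illustrative account name.
SELECT access_type, database, table, column, is_partial_revoke, grant_option
FROM system.grants
WHERE user_name = 'some_user';
```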
+ +[原始文章](https://clickhouse.com/docs/en/operations/system-tables/grants) diff --git a/docs/zh/operations/system-tables/licenses.md b/docs/zh/operations/system-tables/licenses.md deleted file mode 120000 index a84b4bbf3e3..00000000000 --- a/docs/zh/operations/system-tables/licenses.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/licenses.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/licenses.md b/docs/zh/operations/system-tables/licenses.md new file mode 100644 index 00000000000..7e59ba865f5 --- /dev/null +++ b/docs/zh/operations/system-tables/licenses.md @@ -0,0 +1,39 @@ +# system.licenses {#system-tables_system.licenses} + +包含位于 ClickHouse 源的 [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) 目录中的第三方库的许可证. + +列信息: + +- `library_name` ([String](../../sql-reference/data-types/string.md)) — 库的名称, 它是与之连接的许可证. +- `license_type` ([String](../../sql-reference/data-types/string.md)) — 许可类型-例如Apache, MIT. +- `license_path` ([String](../../sql-reference/data-types/string.md)) — 带有许可文本的文件的路径. +- `license_text` ([String](../../sql-reference/data-types/string.md)) — 许可协议文本. + +**示例** + +``` sql +SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15 +``` + +``` text +┌─library_name───────┬─license_type─┬─license_path────────────────────────┐ +│ FastMemcpy │ MIT │ /contrib/FastMemcpy/LICENSE │ +│ arrow │ Apache │ /contrib/arrow/LICENSE.txt │ +│ avro │ Apache │ /contrib/avro/LICENSE.txt │ +│ aws-c-common │ Apache │ /contrib/aws-c-common/LICENSE │ +│ aws-c-event-stream │ Apache │ /contrib/aws-c-event-stream/LICENSE │ +│ aws-checksums │ Apache │ /contrib/aws-checksums/LICENSE │ +│ aws │ Apache │ /contrib/aws/LICENSE.txt │ +│ base64 │ BSD 2-clause │ /contrib/base64/LICENSE │ +│ boost │ Boost │ /contrib/boost/LICENSE_1_0.txt │ +│ brotli │ MIT │ /contrib/brotli/LICENSE │ +│ capnproto │ MIT │ /contrib/capnproto/LICENSE │ +│ cassandra │ Apache │ /contrib/cassandra/LICENSE.txt │ +│ cctz │ Apache │ /contrib/cctz/LICENSE.txt │ +│ cityhash102 │ MIT │ /contrib/cityhash102/COPYING │ +│ cppkafka │ BSD 2-clause │ /contrib/cppkafka/LICENSE │ +└────────────────────┴──────────────┴─────────────────────────────────────┘ + +``` + +[原始文章](https://clickhouse.com/docs/en/operations/system-tables/licenses) diff --git a/docs/zh/operations/system-tables/opentelemetry_span_log.md b/docs/zh/operations/system-tables/opentelemetry_span_log.md deleted file mode 120000 index 59aab8cd11d..00000000000 --- a/docs/zh/operations/system-tables/opentelemetry_span_log.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/opentelemetry_span_log.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/opentelemetry_span_log.md b/docs/zh/operations/system-tables/opentelemetry_span_log.md new file mode 100644 index 00000000000..6eceb3da889 --- /dev/null +++ b/docs/zh/operations/system-tables/opentelemetry_span_log.md @@ -0,0 +1,53 @@ +# system.opentelemetry_span_log {#system_tables-opentelemetry_span_log} + +包含已执行查询的[跟踪范围](https://opentracing.io/docs/overview/spans/)的信息. + +列信息: + +- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md)) — 执行的查询的跟踪ID. + +- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — `跟踪 跨度` ID. + +- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 父级`跟踪 跨度` ID. + +- `operation_name` ([String](../../sql-reference/data-types/string.md)) — 操作的名称. + +- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — `跟踪 跨度` 开始时间 (微秒). 
+ +- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — `跟踪 跨度 结束时间 (微秒). + +- `finish_date` ([Date](../../sql-reference/data-types/date.md)) — `跟踪 跨度` 完成日期. + +- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — [属性](https://opentelemetry.io/docs/go/instrumentation/#attributes) 名称取决于 `跟踪 跨度`. 它们是根据[OpenTelemetry](https://opentelemetry.io/)标准中的建议填写的. + +- `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 属性值取决于 `跟踪 跨度`. 它们是根据 `OpenTelemetry` 标准中的建议填写的. + +**示例** + +查询: + +``` sql +SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical; +``` + +结果: + +``` text +Row 1: +────── +trace_id: cdab0847-0d62-61d5-4d38-dd65b19a1914 +span_id: 701487461015578150 +parent_span_id: 2991972114672045096 +operation_name: DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl() +start_time_us: 1612374594529090 +finish_time_us: 1612374594529108 +finish_date: 2021-02-03 +attribute.names: [] +attribute.values: [] +``` + +**另请参阅** + +- [OpenTelemetry](../../operations/opentelemetry.md) + +[原始文章](https://clickhouse.com/docs/en/operations/system_tables/opentelemetry_span_log) diff --git a/docs/zh/operations/system-tables/parts_columns.md b/docs/zh/operations/system-tables/parts_columns.md deleted file mode 120000 index f2c7d4dde34..00000000000 --- a/docs/zh/operations/system-tables/parts_columns.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/parts_columns.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/parts_columns.md b/docs/zh/operations/system-tables/parts_columns.md new file mode 100644 index 00000000000..d603f251d30 --- /dev/null +++ b/docs/zh/operations/system-tables/parts_columns.md @@ -0,0 +1,148 @@ +# system.parts_columns {#system_tables-parts_columns} + +包含关于[MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)表的部分和列的信息. + +每一行描述一个数据部分. + +列信息: + +- `partition` ([String](../../sql-reference/data-types/string.md)) — 分区的名称. 要了解什么是分区, 请参阅[ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter)查询的描述. + + 格式: + + - `YYYYMM` 按月自动分区. + - `any_string` 当手动分区. + +- `name` ([String](../../sql-reference/data-types/string.md)) — 数据部分的名称. + +- `part_type` ([String](../../sql-reference/data-types/string.md)) — 数据部分存储格式. + + 可能的值: + + - `Wide` — 每一列存储在文件系统中的一个单独的文件中. + - `Compact` — 所有列都存储在文件系统中的一个文件中. + + 数据存储格式由[MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)表的 `min_bytes_for_wide_part` 和 `min_rows_for_wide_part` 设置控制. + +- `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 数据部分是否处于活动状态的标志. 如果数据部分是活动的, 则在表中使用它. 否则, 它被删除. 合并后仍保留非活动数据部分. + +- `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 标记数. 要获得数据部分中的大约行数, 请将“标记”乘以索引粒度(通常为8192)(此提示不适用于自适应粒度). + +- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 行数. + +- `bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 所有数据部分文件的总大小(以字节为单位). + +- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 在数据部分中压缩数据的总大小. 不包括所有辅助文件(例如,带有标记的文件). + +- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 数据部分中未压缩数据的总大小. 不包括所有辅助文件(例如,带有标记的文件). + +- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 带标记的文件的大小. + +- `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 包含数据部分的目录被修改的时间. 这通常对应于数据部分创建的时间. 
+ +- `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 数据部分变为非活动状态的时间. + +- `refcount` ([UInt32](../../sql-reference/data-types/int-uint.md)) — 使用数据部分的位置数. 大于2的值表示该数据部分用于查询或合并. + +- `min_date` ([Date](../../sql-reference/data-types/date.md)) — 数据部分中日期键的最小值. + +- `max_date` ([Date](../../sql-reference/data-types/date.md)) — 数据部分中日期键的最大值. + +- `partition_id` ([String](../../sql-reference/data-types/string.md)) — 分区ID. + +- `min_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 合并后组成当前部分的数据部分最小值. + +- `max_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 合并后组成当前部分的数据部分最大值. + +- `level` ([UInt32](../../sql-reference/data-types/int-uint.md)) — 合并树的深度. 0表示当前部分是通过插入而不是合并其他部分创建的. + +- `data_version` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 用于确定应该对数据部分应用哪些突变的编号(版本高于 `data_version` 的突变). + +- `primary_key_bytes_in_memory` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 主键值使用的内存量(以字节为单位). + +- `primary_key_bytes_in_memory_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 为主键值保留的内存量(以字节为单位). + +- `database` ([String](../../sql-reference/data-types/string.md)) — 数据库名称. + +- `table` ([String](../../sql-reference/data-types/string.md)) — 表名称. + +- `engine` ([String](../../sql-reference/data-types/string.md)) — 不带参数的表引擎的名称. + +- `disk_name` ([String](../../sql-reference/data-types/string.md)) — 存储数据部分的磁盘名称. + +- `path` ([String](../../sql-reference/data-types/string.md)) — 数据部件文件文件夹的绝对路径. + +- `column` ([String](../../sql-reference/data-types/string.md)) — 列名称. + +- `type` ([String](../../sql-reference/data-types/string.md)) — 列类型. + +- `column_position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 表中以1开头的一列的序号位置. + +- `default_kind` ([String](../../sql-reference/data-types/string.md)) — 默认值的表达式类型 (`DEFAULT`, `MATERIALIZED`, `ALIAS`), 如果未定义则为空字符串. + +- `default_expression` ([String](../../sql-reference/data-types/string.md)) — 表达式的默认值, 如果未定义则为空字符串. + +- `column_bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 列的总大小(以字节为单位). + +- `column_data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 列中压缩数据的总大小,以字节为单位. + +- `column_data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 列中解压缩数据的总大小,以字节为单位. + +- `column_marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 带标记的列的大小,以字节为单位. + +- `bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — `bytes_on_disk` 别名. + +- `marks_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — `marks_bytes` 别名. 
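Beyond the basic example below, a sketch of a typical aggregation over this table: per-column compressed size and compression ratio for the active parts of one table (the database and table names are illustrative):

``` sql
-- Sketch only: per-column on-disk footprint for active parts of one table.
SELECT
    column,
    formatReadableSize(sum(column_data_compressed_bytes))   AS compressed,
    formatReadableSize(sum(column_data_uncompressed_bytes)) AS uncompressed,
    round(sum(column_data_uncompressed_bytes) / sum(column_data_compressed_bytes), 2) AS ratio
FROM system.parts_columns
WHERE active AND database = 'default' AND table = '53r93yleapyears'
GROUP BY column
ORDER BY sum(column_data_compressed_bytes) DESC;
```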
+ +**示例** + +``` sql +SELECT * FROM system.parts_columns LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +partition: tuple() +name: all_1_2_1 +part_type: Wide +active: 1 +marks: 2 +rows: 2 +bytes_on_disk: 155 +data_compressed_bytes: 56 +data_uncompressed_bytes: 4 +marks_bytes: 96 +modification_time: 2020-09-23 10:13:36 +remove_time: 2106-02-07 06:28:15 +refcount: 1 +min_date: 1970-01-01 +max_date: 1970-01-01 +partition_id: all +min_block_number: 1 +max_block_number: 2 +level: 1 +data_version: 1 +primary_key_bytes_in_memory: 2 +primary_key_bytes_in_memory_allocated: 64 +database: default +table: 53r93yleapyears +engine: MergeTree +disk_name: default +path: /var/lib/clickhouse/data/default/53r93yleapyears/all_1_2_1/ +column: id +type: Int8 +column_position: 1 +default_kind: +default_expression: +column_bytes_on_disk: 76 +column_data_compressed_bytes: 28 +column_data_uncompressed_bytes: 2 +column_marks_bytes: 48 +``` + +**另请参阅** + +- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md) + +[原始文章](https://clickhouse.com/docs/en/operations/system_tables/parts_columns) diff --git a/docs/zh/operations/system-tables/query_views_log.md b/docs/zh/operations/system-tables/query_views_log.md deleted file mode 120000 index f606e4108ca..00000000000 --- a/docs/zh/operations/system-tables/query_views_log.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/query_views_log.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/query_views_log.md b/docs/zh/operations/system-tables/query_views_log.md new file mode 100644 index 00000000000..57946e5104a --- /dev/null +++ b/docs/zh/operations/system-tables/query_views_log.md @@ -0,0 +1,86 @@ +# system.query_views_log {#system_tables-query_views_log} + +包含有关运行查询时执行的从属视图的信息,例如视图类型或执行时间. + +开始记录: + +1. 在 [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) 部分配置参数. +2. 设置 [log_query_views](../../operations/settings/settings.md#settings-log-query-views) 为 1. + +数据的刷新周期是在[query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log)服务器设置部分的 `flush_interval_milliseconds` 参数中设置的. 要强制刷新,请使用[SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs)查询. + +ClickHouse不会自动从表中删除数据. 详见 [Introduction](../../operations/system-tables/index.md#system-tables-introduction). + +您可以使用[log_queries_probability](../../operations/settings/settings.md#log-queries-probability)设置来减少在 `query_views_log` 表中注册的查询数量. + +列信息: + +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — 视图的最后一个事件发生的日期. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 视图完成执行的日期和时间. +- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — 视图以微秒精度完成执行的日期和时间. +- `view_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 视图执行的持续时间(各阶段之和), 以毫秒为单位. +- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — 初始查询的ID (用于分布式查询执行). +- `view_name` ([String](../../sql-reference/data-types/string.md)) — 视图名称. +- `view_uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — 视图的UUID. +- `view_type` ([Enum8](../../sql-reference/data-types/enum.md)) — 视图类型. 值: + - `'Default' = 1` — [Default views](../../sql-reference/statements/create/view.md#normal). 不应该出现在日志中. + - `'Materialized' = 2` — [Materialized views](../../sql-reference/statements/create/view.md#materialized). 
+ - `'Live' = 3` — [Live views](../../sql-reference/statements/create/view.md#live-view). +- `view_query` ([String](../../sql-reference/data-types/string.md)) — 视图执行的查询. +- `view_target` ([String](../../sql-reference/data-types/string.md)) — 视图目标表的名称. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 读行数. +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 读字节数. +- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 写入行数. +- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 写入字节数. +- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — 在此视图上下文中, 已分配内存和已释放内存之间的最大差值. +- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — ProfileEvents度量不同的指标. 它们的描述可以在表 [system.events](../../operations/system-tables/events.md#system_tables-events) 中找到. +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — 视图状态. 值: + - `'QueryStart' = 1` — 成功启动视图执行. 不应该出现. + - `'QueryFinish' = 2` — 视图执行成功结束. + - `'ExceptionBeforeStart' = 3` — 视图执行开始前的异常. + - `'ExceptionWhileProcessing' = 4` — 视图执行期间的异常. +- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — 异常代码. +- `exception` ([String](../../sql-reference/data-types/string.md)) — 异常报文. +- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [堆栈跟踪](https://en.wikipedia.org/wiki/Stack_trace). 如果查询成功完成, 则为空字符串. + +**示例** + +查询: + +``` sql +SELECT * FROM system.query_views_log LIMIT 1 \G; +``` + +结果: + +``` text +Row 1: +────── +event_date: 2021-06-22 +event_time: 2021-06-22 13:23:07 +event_time_microseconds: 2021-06-22 13:23:07.738221 +view_duration_ms: 0 +initial_query_id: c3a1ac02-9cad-479b-af54-9e9c0a7afd70 +view_name: default.matview_inner +view_uuid: 00000000-0000-0000-0000-000000000000 +view_type: Materialized +view_query: SELECT * FROM default.table_b +view_target: default.`.inner.matview_inner` +read_rows: 4 +read_bytes: 64 +written_rows: 2 +written_bytes: 32 +peak_memory_usage: 4196188 +ProfileEvents: {'FileOpen':2,'WriteBufferFromFileDescriptorWrite':2,'WriteBufferFromFileDescriptorWriteBytes':187,'IOBufferAllocs':3,'IOBufferAllocBytes':3145773,'FunctionExecute':3,'DiskWriteElapsedMicroseconds':13,'InsertedRows':2,'InsertedBytes':16,'SelectedRows':4,'SelectedBytes':48,'ContextLock':16,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':698,'SoftPageFaults':4,'OSReadChars':463} +status: QueryFinish +exception_code: 0 +exception: +stack_trace: +``` + +**另请参阅** + +- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — 包含查询执行的常用信息的 `query_log`系统表的描述. +- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — 包含关于每个查询执行线程的信息. 
+ +[原始文章](https://clickhouse.com/docs/en/operations/system_tables/query_thread_log) diff --git a/docs/zh/operations/system-tables/quota_limits.md b/docs/zh/operations/system-tables/quota_limits.md deleted file mode 120000 index 3fecf881c92..00000000000 --- a/docs/zh/operations/system-tables/quota_limits.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/quota_limits.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/quota_limits.md b/docs/zh/operations/system-tables/quota_limits.md new file mode 100644 index 00000000000..67a2a01eb71 --- /dev/null +++ b/docs/zh/operations/system-tables/quota_limits.md @@ -0,0 +1,21 @@ +# system.quota_limits {#system_tables-quota_limits} + +包含关于所有配额的所有间隔的最大值的信息. 任何行数或0行都可以对应一个配额. + +列信息: +- `quota_name` ([String](../../sql-reference/data-types/string.md)) — 配额名称. +- `duration` ([UInt32](../../sql-reference/data-types/int-uint.md)) — 计算资源消耗的时间间隔长度,单位为秒. +- `is_randomized_interval` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 逻辑值. 它显示了间隔是否是随机的. 如果间隔不是随机的, 它总是在同一时间开始. 例如, 1 分钟的间隔总是从整数分钟开始(即它可以从 11:20:00 开始, 但它永远不会从 11:20:01 开始), 一天的间隔总是从 UTC 午夜开始. 如果间隔是随机的, 则第一个间隔在随机时间开始, 随后的间隔一个接一个开始. 值: +- `0` — 区间不是随机的. +- `1` — 区间是随机的. +- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 最大查询数. +- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — select 最大查询数. +- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — insert 最大查询数. +- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 最大错误数. +- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 最大结果行数. +- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 用于存储查询结果的最大RAM容量(以字节为单位). +- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 从参与查询的所有表和表函数中读取的最大行数. +- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 从参与查询的所有表和表函数中读取的最大字节数. +- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — 查询执行时间的最大值, 单位为秒. + +[原始文章](https://clickhouse.com/docs/en/operations/system-tables/quota_limits) diff --git a/docs/zh/operations/system-tables/quota_usage.md b/docs/zh/operations/system-tables/quota_usage.md deleted file mode 120000 index c79f1a75033..00000000000 --- a/docs/zh/operations/system-tables/quota_usage.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/quota_usage.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/quota_usage.md b/docs/zh/operations/system-tables/quota_usage.md new file mode 100644 index 00000000000..020d14aee5e --- /dev/null +++ b/docs/zh/operations/system-tables/quota_usage.md @@ -0,0 +1,32 @@ +# system.quota_usage {#system_tables-quota_usage} + +当前用户的配额使用情况: 使用了多少, 还剩多少. + +列信息: +- `quota_name` ([String](../../sql-reference/data-types/string.md)) — 配额名称. +- `quota_key`([String](../../sql-reference/data-types/string.md)) — 配额数值. 比如, if keys = \[`ip address`\], `quota_key` 可能有一个值 ‘192.168.1.1’. 
+- `start_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — 计算资源消耗的开始时间. +- `end_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — 计算资源消耗的结束时间. +- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 计算资源消耗的时间间隔长度, 单位为秒. +- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 在此间隔内的请求总数. +- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 在此间隔内查询请求的总数. +- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 在此间隔内插入请求的总数. +- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 最大请求数. +- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 抛出异常的查询数. +- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 最大错误数. +- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 结果给出的总行数. +- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 最大结果行数. +- `result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 用于存储查询结果的RAM容量(以字节为单位). +- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 用于存储查询结果的最大RAM容量,以字节为单位. +- `read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of source rows read from tables for running the query on all remote servers. +- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 参与查询的所有表和表函数中读取的最大行数. +- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 参与查询的所有表和表函数中读取的总字节数. +- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 所有表和表函数中读取的最大字节数. +- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — 总查询执行时间, 以秒为单位(挂墙时间). +- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — 查询最大执行时间. 
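+
+**示例**
+
+A minimal query sketch, not taken from the original English page: it simply selects a few of the columns listed above. The rows returned depend on the quotas configured on your server, so no sample output is shown.
+
+``` sql
+SELECT quota_name, quota_key, queries, max_queries, execution_time, max_execution_time
+FROM system.quota_usage
+FORMAT Vertical;
+```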
+ +## 另请参阅 {#see-also} + +- [查看配额信息](../../sql-reference/statements/show.md#show-quota-statement) + +[原始文章](https://clickhouse.com/docs/en/operations/system-tables/quota_usage) diff --git a/docs/zh/operations/system-tables/quotas.md b/docs/zh/operations/system-tables/quotas.md deleted file mode 120000 index b6a26bf77f3..00000000000 --- a/docs/zh/operations/system-tables/quotas.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/quotas.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/quotas.md b/docs/zh/operations/system-tables/quotas.md new file mode 100644 index 00000000000..b25c213bdd1 --- /dev/null +++ b/docs/zh/operations/system-tables/quotas.md @@ -0,0 +1,28 @@ +# system.quotas {#system_tables-quotas} + +包含 [quotas](../../operations/system-tables/quotas.md) 信息. + +列信息: +- `name` ([String](../../sql-reference/data-types/string.md)) — 配额名称. +- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — 配额 ID. +- `storage`([String](../../sql-reference/data-types/string.md)) — 存储配额. 可能的值:“users.xml”(如果在 users.xml 文件中配置了配额), “disk”(如果由 SQL 查询配置的配额). +- `keys` ([Array](../../sql-reference/data-types/array.md)([Enum8](../../sql-reference/data-types/enum.md))) — Key指定配额应该如何共享. 如果两个连接使用相同的配额和键,则它们共享相同数量的资源. 值: + - `[]` — 所有用户共享相同的配额. + - `['user_name']` — 相同用户名的连接共享相同的配额. + - `['ip_address']` — 来自同一IP的连接共享相同的配额. + - `['client_key']` — 具有相同密钥的连接共享相同配额. 密钥必须由客户端显式提供. 使用[clickhouse-client](../../interfaces/cli.md)时, 在 `--quota_key` 参数中传递一个key值, 或者在客户端配置文件中使用 `quota_key` 参数. 使用 HTTP 接口时, 使用 `X-ClickHouse-Quota` 报头. + - `['user_name', 'client_key']` — 具有相同 `client_key` 的连接共享相同的配额. 如果客户端没有提供密钥, 配额将跟踪 `user_name`. + - `['client_key', 'ip_address']` — 具有相同 `client_key` 的连接共享相同的配额. 如果客户端没有提供密钥, 配额将跟踪 `ip_address`. +- `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 时间间隔以秒为单位. +- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 逻辑值. 它显示了配额应用于哪些用户. 值: + - `0` — 配额应用于 `apply_to_list` 中指定的用户. + - `1` — 配额适用于除 `apply_to_except` 中列出的用户之外的所有用户. +- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 需要应用配额的用户名/[角色](../../operations/access-rights.md#role-management) 列表. +- `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 不需要应用配额的用户名/角色列表. + +## 另请参阅 {#see-also} + +- [查看配额信息](../../sql-reference/statements/show.md#show-quotas-statement) + +[原始文章](https://clickhouse.com/docs/en/operations/system-tables/quotas) + diff --git a/docs/zh/operations/system-tables/quotas_usage.md b/docs/zh/operations/system-tables/quotas_usage.md deleted file mode 120000 index ba204a4d9c0..00000000000 --- a/docs/zh/operations/system-tables/quotas_usage.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/quotas_usage.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/quotas_usage.md b/docs/zh/operations/system-tables/quotas_usage.md new file mode 100644 index 00000000000..f554539ecfe --- /dev/null +++ b/docs/zh/operations/system-tables/quotas_usage.md @@ -0,0 +1,35 @@ +# system.quotas_usage {#system_tables-quotas_usage} + +所有用户配额使用情况. + +列信息: +- `quota_name` ([String](../../sql-reference/data-types/string.md)) — 配额名称. +- `quota_key` ([String](../../sql-reference/data-types/string.md)) — 配额key值. +- `is_current` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 当前用户配额使用情况. 
+- `start_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md)))) — 计算资源消耗的开始时间. +- `end_time` ([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md)))) — 计算资源消耗的结束时间. +- `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — 计算资源消耗的时间间隔长度,单位为秒. +- `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 在此间隔内的请求总数. +- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 最大请求数. +- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 此间隔内查询请求的总数. +- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 查询请求的最大数量. +- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 此间隔内插入请求的总数. +- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 最大插入请求数. +- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 抛出异常的查询数. +- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 最大误差数. +- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 结果给出的总行数. +- `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 从表中读取的最大源行数. +- `result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 用于存储查询结果的RAM容量(以字节为单位). +- `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 用于存储查询结果的最大RAM容量, 以字节为单位. +- `read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md)))) — 为在所有远程服务器上运行查询而从表中读取的源行总数. +- `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 参与查询的所有表和表函数中读取的最大行数. +- `read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 参与查询的所有表和表函数中读取的总字节数. +- `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 所有表和表函数中读取的最大字节数. +- `execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — 总查询执行时间, 以秒为单位(挂墙时间). +- `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — 查询最大执行时间. 
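+
+**示例**
+
+An illustrative query sketch (assuming at least one quota is defined on the server); it only uses columns documented above, and the actual output depends on the server's quota configuration.
+
+``` sql
+SELECT quota_name, quota_key, is_current, queries, errors, execution_time
+FROM system.quotas_usage
+LIMIT 5
+FORMAT Vertical;
+```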
+ +## 另请参阅 {#see-also} + +- [查看配额信息](../../sql-reference/statements/show.md#show-quota-statement) + +[原始文章](https://clickhouse.com/docs/en/operations/system-tables/quotas_usage) diff --git a/docs/zh/operations/system-tables/replicated_fetches.md b/docs/zh/operations/system-tables/replicated_fetches.md deleted file mode 120000 index bf4547ebd07..00000000000 --- a/docs/zh/operations/system-tables/replicated_fetches.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/replicated_fetches.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/replicated_fetches.md b/docs/zh/operations/system-tables/replicated_fetches.md new file mode 100644 index 00000000000..fededbaf28d --- /dev/null +++ b/docs/zh/operations/system-tables/replicated_fetches.md @@ -0,0 +1,70 @@ +# system.replicated_fetches {#system_tables-replicated_fetches} + +包含当前正在运行的后台提取的信息. + +列信息: + +- `database` ([String](../../sql-reference/data-types/string.md)) — 数据库名称. + +- `table` ([String](../../sql-reference/data-types/string.md)) — 表名称. + +- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — 显示当前正在运行的后台提取开始以来经过的时间(以秒为单位). + +- `progress` ([Float64](../../sql-reference/data-types/float.md)) — 完成工作的百分比从0到1. + +- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — 显示当前正在运行的后台提取的结果而形成的部分的名称. + +- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — 显示当前正在运行的后台提取的结果而形成的部分的绝对路径. + +- `partition_id` ([String](../../sql-reference/data-types/string.md)) — 分区 ID. + +- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 结果部分中压缩数据的总大小(以字节为单位). + +- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 从结果部分读取的压缩字节数. + +- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — 源副本的绝对路径. + +- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — 源副本的主机名称. + +- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — 源副本的端口号. + +- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — Name of the interserver scheme. + +- `URI` ([String](../../sql-reference/data-types/string.md)) — 统一资源标识符. + +- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 该标志指示是否正在使用 `TO DETACHED` 表达式执行当前正在运行的后台提取. + +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 线程标识符. 
+ +**示例** + +``` sql +SELECT * FROM system.replicated_fetches LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +database: default +table: t +elapsed: 7.243039876 +progress: 0.41832135995612835 +result_part_name: all_0_0_0 +result_part_path: /var/lib/clickhouse/store/700/70080a04-b2de-4adf-9fa5-9ea210e81766/all_0_0_0/ +partition_id: all +total_size_bytes_compressed: 1052783726 +bytes_read_compressed: 440401920 +source_replica_path: /clickhouse/test/t/replicas/1 +source_replica_hostname: node1 +source_replica_port: 9009 +interserver_scheme: http +URI: http://node1:9009/?endpoint=DataPartsExchange%3A%2Fclickhouse%2Ftest%2Ft%2Freplicas%2F1&part=all_0_0_0&client_protocol_version=4&compress=false +to_detached: 0 +thread_id: 54 +``` + +**另请参阅** + +- [管理 ReplicatedMergeTree 表](../../sql-reference/statements/system/#query-language-system-replicated) + +[原始文章](https://clickhouse.com/docs/en/operations/system_tables/replicated_fetches) diff --git a/docs/zh/operations/system-tables/replication_queue.md b/docs/zh/operations/system-tables/replication_queue.md deleted file mode 120000 index cdc452594e6..00000000000 --- a/docs/zh/operations/system-tables/replication_queue.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/replication_queue.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/replication_queue.md b/docs/zh/operations/system-tables/replication_queue.md new file mode 100644 index 00000000000..0c9e19f76d0 --- /dev/null +++ b/docs/zh/operations/system-tables/replication_queue.md @@ -0,0 +1,91 @@ +# system.replication_queue {#system_tables-replication_queue} + +包含用于 `ReplicatedMergeTree` 系列表的复制队列中存储在ZooKeeper中的任务的信息. + +列信息: + +- `database` ([String](../../sql-reference/data-types/string.md)) — 数据库名称. + +- `table` ([String](../../sql-reference/data-types/string.md)) — 表名称. + +- `replica_name` ([String](../../sql-reference/data-types/string.md)) — ZooKeeper中的副本名称. 同一张表的不同副本具有不同的名称. + +- `position` ([UInt32](../../sql-reference/data-types/int-uint.md)) — 任务在队列中的位置. + +- `node_name` ([String](../../sql-reference/data-types/string.md)) — ZooKeeper中的节点名称. + +- `type` ([String](../../sql-reference/data-types/string.md)) — 队列中任务的类型, 其中之一: + + - `GET_PART` — 从另一个副本获取分片. + - `ATTACH_PART` — 附加的部分, 可能来自我们自己的副本(如果在 `detached` 文件夹中找到). 您可以将其视为具有一些优化的 `GET_PART` , 因为它们几乎相同. + - `MERGE_PARTS` — 合并分片. + - `DROP_RANGE` — 删除指定分区内指定编号范围内的分片. + - `CLEAR_COLUMN` — 注意:已弃用. 从指定分区删除特定列. + - `CLEAR_INDEX` — 注意:已弃用. 从指定分区删除特定索引. + - `REPLACE_RANGE` — 丢弃一定范围的零件并用新零件替换它们. + - `MUTATE_PART` — 对分片应用一个或多个突变. + - `ALTER_METADATA` — 根据全局 /metadata 和 /columns 路径应用alter修改. + +- `create_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — 提交任务执行的日期和时间. + +- `required_quorum` ([UInt32](../../sql-reference/data-types/int-uint.md)) — 等待任务完成并确认完成的副本数. 此列仅与 `GET_PARTS` 任务相关. + +- `source_replica` ([String](../../sql-reference/data-types/string.md)) — 源副本的名称. + +- `new_part_name` ([String](../../sql-reference/data-types/string.md)) — 新分片的名称. + +- `parts_to_merge` ([Array](../../sql-reference/data-types/array.md) ([String](../../sql-reference/data-types/string.md))) — 要合并或更新的分片名称. + +- `is_detach` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 该标志指示 `DETACH_PARTS` 任务是否在队列中. + +- `is_currently_executing` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 该标志指示当前是否正在执行特定任务. + +- `num_tries` ([UInt32](../../sql-reference/data-types/int-uint.md)) — 尝试完成任务失败的次数. + +- `last_exception` ([String](../../sql-reference/data-types/string.md)) — 发生的最后一个错误的短信(如果有). 
+ +- `last_attempt_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — 上次尝试任务的日期和时间. + +- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — 延期任务数. + +- `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — 任务延期的原因. + +- `last_postpone_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — 上次推迟任务的日期和时间. + +- `merge_type` ([String](../../sql-reference/data-types/string.md)) — 当前合并的类型. 如果是突变则为空. + +**示例** + +``` sql +SELECT * FROM system.replication_queue LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +database: merge +table: visits_v2 +replica_name: mtgiga001-1t.metrika.yandex.net +position: 15 +node_name: queue-0009325559 +type: MERGE_PARTS +create_time: 2020-12-07 14:04:21 +required_quorum: 0 +source_replica: mtgiga001-1t.metrika.yandex.net +new_part_name: 20201130_121373_121384_2 +parts_to_merge: ['20201130_121373_121378_1','20201130_121379_121379_0','20201130_121380_121380_0','20201130_121381_121381_0','20201130_121382_121382_0','20201130_121383_121383_0','20201130_121384_121384_0'] +is_detach: 0 +is_currently_executing: 0 +num_tries: 36 +last_exception: Code: 226, e.displayText() = DB::Exception: Marks file '/opt/clickhouse/data/merge/visits_v2/tmp_fetch_20201130_121373_121384_2/CounterID.mrk' does not exist (version 20.8.7.15 (official build)) +last_attempt_time: 2020-12-08 17:35:54 +num_postponed: 0 +postpone_reason: +last_postpone_time: 1970-01-01 03:00:00 +``` + +**另请参阅** + +- [管理 ReplicatedMergeTree 表](../../sql-reference/statements/system.md#query-language-system-replicated) + +[原始文章](https://clickhouse.com/docs/en/operations/system_tables/replication_queue) diff --git a/docs/zh/operations/system-tables/role-grants.md b/docs/zh/operations/system-tables/role-grants.md deleted file mode 120000 index 6a25ffa31ce..00000000000 --- a/docs/zh/operations/system-tables/role-grants.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/role-grants.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/role-grants.md b/docs/zh/operations/system-tables/role-grants.md new file mode 100644 index 00000000000..8957c73df73 --- /dev/null +++ b/docs/zh/operations/system-tables/role-grants.md @@ -0,0 +1,21 @@ +#system.role_grants {#system_tables-role_grants} + +包含用户和角色的角色授予. 向该表添加项, 请使用`GRANT role TO user`. + +列信息: + +- `user_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — 用户名称. + +- `role_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — 角色名称. + +- `granted_role_name` ([String](../../sql-reference/data-types/string.md)) — 授予 `role_name` 角色的角色名称. 要将一个角色授予另一个角色, 请使用`GRANT role1 TO role2`. + +- `granted_role_is_default` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 显示 `granted_role` 是否为默认角色的标志. 参考值: + - 1 — `granted_role` is a default role. + - 0 — `granted_role` is not a default role. + +- `with_admin_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 显示 `granted_role` 是否是具有 [ADMIN OPTION](../../sql-reference/statements/grant.md#admin-option-privilege) 特权的角色的标志. 参考值: + - 1 — 该角色具有 `ADMIN OPTION` 权限. + - 0 — 该角色不具有 `ADMIN OPTION` 权限. 
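+
+**示例**
+
+A hedged usage sketch: the role name `accountant` and the user name `mira` below are hypothetical placeholders, not part of the original page. It grants a role to a user and then inspects the resulting row in this table.
+
+``` sql
+-- `accountant` and `mira` are example identifiers only
+GRANT accountant TO mira;
+
+SELECT user_name, role_name, granted_role_name, granted_role_is_default, with_admin_option
+FROM system.role_grants
+WHERE user_name = 'mira';
+```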
+ +[原始文章](https://clickhouse.com/docs/en/operations/system-tables/role-grants) diff --git a/docs/zh/operations/system-tables/roles.md b/docs/zh/operations/system-tables/roles.md deleted file mode 120000 index 391bc980a48..00000000000 --- a/docs/zh/operations/system-tables/roles.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/roles.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/roles.md b/docs/zh/operations/system-tables/roles.md new file mode 100644 index 00000000000..c3537b978e4 --- /dev/null +++ b/docs/zh/operations/system-tables/roles.md @@ -0,0 +1,15 @@ +# system.roles {#system_tables-roles} + +包含有关已配置的 [角色](../../operations/access-rights.md#role-management) 信息. + +列信息: + +- `name` ([String](../../sql-reference/data-types/string.md)) — 角色名称. +- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — 角色 ID. +- `storage` ([String](../../sql-reference/data-types/string.md)) — 角色存储的路径. 在 `access_control_path` 参数中配置. + +## 另请参阅 {#see-also} + +- [查看角色信息](../../sql-reference/statements/show.md#show-roles-statement) + +[原始文章](https://clickhouse.com/docs/en/operations/system-tables/roles) diff --git a/docs/zh/operations/system-tables/row_policies.md b/docs/zh/operations/system-tables/row_policies.md deleted file mode 120000 index b194161cf1d..00000000000 --- a/docs/zh/operations/system-tables/row_policies.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/row_policies.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/row_policies.md b/docs/zh/operations/system-tables/row_policies.md new file mode 100644 index 00000000000..4eaf291cc68 --- /dev/null +++ b/docs/zh/operations/system-tables/row_policies.md @@ -0,0 +1,34 @@ +# system.row_policies {#system_tables-row_policies} + +包含一个特定表的过滤器, 以及应该使用此行策略的角色和/或用户列表. + +列信息: +- `name` ([String](../../sql-reference/data-types/string.md)) — 行策略的名称. + +- `short_name` ([String](../../sql-reference/data-types/string.md)) — 行策略的短名称. 行策略的名称是复合的,例如:myfilter ON mydb.mytable. 这里 "myfilter ON mydb.mytable" 是行策略的名称, "myfilter" 是它的简称. + +- `database` ([String](../../sql-reference/data-types/string.md)) — 数据库名称. + +- `table` ([String](../../sql-reference/data-types/string.md)) — 表名称. + +- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — 行策略 ID. + +- `storage` ([String](../../sql-reference/data-types/string.md)) — 存储行策略的目录名. + +- `select_filter` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — 用于过滤行的条件. + +- `is_restrictive` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 显示行策略是否限制对行的访问, 参考 [CREATE ROW POLICY](../../sql-reference/statements/create/row-policy.md#create-row-policy-as). 值: +- `0` — 行策略使用 `AS PERMISSIVE` 子句定义. +- `1` — 行策略使用 `AS RESTRICTIVE` 子句定义. + +- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 显示为所有角色和/或用户设置的行策略. + +- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 应用行策略的角色和/或用户列表. + +- `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 行策略应用于除列出的角色和/或用户之外的所有角色和/或用户. 
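+
+**示例**
+
+An illustrative sketch only: `myfilter` and `mydb.mytable` come from the description above, while the target user `mira` and the column `a` in the filter condition are hypothetical. It creates a policy and then looks it up in this table.
+
+``` sql
+-- the filter condition and the target user are placeholders
+CREATE ROW POLICY myfilter ON mydb.mytable FOR SELECT USING a > 1 TO mira;
+
+SELECT name, short_name, database, table, select_filter, is_restrictive
+FROM system.row_policies
+WHERE short_name = 'myfilter';
+```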
+ +## 另请参阅 {#see-also} + +- [SHOW POLICIES](../../sql-reference/statements/show.md#show-policies-statement) + +[原始文章](https://clickhouse.com/docs/en/operations/system-tables/row_policies) diff --git a/docs/zh/operations/system-tables/settings_profile_elements.md b/docs/zh/operations/system-tables/settings_profile_elements.md deleted file mode 120000 index 3b0e70ead49..00000000000 --- a/docs/zh/operations/system-tables/settings_profile_elements.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/settings_profile_elements.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/settings_profile_elements.md b/docs/zh/operations/system-tables/settings_profile_elements.md new file mode 100644 index 00000000000..2f5eb5b3044 --- /dev/null +++ b/docs/zh/operations/system-tables/settings_profile_elements.md @@ -0,0 +1,30 @@ +# system.settings_profile_elements {#system_tables-settings_profile_elements} + +描述settings配置文件的内容: + +- 约束. +- setting适用的角色和用户. +- 父级 setting 配置文件. + +列信息: +- `profile_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Setting 配置文件名称. + +- `user_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — 用户名称. + +- `role_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — 角色名称. + +- `index` ([UInt64](../../sql-reference/data-types/int-uint.md)) — settings 配置文件元素的顺序编号. + +- `setting_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Setting 名称. + +- `value` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Setting 值. + +- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — setting 最小值. 未设置则赋 `NULL`. + +- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — setting 最大值. 未设置则赋 `NULL`. + +- `readonly` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges))) — 只允许读查询的配置文件. + +- `inherit_profile` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — 此setting配置文件的父配置文件. 未设置则赋 `NULL`. 设置则将从其父配置文件继承所有设置的值和约束(`min`、`max`、`readonly`). + +[原始文章](https://clickhouse.com/docs/en/operations/system-tables/settings_profile_elements) diff --git a/docs/zh/operations/system-tables/settings_profiles.md b/docs/zh/operations/system-tables/settings_profiles.md deleted file mode 120000 index 1c559bf2445..00000000000 --- a/docs/zh/operations/system-tables/settings_profiles.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/settings_profiles.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/settings_profiles.md b/docs/zh/operations/system-tables/settings_profiles.md new file mode 100644 index 00000000000..46102f9c2a6 --- /dev/null +++ b/docs/zh/operations/system-tables/settings_profiles.md @@ -0,0 +1,24 @@ +# system.settings_profiles {#system_tables-settings_profiles} + +包含 Setting 配置文件中指定的属性. + +列信息: +- `name` ([String](../../sql-reference/data-types/string.md)) — Setting 配置文件 name. + +- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Setting 配置文件 ID. + +- `storage` ([String](../../sql-reference/data-types/string.md)) — Setting 配置文件的存储路径. 
在`access_control_path`参数中配置. + +- `num_elements` ([UInt64](../../sql-reference/data-types/int-uint.md)) — `system.settings_profile_elements` 表中此配置文件的元素数. + +- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 为所有角色和/或用户设置的 Setting 配置文件. + +- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 应用 Setting 配置文件的角色和/或用户列表. + +- `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Setting 配置文件适用于除所列角色和/或用户之外的所有角色和/或用户. + +## 另请参阅 {#see-also} + +- [查看配置文件信息](../../sql-reference/statements/show.md#show-profiles-statement) + +[原始文章](https://clickhouse.com/docs/en/operations/system-tables/settings_profiles) diff --git a/docs/zh/operations/system-tables/stack_trace.md b/docs/zh/operations/system-tables/stack_trace.md deleted file mode 120000 index 8dea20028f1..00000000000 --- a/docs/zh/operations/system-tables/stack_trace.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/stack_trace.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/stack_trace.md b/docs/zh/operations/system-tables/stack_trace.md new file mode 100644 index 00000000000..8e46efd17ab --- /dev/null +++ b/docs/zh/operations/system-tables/stack_trace.md @@ -0,0 +1,91 @@ +# system.stack_trace {#system-tables_stack_trace} + +包含所有服务器线程的堆栈跟踪. 允许开发人员对服务器状态进行自省. + +要分析堆栈帧, 请使用 `addressToLine`, `addressToSymbol` and `demangle` [内省函数](../../sql-reference/functions/introspection.md). + +列信息: + +- `thread_name` ([String](../../sql-reference/data-types/string.md)) — 线程名称. +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 线程标识符. +- `query_id` ([String](../../sql-reference/data-types/string.md)) — 用于获取从[query_log](../system-tables/query_log.md) 系统表运行的查询的详细信息查询标识符. +- `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — [堆栈跟踪](https://en.wikipedia.org/wiki/Stack_trace) 表示存储调用方法的物理地址列表. 
+ +**示例** + +启用内省功能: + +``` sql +SET allow_introspection_functions = 1; +``` + +从 ClickHouse 目标文件中获取符号: + +``` sql +WITH arrayMap(x -> demangle(addressToSymbol(x)), trace) AS all SELECT thread_name, thread_id, query_id, arrayStringConcat(all, '\n') AS res FROM system.stack_trace LIMIT 1 \G; +``` + +``` text +Row 1: +────── +thread_name: clickhouse-serv + +thread_id: 686 +query_id: 1a11f70b-626d-47c1-b948-f9c7b206395d +res: sigqueue +DB::StorageSystemStackTrace::fillData(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::Context const&, DB::SelectQueryInfo const&) const +DB::IStorageSystemOneBlock::read(std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&, DB::SelectQueryInfo const&, DB::Context const&, DB::QueryProcessingStage::Enum, unsigned long, unsigned int) +DB::InterpreterSelectQuery::executeFetchColumns(DB::QueryProcessingStage::Enum, DB::QueryPipeline&, std::__1::shared_ptr const&, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&) +DB::InterpreterSelectQuery::executeImpl(DB::QueryPipeline&, std::__1::shared_ptr const&, std::__1::optional) +DB::InterpreterSelectQuery::execute() +DB::InterpreterSelectWithUnionQuery::execute() +DB::executeQueryImpl(char const*, char const*, DB::Context&, bool, DB::QueryProcessingStage::Enum, bool, DB::ReadBuffer*) +DB::executeQuery(std::__1::basic_string, std::__1::allocator > const&, DB::Context&, bool, DB::QueryProcessingStage::Enum, bool) +DB::TCPHandler::runImpl() +DB::TCPHandler::run() +Poco::Net::TCPServerConnection::start() +Poco::Net::TCPServerDispatcher::run() +Poco::PooledThread::run() +Poco::ThreadImpl::runnableEntry(void*) +start_thread +__clone +``` + +在 ClickHouse 源代码中获取文件名和行号: + +``` sql +WITH arrayMap(x -> addressToLine(x), trace) AS all, arrayFilter(x -> x LIKE '%/dbms/%', all) AS dbms SELECT thread_name, thread_id, query_id, arrayStringConcat(notEmpty(dbms) ? dbms : all, '\n') AS res FROM system.stack_trace LIMIT 1 \G; +``` + +``` text +Row 1: +────── +thread_name: clickhouse-serv + +thread_id: 686 +query_id: cad353e7-1c29-4b2e-949f-93e597ab7a54 +res: /lib/x86_64-linux-gnu/libc-2.27.so +/build/obj-x86_64-linux-gnu/../src/Storages/System/StorageSystemStackTrace.cpp:182 +/build/obj-x86_64-linux-gnu/../contrib/libcxx/include/vector:656 +/build/obj-x86_64-linux-gnu/../src/Interpreters/InterpreterSelectQuery.cpp:1338 +/build/obj-x86_64-linux-gnu/../src/Interpreters/InterpreterSelectQuery.cpp:751 +/build/obj-x86_64-linux-gnu/../contrib/libcxx/include/optional:224 +/build/obj-x86_64-linux-gnu/../src/Interpreters/InterpreterSelectWithUnionQuery.cpp:192 +/build/obj-x86_64-linux-gnu/../src/Interpreters/executeQuery.cpp:384 +/build/obj-x86_64-linux-gnu/../src/Interpreters/executeQuery.cpp:643 +/build/obj-x86_64-linux-gnu/../src/Server/TCPHandler.cpp:251 +/build/obj-x86_64-linux-gnu/../src/Server/TCPHandler.cpp:1197 +/build/obj-x86_64-linux-gnu/../contrib/poco/Net/src/TCPServerConnection.cpp:57 +/build/obj-x86_64-linux-gnu/../contrib/libcxx/include/atomic:856 +/build/obj-x86_64-linux-gnu/../contrib/poco/Foundation/include/Poco/Mutex_POSIX.h:59 +/build/obj-x86_64-linux-gnu/../contrib/poco/Foundation/include/Poco/AutoPtr.h:223 +/lib/x86_64-linux-gnu/libpthread-2.27.so +/lib/x86_64-linux-gnu/libc-2.27.so +``` + +**另请参阅** + +- [Introspection Functions](../../sql-reference/functions/introspection.md) — 哪些内省功能是可用的以及如何使用它们. +- [system.trace_log](../system-tables/trace_log.md) — 包含由抽样查询分析器收集的堆栈跟踪. 
+- [arrayMap](../../sql-reference/functions/array-functions.md#array-map) — `arrayMap` 函数的描述和使用示例 +- [arrayFilter](../../sql-reference/functions/array-functions.md#array-filter) — `arrayFilter` 函数的描述和使用示例. diff --git a/docs/zh/operations/system-tables/time_zones.md b/docs/zh/operations/system-tables/time_zones.md deleted file mode 120000 index d7b0f07d326..00000000000 --- a/docs/zh/operations/system-tables/time_zones.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/time_zones.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/time_zones.md b/docs/zh/operations/system-tables/time_zones.md new file mode 100644 index 00000000000..d01725edf5b --- /dev/null +++ b/docs/zh/operations/system-tables/time_zones.md @@ -0,0 +1,30 @@ +# system.time_zones {#system-time_zones} + +包含 ClickHouse 服务器支持的时区列表. 此时区列表可能因 ClickHouse 的版本而异 + +列信息: + +- `time_zone` (String) — List of supported time zones. + +**示例** + +``` sql +SELECT * FROM system.time_zones LIMIT 10 +``` + +``` text +┌─time_zone──────────┐ +│ Africa/Abidjan │ +│ Africa/Accra │ +│ Africa/Addis_Ababa │ +│ Africa/Algiers │ +│ Africa/Asmara │ +│ Africa/Asmera │ +│ Africa/Bamako │ +│ Africa/Bangui │ +│ Africa/Banjul │ +│ Africa/Bissau │ +└────────────────────┘ +``` + +[原始文章](https://clickhouse.com/docs/en/operations/system-tables/time_zones) diff --git a/docs/zh/operations/system-tables/users.md b/docs/zh/operations/system-tables/users.md deleted file mode 120000 index 540c5ac6620..00000000000 --- a/docs/zh/operations/system-tables/users.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/users.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/users.md b/docs/zh/operations/system-tables/users.md new file mode 100644 index 00000000000..521c54de801 --- /dev/null +++ b/docs/zh/operations/system-tables/users.md @@ -0,0 +1,34 @@ +# system.users {#system_tables-users} + +包含服务器上配置的[用户账号](../../operations/access-rights.md#user-account-management)的列表. + +列信息: +- `name` ([String](../../sql-reference/data-types/string.md)) — 用户名称. + +- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — 用户 ID. + +- `storage` ([String](../../sql-reference/data-types/string.md)) — 用户存储路径. 在 `access_control_path` 参数中配置. + +- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)('no_password' = 0,'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3)) — 显示认证类型. 有多种用户识别方式: 无密码, 纯文本密码, [SHA256](https://ru.wikipedia.org/wiki/SHA-2)-encoded password or with [double SHA-1](https://ru.wikipedia.org/wiki/SHA-1)-编码的密码. + +- `auth_params` ([String](../../sql-reference/data-types/string.md)) — JSON 格式的身份验证参数取决于`auth_type`. + +- `host_ip` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 许连接到 ClickHouse 服务器的主机的 IP 地址. + +- `host_names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 允许连接到 ClickHouse 服务器的主机名称. + +- `host_names_regexp` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 允许连接到 ClickHouse 服务器的主机名的正则表达式. + +- `host_names_like` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 使用 LIKE 谓词设置允许连接到 ClickHouse 服务器的主机名称. + +- `default_roles_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 显示默认情况下为用户设置的所有授予的角色. 
+ +- `default_roles_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 默认提供的授权角色列表. + +- `default_roles_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 除了列出的角色之外所有授予的角色都设置为默认值. + +## 另请参阅 {#see-also} + +- [查看用户信息](../../sql-reference/statements/show.md#show-users-statement) + +[原始文章](https://clickhouse.com/docs/en/operations/system-tables/users) diff --git a/docs/zh/operations/system-tables/zookeeper_log.md b/docs/zh/operations/system-tables/zookeeper_log.md deleted file mode 120000 index c7db82e978a..00000000000 --- a/docs/zh/operations/system-tables/zookeeper_log.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/zookeeper_log.md \ No newline at end of file diff --git a/docs/zh/operations/system-tables/zookeeper_log.md b/docs/zh/operations/system-tables/zookeeper_log.md new file mode 100644 index 00000000000..678e2b8a93b --- /dev/null +++ b/docs/zh/operations/system-tables/zookeeper_log.md @@ -0,0 +1,129 @@ +# system.zookeeper_log {#system-zookeeper_log} + +此表包含有关对 ZooKeeper 服务器的请求及其响应的参数的信息. + +对于请求,只填充有请求参数的列,其余列填充默认值 (`0` or `NULL`). 当响应到达时,来自响应的数据被添加到其他列. + +带有请求参数的列: + +- `type` ([Enum](../../sql-reference/data-types/enum.md)) — ZooKeeper 客户端中的事件类型. 可以具有以下值之一: + - `Request` — 请求已发送. + - `Response` — 已收到回复. + - `Finalize` — 连接丢失, 未收到响应. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — 事件发生的日期. +- `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — 事件发生的日期和时间. +- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — 用于发出请求的 ZooKeeper 服务器的 IP 地址. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — 用于发出请求的 ZooKeeper 服务器的端口. +- `session_id` ([Int64](../../sql-reference/data-types/int-uint.md)) — ZooKeeper 服务器为每个连接设置的会话 ID. +- `xid` ([Int32](../../sql-reference/data-types/int-uint.md)) — 会话中请求的 ID. 这通常是一个连续的请求编号. 请求行和配对的 `response`/`finalize` 行相同. +- `has_watch` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 请求是否设置了 [watch](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#ch_zkWatches) . +- `op_num` ([Enum](../../sql-reference/data-types/enum.md)) — 请求或响应的类型. +- `path` ([String](../../sql-reference/data-types/string.md)) — 请求中指定的 ZooKeeper 节点的路径, 如果请求不需要指定路径, 则为空字符串. +- `data` ([String](../../sql-reference/data-types/string.md)) — 写入 ZooKeeper 节点的数据(对于 `SET` 和 `CREATE` 请求 - 请求想要写入的内容,对于 `GET` 请求的响应 - 读取的内容)或空字符串. +- `is_ephemeral` ([UInt8](../../sql-reference/data-types/int-uint.md)) — ZooKeeper 节点是否被创建为 [ephemeral](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Ephemeral+Nodes). +- `is_sequential` ([UInt8](../../sql-reference/data-types/int-uint.md)) — ZooKeeper 节点是否被创建为 [sequential](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming). +- `version` ([Nullable(Int32)](../../sql-reference/data-types/nullable.md)) — 请求执行时期望的 ZooKeeper 节点的版本. 这支持`CHECK`、`SET`、`REMOVE`请求(如果请求不检查版本, 则为相关的`-1`或不支持版本检查的其他请求的`NULL`). +- `requests_size` ([UInt32](../../sql-reference/data-types/int-uint.md)) —多请求中包含的请求数(这是一个特殊的请求,由几个连续的普通请求组成, 并以原子方式执行). 多请求中包含的所有请求都将具有相同的 `xid`. +- `request_idx` ([UInt32](../../sql-reference/data-types/int-uint.md)) — 包含在多请求中的请求数(对于多请求 — `0`,然后从 `1` 开始). + +带有请求响应参数的列: + +- `zxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — ZooKeeper 事务 ID. ZooKeeper 服务器响应成功执行的请求而发出的序列号(`0` 表示请求没有执行/返回错误/客户端不知道请求是否被执行). 
+- `error` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — 错误代码. 可以有很多值, 这里只是其中的一些: + - `ZOK` — 请求被安全执行. + - `ZCONNECTIONLOSS` — 连接丢失. + - `ZOPERATIONTIMEOUT` — 请求执行超时已过期. + - `ZSESSIONEXPIRED` — 会话已过期. + - `NULL` — 请求完成. +- `watch_type` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — `watch` 事件的类型(对于带有 `op_num` = `Watch` 的响应), 对于其余响应:`NULL`. +- `watch_state` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — `watch` 事件的状态(对于带有 `op_num` = `Watch` 的响应), 对于其余响应:`NULL`. +- `path_created` ([String](../../sql-reference/data-types/string.md)) — 创建的 ZooKeeper 节点的路径(用于响应 `CREATE` 请求),如果节点被创建为 `sequential`, 则可能与 `path` 不同. +- `stat_czxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — 导致创建此 ZooKeeper 节点的更改的 `zxid`. +- `stat_mzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — 最后一次修改该ZooKeeper节点的 `zxid`. +- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — 最后一次修改该ZooKeeper节点的子节点的事务ID +- `stat_version` ([Int32](../../sql-reference/data-types/int-uint.md)) — 此 ZooKeeper 节点的数据更改次数. +- `stat_cversion` ([Int32](../../sql-reference/data-types/int-uint.md)) — 此 ZooKeeper 节点的子节点的更改次数. +- `stat_dataLength` ([Int32](../../sql-reference/data-types/int-uint.md)) — 这个 ZooKeeper 节点的数据字段的长度. +- `stat_numChildren` ([Int32](../../sql-reference/data-types/int-uint.md)) — 此 ZooKeeper 节点的子节点数. +- `children` ([Array(String)](../../sql-reference/data-types/array.md)) — ZooKeeper 子节点列表(用于响应 `LIST` 请求). + +**示例** + +查询: + +``` sql +SELECT * FROM system.zookeeper_log WHERE (session_id = '106662742089334927') AND (xid = '10858') FORMAT Vertical; +``` + +结果: + +``` text +Row 1: +────── +type: Request +event_date: 2021-08-09 +event_time: 2021-08-09 21:38:30.291792 +address: :: +port: 2181 +session_id: 106662742089334927 +xid: 10858 +has_watch: 1 +op_num: List +path: /clickhouse/task_queue/ddl +data: +is_ephemeral: 0 +is_sequential: 0 +version: ᴺᵁᴸᴸ +requests_size: 0 +request_idx: 0 +zxid: 0 +error: ᴺᵁᴸᴸ +watch_type: ᴺᵁᴸᴸ +watch_state: ᴺᵁᴸᴸ +path_created: +stat_czxid: 0 +stat_mzxid: 0 +stat_pzxid: 0 +stat_version: 0 +stat_cversion: 0 +stat_dataLength: 0 +stat_numChildren: 0 +children: [] + +Row 2: +────── +type: Response +event_date: 2021-08-09 +event_time: 2021-08-09 21:38:30.292086 +address: :: +port: 2181 +session_id: 106662742089334927 +xid: 10858 +has_watch: 1 +op_num: List +path: /clickhouse/task_queue/ddl +data: +is_ephemeral: 0 +is_sequential: 0 +version: ᴺᵁᴸᴸ +requests_size: 0 +request_idx: 0 +zxid: 16926267 +error: ZOK +watch_type: ᴺᵁᴸᴸ +watch_state: ᴺᵁᴸᴸ +path_created: +stat_czxid: 16925469 +stat_mzxid: 16925469 +stat_pzxid: 16926179 +stat_version: 0 +stat_cversion: 7 +stat_dataLength: 0 +stat_numChildren: 7 +children: ['query-0000000006','query-0000000005','query-0000000004','query-0000000003','query-0000000002','query-0000000001','query-0000000000'] +``` + +**另请参阅** + +- [ZooKeeper](../../operations/tips.md#zookeeper) +- [ZooKeeper 指南](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 4951106f595..0ad0764d721 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -403,6 +403,36 @@ void Client::initialize(Poco::Util::Application & self) } +void Client::prepareForInteractive() +{ + clearTerminal(); + showClientVersion(); + + if (delayed_interactive) + std::cout << std::endl; + + /// Load Warnings at the beginning of connection + if (!config().has("no-warnings")) + { + try + { + std::vector messages = 
loadWarningMessages(); + if (!messages.empty()) + { + std::cout << "Warnings:" << std::endl; + for (const auto & message : messages) + std::cout << " * " << message << std::endl; + std::cout << std::endl; + } + } + catch (...) + { + /// Ignore exception + } + } +} + + int Client::main(const std::vector & /*args*/) try { @@ -429,36 +459,11 @@ try processConfig(); - if (is_interactive) - { - clearTerminal(); - showClientVersion(); - } - connect(); - if (is_interactive) + if (is_interactive && !delayed_interactive) { - /// Load Warnings at the beginning of connection - if (!config().has("no-warnings")) - { - try - { - std::vector messages = loadWarningMessages(); - if (!messages.empty()) - { - std::cout << "Warnings:" << std::endl; - for (const auto & message : messages) - std::cout << " * " << message << std::endl; - std::cout << std::endl; - } - } - catch (...) - { - /// Ignore exception - } - } - + prepareForInteractive(); runInteractive(); } else @@ -482,6 +487,12 @@ try // case so that at least we don't lose an error. return -1; } + + if (delayed_interactive) + { + prepareForInteractive(); + runInteractive(); + } } return 0; @@ -555,8 +566,9 @@ void Client::connect() if (is_interactive) { std::cout << "Connected to " << server_name << " server version " << server_version << " revision " << server_revision << "." - << std::endl << std::endl; + if (!delayed_interactive) + std::cout << std::endl; auto client_version_tuple = std::make_tuple(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH); auto server_version_tuple = std::make_tuple(server_version_major, server_version_minor, server_version_patch); @@ -1007,9 +1019,6 @@ void Client::addOptions(OptionsDescription & options_description) ("max_client_network_bandwidth", po::value(), "the maximum speed of data exchange over the network for the client in bytes per second.") ("compression", po::value(), "enable or disable compression") - ("log-level", po::value(), "client log level") - ("server_logs_file", po::value(), "put server logs into specified file") - ("query-fuzzer-runs", po::value()->default_value(0), "After executing every SELECT query, do random mutations in it and run again specified number of times. This is used for testing to discover unexpected corner cases.") ("interleave-queries-file", po::value>()->multitoken(), "file path with queries to execute before every file from 'queries-file'; multiple files can be specified (--queries-file file1 file2...); this is needed to enable more aggressive fuzzing of newly added tests (see 'query-fuzzer-runs' option)") @@ -1125,8 +1134,6 @@ void Client::processOptions(const OptionsDescription & options_description, max_client_network_bandwidth = options["max_client_network_bandwidth"].as(); if (options.count("compression")) config().setBool("compression", options["compression"].as()); - if (options.count("server_logs_file")) - server_logs_file = options["server_logs_file"].as(); if (options.count("no-warnings")) config().setBool("no-warnings", true); @@ -1161,11 +1168,11 @@ void Client::processConfig() /// - stdin is not a terminal. In this case queries are read from it. /// - -qf (--queries-file) command line option is present. /// The value of the option is used as file with query (or of multiple queries) to execute. 
- if (stdin_is_a_tty && !config().has("query") && queries_files.empty()) - { - if (config().has("query") && config().has("queries-file")) - throw Exception("Specify either `query` or `queries-file` option", ErrorCodes::BAD_ARGUMENTS); + delayed_interactive = config().has("interactive") && (config().has("query") || config().has("queries-file")); + if (stdin_is_a_tty + && (delayed_interactive || (!config().has("query") && queries_files.empty()))) + { is_interactive = true; } else diff --git a/programs/client/Client.h b/programs/client/Client.h index 2def74ef3fc..b146134bc94 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -20,6 +20,7 @@ protected: bool processWithFuzzing(const String & full_query) override; void connect() override; + void prepareForInteractive() override; void processError(const String & query) const override; String getName() const override { return "client"; } diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 9753e7f7c5d..f6849eb76de 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -182,23 +183,6 @@ void LocalServer::initialize(Poco::Util::Application & self) auto loaded_config = config_processor.loadConfig(); config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false); } - - if (config().has("logger.console") || config().has("logger.level") || config().has("logger.log")) - { - // force enable logging - config().setString("logger", "logger"); - // sensitive data rules are not used here - buildLoggers(config(), logger(), "clickhouse-local"); - } - else - { - // Turn off server logging to stderr - if (!config().has("verbose")) - { - Poco::Logger::root().setLevel("none"); - Poco::Logger::root().setChannel(Poco::AutoPtr(new Poco::NullChannel())); - } - } } @@ -412,6 +396,14 @@ void LocalServer::connect() } +void LocalServer::prepareForInteractive() +{ + clearTerminal(); + showClientVersion(); + std::cerr << std::endl; +} + + int LocalServer::main(const std::vector & /*args*/) try { @@ -422,7 +414,10 @@ try std::cout << std::fixed << std::setprecision(3); std::cerr << std::fixed << std::setprecision(3); - is_interactive = stdin_is_a_tty && !config().has("query") && !config().has("table-structure") && queries_files.empty(); + is_interactive = stdin_is_a_tty + && (config().hasOption("interactive") + || (!config().has("query") && !config().has("table-structure") && queries_files.empty())); + if (!is_interactive) { /// We will terminate process on error @@ -443,17 +438,20 @@ try applyCmdSettings(global_context); connect(); - if (is_interactive) + if (is_interactive && !delayed_interactive) { - clearTerminal(); - showClientVersion(); - std::cerr << std::endl; - + prepareForInteractive(); runInteractive(); } else { runNonInteractive(); + + if (delayed_interactive) + { + prepareForInteractive(); + runInteractive(); + } } cleanup(); @@ -478,7 +476,8 @@ catch (...) 
void LocalServer::processConfig() { - if (is_interactive) + delayed_interactive = config().has("interactive") && (config().has("query") || config().has("queries-file")); + if (is_interactive && !delayed_interactive) { if (config().has("query") && config().has("queries-file")) throw Exception("Specify either `query` or `queries-file` option", ErrorCodes::BAD_ARGUMENTS); @@ -490,6 +489,11 @@ void LocalServer::processConfig() } else { + if (delayed_interactive) + { + load_suggestions = true; + } + need_render_progress = config().getBool("progress", false); echo_queries = config().hasOption("echo") || config().hasOption("verbose"); ignore_error = config().getBool("ignore-error", false); @@ -497,6 +501,35 @@ void LocalServer::processConfig() } print_stack_trace = config().getBool("stacktrace", false); + auto logging = (config().has("logger.console") + || config().has("logger.level") + || config().has("log-level") + || config().has("logger.log")); + + auto file_logging = config().has("server_logs_file"); + if (is_interactive && logging && !file_logging) + throw Exception("For interactive mode logging is allowed only with --server_logs_file option", + ErrorCodes::BAD_ARGUMENTS); + + if (file_logging) + { + auto level = Poco::Logger::parseLevel(config().getString("log-level", "trace")); + Poco::Logger::root().setLevel(level); + Poco::Logger::root().setChannel(Poco::AutoPtr(new Poco::SimpleFileChannel(server_logs_file))); + } + else if (logging) + { + // force enable logging + config().setString("logger", "logger"); + // sensitive data rules are not used here + buildLoggers(config(), logger(), "clickhouse-local"); + } + else + { + Poco::Logger::root().setLevel("none"); + Poco::Logger::root().setChannel(Poco::AutoPtr(new Poco::NullChannel())); + } + shared_context = Context::createShared(); global_context = Context::createGlobal(shared_context.get()); diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index ce0df06c86a..e87e6bd9a0d 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -34,6 +34,7 @@ protected: bool executeMultiQuery(const String & all_queries_text) override; void connect() override; + void prepareForInteractive() override; void processError(const String & query) const override; String getName() const override { return "local"; } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index ce7d6973f68..29845f23d92 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include #include @@ -86,7 +86,7 @@ # include "config_core.h" # include "Common/config_version.h" # if USE_OPENCL -# include "Common/BitonicSort.h" // Y_IGNORE +# include "Common/BitonicSort.h" # endif #endif @@ -595,8 +595,8 @@ if (ThreadFuzzer::instance().isEffective()) if (config().getBool("remap_executable", false)) { LOG_DEBUG(log, "Will remap executable in memory."); - remapExecutable(); - LOG_DEBUG(log, "The code in memory has been successfully remapped."); + size_t size = remapExecutable(); + LOG_DEBUG(log, "The code ({}) in memory has been successfully remapped.", ReadableSize(size)); } if (config().getBool("mlock_executable", false)) @@ -883,7 +883,7 @@ if (ThreadFuzzer::instance().isEffective()) }, /* already_loaded = */ false); /// Reload it right now (initial loading) - auto & access_control = global_context->getAccessControlManager(); + auto & access_control = global_context->getAccessControl(); if (config().has("custom_settings_prefixes")) 
access_control.setCustomSettingsPrefixes(config().getString("custom_settings_prefixes")); diff --git a/programs/server/config.d/metric_log.xml b/programs/server/config.d/metric_log.xml deleted file mode 120000 index 7f033c60a64..00000000000 --- a/programs/server/config.d/metric_log.xml +++ /dev/null @@ -1 +0,0 @@ -../../../tests/config/config.d/metric_log.xml \ No newline at end of file diff --git a/programs/server/config.d/part_log.xml b/programs/server/config.d/part_log.xml deleted file mode 120000 index d97ea7f226d..00000000000 --- a/programs/server/config.d/part_log.xml +++ /dev/null @@ -1 +0,0 @@ -../../../tests/config/config.d/part_log.xml \ No newline at end of file diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControl.cpp similarity index 84% rename from src/Access/AccessControlManager.cpp rename to src/Access/AccessControl.cpp index ff3ebdea9f8..378d8e2c264 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControl.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -55,10 +55,10 @@ namespace } -class AccessControlManager::ContextAccessCache +class AccessControl::ContextAccessCache { public: - explicit ContextAccessCache(const AccessControlManager & manager_) : manager(manager_) {} + explicit ContextAccessCache(const AccessControl & access_control_) : access_control(access_control_) {} std::shared_ptr getContextAccess(const ContextAccessParams & params) { @@ -71,19 +71,19 @@ public: /// No user, probably the user has been dropped while it was in the cache. cache.remove(params); } - auto res = std::shared_ptr(new ContextAccess(manager, params)); + auto res = std::shared_ptr(new ContextAccess(access_control, params)); cache.add(params, res); return res; } private: - const AccessControlManager & manager; + const AccessControl & access_control; Poco::ExpireCache> cache; std::mutex mutex; }; -class AccessControlManager::CustomSettingsPrefixes +class AccessControl::CustomSettingsPrefixes { public: void registerPrefixes(const Strings & prefixes_) @@ -130,7 +130,7 @@ private: }; -AccessControlManager::AccessControlManager() +AccessControl::AccessControl() : MultipleAccessStorage("user directories"), context_access_cache(std::make_unique(*this)), role_cache(std::make_unique(*this)), @@ -143,9 +143,9 @@ AccessControlManager::AccessControlManager() } -AccessControlManager::~AccessControlManager() = default; +AccessControl::~AccessControl() = default; -void AccessControlManager::setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_) +void AccessControl::setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_) { auto storages = getStoragesPtr(); for (const auto & storage : *storages) @@ -159,12 +159,12 @@ void AccessControlManager::setUsersConfig(const Poco::Util::AbstractConfiguratio addUsersConfigStorage(users_config_); } -void AccessControlManager::addUsersConfigStorage(const Poco::Util::AbstractConfiguration & users_config_) +void AccessControl::addUsersConfigStorage(const Poco::Util::AbstractConfiguration & users_config_) { addUsersConfigStorage(UsersConfigAccessStorage::STORAGE_TYPE, users_config_); } -void AccessControlManager::addUsersConfigStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & users_config_) +void AccessControl::addUsersConfigStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & users_config_) { auto check_setting_name_function = [this](const std::string_view & setting_name) { checkSettingNameIsAllowed(setting_name); }; auto 
new_storage = std::make_shared(storage_name_, check_setting_name_function); @@ -173,7 +173,7 @@ void AccessControlManager::addUsersConfigStorage(const String & storage_name_, c LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath()); } -void AccessControlManager::addUsersConfigStorage( +void AccessControl::addUsersConfigStorage( const String & users_config_path_, const String & include_from_path_, const String & preprocessed_dir_, @@ -183,7 +183,7 @@ void AccessControlManager::addUsersConfigStorage( UsersConfigAccessStorage::STORAGE_TYPE, users_config_path_, include_from_path_, preprocessed_dir_, get_zookeeper_function_); } -void AccessControlManager::addUsersConfigStorage( +void AccessControl::addUsersConfigStorage( const String & storage_name_, const String & users_config_path_, const String & include_from_path_, @@ -206,7 +206,7 @@ void AccessControlManager::addUsersConfigStorage( LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath()); } -void AccessControlManager::reloadUsersConfigs() +void AccessControl::reloadUsersConfigs() { auto storages = getStoragesPtr(); for (const auto & storage : *storages) @@ -216,7 +216,7 @@ void AccessControlManager::reloadUsersConfigs() } } -void AccessControlManager::startPeriodicReloadingUsersConfigs() +void AccessControl::startPeriodicReloadingUsersConfigs() { auto storages = getStoragesPtr(); for (const auto & storage : *storages) @@ -226,7 +226,7 @@ void AccessControlManager::startPeriodicReloadingUsersConfigs() } } -void AccessControlManager::addReplicatedStorage( +void AccessControl::addReplicatedStorage( const String & storage_name_, const String & zookeeper_path_, const zkutil::GetZooKeeper & get_zookeeper_function_) @@ -243,12 +243,12 @@ void AccessControlManager::addReplicatedStorage( new_storage->startup(); } -void AccessControlManager::addDiskStorage(const String & directory_, bool readonly_) +void AccessControl::addDiskStorage(const String & directory_, bool readonly_) { addDiskStorage(DiskAccessStorage::STORAGE_TYPE, directory_, readonly_); } -void AccessControlManager::addDiskStorage(const String & storage_name_, const String & directory_, bool readonly_) +void AccessControl::addDiskStorage(const String & storage_name_, const String & directory_, bool readonly_) { auto storages = getStoragesPtr(); for (const auto & storage : *storages) @@ -269,7 +269,7 @@ void AccessControlManager::addDiskStorage(const String & storage_name_, const St } -void AccessControlManager::addMemoryStorage(const String & storage_name_) +void AccessControl::addMemoryStorage(const String & storage_name_) { auto storages = getStoragesPtr(); for (const auto & storage : *storages) @@ -283,7 +283,7 @@ void AccessControlManager::addMemoryStorage(const String & storage_name_) } -void AccessControlManager::addLDAPStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & config_, const String & prefix_) +void AccessControl::addLDAPStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & config_, const String & prefix_) { auto new_storage = std::make_shared(storage_name_, this, config_, prefix_); addStorage(new_storage); @@ -291,7 +291,7 @@ void AccessControlManager::addLDAPStorage(const String & storage_name_, const Po } -void AccessControlManager::addStoragesFromUserDirectoriesConfig( +void 
AccessControl::addStoragesFromUserDirectoriesConfig( const Poco::Util::AbstractConfiguration & config, const String & key, const String & config_dir, @@ -350,7 +350,7 @@ void AccessControlManager::addStoragesFromUserDirectoriesConfig( } -void AccessControlManager::addStoragesFromMainConfig( +void AccessControl::addStoragesFromMainConfig( const Poco::Util::AbstractConfiguration & config, const String & config_path, const zkutil::GetZooKeeper & get_zookeeper_function) @@ -388,47 +388,47 @@ void AccessControlManager::addStoragesFromMainConfig( } -UUID AccessControlManager::login(const Credentials & credentials, const Poco::Net::IPAddress & address) const +UUID AccessControl::login(const Credentials & credentials, const Poco::Net::IPAddress & address) const { return MultipleAccessStorage::login(credentials, address, *external_authenticators); } -void AccessControlManager::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config) +void AccessControl::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config) { external_authenticators->setConfiguration(config, getLogger()); } -void AccessControlManager::setDefaultProfileName(const String & default_profile_name) +void AccessControl::setDefaultProfileName(const String & default_profile_name) { settings_profiles_cache->setDefaultProfileName(default_profile_name); } -void AccessControlManager::setCustomSettingsPrefixes(const Strings & prefixes) +void AccessControl::setCustomSettingsPrefixes(const Strings & prefixes) { custom_settings_prefixes->registerPrefixes(prefixes); } -void AccessControlManager::setCustomSettingsPrefixes(const String & comma_separated_prefixes) +void AccessControl::setCustomSettingsPrefixes(const String & comma_separated_prefixes) { Strings prefixes; splitInto<','>(prefixes, comma_separated_prefixes); setCustomSettingsPrefixes(prefixes); } -bool AccessControlManager::isSettingNameAllowed(const std::string_view & setting_name) const +bool AccessControl::isSettingNameAllowed(const std::string_view & setting_name) const { return custom_settings_prefixes->isSettingNameAllowed(setting_name); } -void AccessControlManager::checkSettingNameIsAllowed(const std::string_view & setting_name) const +void AccessControl::checkSettingNameIsAllowed(const std::string_view & setting_name) const { custom_settings_prefixes->checkSettingNameIsAllowed(setting_name); } -std::shared_ptr AccessControlManager::getContextAccess( +std::shared_ptr AccessControl::getContextAccess( const UUID & user_id, const std::vector & current_roles, bool use_default_roles, @@ -464,13 +464,13 @@ std::shared_ptr AccessControlManager::getContextAccess( } -std::shared_ptr AccessControlManager::getContextAccess(const ContextAccessParams & params) const +std::shared_ptr AccessControl::getContextAccess(const ContextAccessParams & params) const { return context_access_cache->getContextAccess(params); } -std::shared_ptr AccessControlManager::getEnabledRoles( +std::shared_ptr AccessControl::getEnabledRoles( const std::vector & current_roles, const std::vector & current_roles_with_admin_option) const { @@ -478,13 +478,13 @@ std::shared_ptr AccessControlManager::getEnabledRoles( } -std::shared_ptr AccessControlManager::getEnabledRowPolicies(const UUID & user_id, const boost::container::flat_set & enabled_roles) const +std::shared_ptr AccessControl::getEnabledRowPolicies(const UUID & user_id, const boost::container::flat_set & enabled_roles) const { return row_policy_cache->getEnabledRowPolicies(user_id, enabled_roles); } 
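// --- Illustrative note (not part of the patch above): a minimal caller-side sketch of the
// --- AccessControlManager -> AccessControl rename shown in this file. Only the type name and
// --- header path change; the member signatures visible in these hunks stay the same. The
// --- authenticate() helper and the "custom_" prefix below are assumptions for the example,
// --- not code from the repository.
#include <Access/AccessControl.h>      // previously <Access/AccessControlManager.h>
#include <Poco/Net/IPAddress.h>

namespace DB { class Credentials; }    // passed by const reference, so a forward declaration suffices here

DB::UUID authenticate(DB::AccessControl & access_control,        // previously DB::AccessControlManager
                      const DB::Credentials & credentials,
                      const Poco::Net::IPAddress & address)
{
    access_control.setCustomSettingsPrefixes("custom_");          // comma-separated list of allowed custom-setting prefixes
    return access_control.login(credentials, address);            // resolves the user's UUID or throws on authentication failure
}
// --- End of illustrative note; the diff continues below.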
-std::shared_ptr AccessControlManager::getEnabledQuota( +std::shared_ptr AccessControl::getEnabledQuota( const UUID & user_id, const String & user_name, const boost::container::flat_set & enabled_roles, @@ -496,13 +496,13 @@ std::shared_ptr AccessControlManager::getEnabledQuota( } -std::vector AccessControlManager::getAllQuotasUsage() const +std::vector AccessControl::getAllQuotasUsage() const { return quota_cache->getAllQuotasUsage(); } -std::shared_ptr AccessControlManager::getEnabledSettings( +std::shared_ptr AccessControl::getEnabledSettings( const UUID & user_id, const SettingsProfileElements & settings_from_user, const boost::container::flat_set & enabled_roles, @@ -511,13 +511,13 @@ std::shared_ptr AccessControlManager::getEnabledSettings( return settings_profiles_cache->getEnabledSettings(user_id, settings_from_user, enabled_roles, settings_from_enabled_roles); } -std::shared_ptr AccessControlManager::getSettingsProfileInfo(const UUID & profile_id) +std::shared_ptr AccessControl::getSettingsProfileInfo(const UUID & profile_id) { return settings_profiles_cache->getSettingsProfileInfo(profile_id); } -const ExternalAuthenticators & AccessControlManager::getExternalAuthenticators() const +const ExternalAuthenticators & AccessControl::getExternalAuthenticators() const { return *external_authenticators; } diff --git a/src/Access/AccessControlManager.h b/src/Access/AccessControl.h similarity index 98% rename from src/Access/AccessControlManager.h rename to src/Access/AccessControl.h index 79f7073ef69..779334f2a45 100644 --- a/src/Access/AccessControlManager.h +++ b/src/Access/AccessControl.h @@ -42,11 +42,11 @@ struct Settings; /// Manages access control entities. -class AccessControlManager : public MultipleAccessStorage +class AccessControl : public MultipleAccessStorage { public: - AccessControlManager(); - ~AccessControlManager() override; + AccessControl(); + ~AccessControl() override; /// Parses access entities from a configuration loaded from users.xml. /// This function add UsersConfigAccessStorage if it wasn't added before. diff --git a/src/Access/AccessEntityIO.cpp b/src/Access/AccessEntityIO.cpp index 2160f3e9db8..199b3b22efc 100644 --- a/src/Access/AccessEntityIO.cpp +++ b/src/Access/AccessEntityIO.cpp @@ -7,26 +7,26 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include diff --git a/src/Access/AccessFlags.h b/src/Access/AccessFlags.h deleted file mode 100644 index b107248e02b..00000000000 --- a/src/Access/AccessFlags.h +++ /dev/null @@ -1,484 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -/// Represents a combination of access types which can be granted globally, on databases, tables, columns, etc. -/// For example "SELECT, CREATE USER" is an access type. -class AccessFlags -{ -public: - AccessFlags(AccessType type); - - /// The same as AccessFlags(AccessType::NONE). - AccessFlags() = default; - - /// Constructs from a string like "SELECT". 
- AccessFlags(const std::string_view & keyword); - - /// Constructs from a list of strings like "SELECT, UPDATE, INSERT". - AccessFlags(const std::vector & keywords); - AccessFlags(const Strings & keywords); - - AccessFlags(const AccessFlags & src) = default; - AccessFlags(AccessFlags && src) = default; - AccessFlags & operator =(const AccessFlags & src) = default; - AccessFlags & operator =(AccessFlags && src) = default; - - /// Returns the access type which contains two specified access types. - AccessFlags & operator |=(const AccessFlags & other) { flags |= other.flags; return *this; } - friend AccessFlags operator |(const AccessFlags & left, const AccessFlags & right) { return AccessFlags(left) |= right; } - - /// Returns the access type which contains the common part of two access types. - AccessFlags & operator &=(const AccessFlags & other) { flags &= other.flags; return *this; } - friend AccessFlags operator &(const AccessFlags & left, const AccessFlags & right) { return AccessFlags(left) &= right; } - - /// Returns the access type which contains only the part of the first access type which is not the part of the second access type. - /// (lhs - rhs) is the same as (lhs & ~rhs). - AccessFlags & operator -=(const AccessFlags & other) { flags &= ~other.flags; return *this; } - friend AccessFlags operator -(const AccessFlags & left, const AccessFlags & right) { return AccessFlags(left) -= right; } - - AccessFlags operator ~() const { AccessFlags res; res.flags = ~flags; return res; } - - bool isEmpty() const { return flags.none(); } - explicit operator bool() const { return !isEmpty(); } - bool contains(const AccessFlags & other) const { return (flags & other.flags) == other.flags; } - - friend bool operator ==(const AccessFlags & left, const AccessFlags & right) { return left.flags == right.flags; } - friend bool operator !=(const AccessFlags & left, const AccessFlags & right) { return !(left == right); } - friend bool operator <(const AccessFlags & left, const AccessFlags & right) { return memcmp(&left.flags, &right.flags, sizeof(Flags)) < 0; } - friend bool operator >(const AccessFlags & left, const AccessFlags & right) { return right < left; } - friend bool operator <=(const AccessFlags & left, const AccessFlags & right) { return !(right < left); } - friend bool operator >=(const AccessFlags & left, const AccessFlags & right) { return !(left < right); } - - void clear() { flags.reset(); } - - /// Returns a comma-separated list of keywords, like "SELECT, CREATE USER, UPDATE". - String toString() const; - - /// Returns a list of access types. - std::vector toAccessTypes() const; - - /// Returns a list of keywords. - std::vector toKeywords() const; - - /// Returns all the flags. - /// These are the same as (allGlobalFlags() | allDatabaseFlags() | allTableFlags() | allColumnsFlags() | allDictionaryFlags()). - static AccessFlags allFlags(); - - /// Returns all the global flags. - static AccessFlags allGlobalFlags(); - - /// Returns all the flags related to a database. - static AccessFlags allDatabaseFlags(); - - /// Returns all the flags related to a table. - static AccessFlags allTableFlags(); - - /// Returns all the flags related to a column. - static AccessFlags allColumnFlags(); - - /// Returns all the flags related to a dictionary. - static AccessFlags allDictionaryFlags(); - - /// Returns all the flags which could be granted on the global level. - /// The same as allFlags(). 
- static AccessFlags allFlagsGrantableOnGlobalLevel(); - - /// Returns all the flags which could be granted on the database level. - /// Returns allDatabaseFlags() | allTableFlags() | allDictionaryFlags() | allColumnFlags(). - static AccessFlags allFlagsGrantableOnDatabaseLevel(); - - /// Returns all the flags which could be granted on the table level. - /// Returns allTableFlags() | allDictionaryFlags() | allColumnFlags(). - static AccessFlags allFlagsGrantableOnTableLevel(); - - /// Returns all the flags which could be granted on the global level. - /// The same as allColumnFlags(). - static AccessFlags allFlagsGrantableOnColumnLevel(); - -private: - static constexpr size_t NUM_FLAGS = 128; - using Flags = std::bitset; - Flags flags; - - AccessFlags(const Flags & flags_) : flags(flags_) {} - - template - class Impl; -}; - - -namespace ErrorCodes -{ - extern const int UNKNOWN_ACCESS_TYPE; -} - -template -class AccessFlags::Impl -{ -public: - static const Impl & instance() - { - static const Impl res; - return res; - } - - Flags accessTypeToFlags(AccessType type) const - { - return access_type_to_flags_mapping[static_cast(type)]; - } - - Flags keywordToFlags(const std::string_view & keyword) const - { - auto it = keyword_to_flags_map.find(keyword); - if (it == keyword_to_flags_map.end()) - { - String uppercased_keyword{keyword}; - boost::to_upper(uppercased_keyword); - it = keyword_to_flags_map.find(uppercased_keyword); - if (it == keyword_to_flags_map.end()) - throw Exception("Unknown access type: " + String(keyword), ErrorCodes::UNKNOWN_ACCESS_TYPE); - } - return it->second; - } - - Flags keywordsToFlags(const std::vector & keywords) const - { - Flags res; - for (const auto & keyword : keywords) - res |= keywordToFlags(keyword); - return res; - } - - Flags keywordsToFlags(const Strings & keywords) const - { - Flags res; - for (const auto & keyword : keywords) - res |= keywordToFlags(keyword); - return res; - } - - std::vector flagsToAccessTypes(const Flags & flags_) const - { - std::vector access_types; - flagsToAccessTypesRec(flags_, access_types, *all_node); - return access_types; - } - - std::vector flagsToKeywords(const Flags & flags_) const - { - std::vector keywords; - flagsToKeywordsRec(flags_, keywords, *all_node); - return keywords; - } - - String flagsToString(const Flags & flags_) const - { - auto keywords = flagsToKeywords(flags_); - if (keywords.empty()) - return "USAGE"; - String str; - for (const auto & keyword : keywords) - { - if (!str.empty()) - str += ", "; - str += keyword; - } - return str; - } - - const Flags & getAllFlags() const { return all_flags; } - const Flags & getGlobalFlags() const { return all_flags_for_target[GLOBAL]; } - const Flags & getDatabaseFlags() const { return all_flags_for_target[DATABASE]; } - const Flags & getTableFlags() const { return all_flags_for_target[TABLE]; } - const Flags & getColumnFlags() const { return all_flags_for_target[COLUMN]; } - const Flags & getDictionaryFlags() const { return all_flags_for_target[DICTIONARY]; } - const Flags & getAllFlagsGrantableOnGlobalLevel() const { return getAllFlags(); } - const Flags & getAllFlagsGrantableOnDatabaseLevel() const { return all_flags_grantable_on_database_level; } - const Flags & getAllFlagsGrantableOnTableLevel() const { return all_flags_grantable_on_table_level; } - const Flags & getAllFlagsGrantableOnColumnLevel() const { return getColumnFlags(); } - -private: - enum NodeType - { - UNKNOWN = -2, - GROUP = -1, - GLOBAL, - DATABASE, - TABLE, - VIEW = TABLE, - COLUMN, - DICTIONARY, - 
}; - - struct Node; - using NodePtr = std::unique_ptr; - - struct Node - { - const String keyword; - NodeType node_type; - AccessType access_type = AccessType::NONE; - Strings aliases; - Flags flags; - std::vector children; - - Node(String keyword_, NodeType node_type_ = UNKNOWN) : keyword(std::move(keyword_)), node_type(node_type_) {} - - void setFlag(size_t flag) { flags.set(flag); } - - void addChild(NodePtr child) - { - flags |= child->flags; - children.push_back(std::move(child)); - } - }; - - static String replaceUnderscoreWithSpace(const std::string_view & str) - { - String res{str}; - boost::replace_all(res, "_", " "); - return res; - } - - static Strings splitAliases(const std::string_view & str) - { - Strings aliases; - boost::split(aliases, str, boost::is_any_of(",")); - for (auto & alias : aliases) - boost::trim(alias); - return aliases; - } - - static void makeNode( - AccessType access_type, - const std::string_view & name, - const std::string_view & aliases, - NodeType node_type, - const std::string_view & parent_group_name, - std::unordered_map & nodes, - std::unordered_map & owned_nodes, - size_t & next_flag) - { - NodePtr node; - auto keyword = replaceUnderscoreWithSpace(name); - auto it = owned_nodes.find(keyword); - if (it != owned_nodes.end()) - { - node = std::move(it->second); - owned_nodes.erase(it); - } - else - { - if (nodes.count(keyword)) - throw Exception(keyword + " declared twice", ErrorCodes::LOGICAL_ERROR); - node = std::make_unique(keyword, node_type); - nodes[node->keyword] = node.get(); - } - - node->access_type = access_type; - node->node_type = node_type; - node->aliases = splitAliases(aliases); - if (node_type != GROUP) - node->setFlag(next_flag++); - - bool has_parent_group = (parent_group_name != std::string_view{"NONE"}); - if (!has_parent_group) - { - std::string_view keyword_as_string_view = node->keyword; - owned_nodes[keyword_as_string_view] = std::move(node); - return; - } - - auto parent_keyword = replaceUnderscoreWithSpace(parent_group_name); - auto it_parent = nodes.find(parent_keyword); - if (it_parent == nodes.end()) - { - auto parent_node = std::make_unique(parent_keyword); - it_parent = nodes.emplace(parent_node->keyword, parent_node.get()).first; - assert(!owned_nodes.count(parent_node->keyword)); - std::string_view parent_keyword_as_string_view = parent_node->keyword; - owned_nodes[parent_keyword_as_string_view] = std::move(parent_node); - } - it_parent->second->addChild(std::move(node)); - } - - void makeNodes() - { - std::unordered_map owned_nodes; - std::unordered_map nodes; - size_t next_flag = 0; - -#define MAKE_ACCESS_FLAGS_NODE(name, aliases, node_type, parent_group_name) \ - makeNode(AccessType::name, #name, aliases, node_type, #parent_group_name, nodes, owned_nodes, next_flag); - - APPLY_FOR_ACCESS_TYPES(MAKE_ACCESS_FLAGS_NODE) - -#undef MAKE_ACCESS_FLAGS_NODE - - if (!owned_nodes.count("NONE")) - throw Exception("'NONE' not declared", ErrorCodes::LOGICAL_ERROR); - if (!owned_nodes.count("ALL")) - throw Exception("'ALL' not declared", ErrorCodes::LOGICAL_ERROR); - - all_node = std::move(owned_nodes["ALL"]); - none_node = std::move(owned_nodes["NONE"]); - owned_nodes.erase("ALL"); - owned_nodes.erase("NONE"); - - if (!owned_nodes.empty()) - { - const auto & unused_node = *(owned_nodes.begin()->second); - if (unused_node.node_type == UNKNOWN) - throw Exception("Parent group '" + unused_node.keyword + "' not found", ErrorCodes::LOGICAL_ERROR); - else - throw Exception("Access type '" + unused_node.keyword + "' should have parent 
group", ErrorCodes::LOGICAL_ERROR); - } - } - - void makeKeywordToFlagsMap(Node * start_node = nullptr) - { - if (!start_node) - { - makeKeywordToFlagsMap(none_node.get()); - start_node = all_node.get(); - } - - start_node->aliases.emplace_back(start_node->keyword); - for (auto & alias : start_node->aliases) - { - boost::to_upper(alias); - keyword_to_flags_map[alias] = start_node->flags; - } - - for (auto & child : start_node->children) - makeKeywordToFlagsMap(child.get()); - } - - void makeAccessTypeToFlagsMapping(Node * start_node = nullptr) - { - if (!start_node) - { - makeAccessTypeToFlagsMapping(none_node.get()); - start_node = all_node.get(); - } - - size_t index = static_cast(start_node->access_type); - access_type_to_flags_mapping.resize(std::max(index + 1, access_type_to_flags_mapping.size())); - access_type_to_flags_mapping[index] = start_node->flags; - - for (auto & child : start_node->children) - makeAccessTypeToFlagsMapping(child.get()); - } - - void collectAllFlags(const Node * start_node = nullptr) - { - if (!start_node) - { - start_node = all_node.get(); - all_flags = start_node->flags; - } - if (start_node->node_type != GROUP) - { - assert(static_cast(start_node->node_type) < std::size(all_flags_for_target)); - all_flags_for_target[start_node->node_type] |= start_node->flags; - } - for (const auto & child : start_node->children) - collectAllFlags(child.get()); - - all_flags_grantable_on_table_level = all_flags_for_target[TABLE] | all_flags_for_target[DICTIONARY] | all_flags_for_target[COLUMN]; - all_flags_grantable_on_database_level = all_flags_for_target[DATABASE] | all_flags_grantable_on_table_level; - } - - Impl() - { - makeNodes(); - makeKeywordToFlagsMap(); - makeAccessTypeToFlagsMapping(); - collectAllFlags(); - } - - static void flagsToAccessTypesRec(const Flags & flags_, std::vector & access_types, const Node & start_node) - { - Flags matching_flags = (flags_ & start_node.flags); - if (matching_flags.any()) - { - if (matching_flags == start_node.flags) - { - access_types.push_back(start_node.access_type); - } - else - { - for (const auto & child : start_node.children) - flagsToAccessTypesRec(flags_, access_types, *child); - } - } - } - - static void flagsToKeywordsRec(const Flags & flags_, std::vector & keywords, const Node & start_node) - { - Flags matching_flags = (flags_ & start_node.flags); - if (matching_flags.any()) - { - if (matching_flags == start_node.flags) - { - keywords.push_back(start_node.keyword); - } - else - { - for (const auto & child : start_node.children) - flagsToKeywordsRec(flags_, keywords, *child); - } - } - } - - NodePtr all_node; - NodePtr none_node; - std::unordered_map keyword_to_flags_map; - std::vector access_type_to_flags_mapping; - Flags all_flags; - Flags all_flags_for_target[static_cast(DICTIONARY) + 1]; - Flags all_flags_grantable_on_database_level; - Flags all_flags_grantable_on_table_level; -}; - - -inline AccessFlags::AccessFlags(AccessType type) : flags(Impl<>::instance().accessTypeToFlags(type)) {} -inline AccessFlags::AccessFlags(const std::string_view & keyword) : flags(Impl<>::instance().keywordToFlags(keyword)) {} -inline AccessFlags::AccessFlags(const std::vector & keywords) : flags(Impl<>::instance().keywordsToFlags(keywords)) {} -inline AccessFlags::AccessFlags(const Strings & keywords) : flags(Impl<>::instance().keywordsToFlags(keywords)) {} -inline String AccessFlags::toString() const { return Impl<>::instance().flagsToString(flags); } -inline std::vector AccessFlags::toAccessTypes() const { return 
Impl<>::instance().flagsToAccessTypes(flags); } -inline std::vector AccessFlags::toKeywords() const { return Impl<>::instance().flagsToKeywords(flags); } -inline AccessFlags AccessFlags::allFlags() { return Impl<>::instance().getAllFlags(); } -inline AccessFlags AccessFlags::allGlobalFlags() { return Impl<>::instance().getGlobalFlags(); } -inline AccessFlags AccessFlags::allDatabaseFlags() { return Impl<>::instance().getDatabaseFlags(); } -inline AccessFlags AccessFlags::allTableFlags() { return Impl<>::instance().getTableFlags(); } -inline AccessFlags AccessFlags::allColumnFlags() { return Impl<>::instance().getColumnFlags(); } -inline AccessFlags AccessFlags::allDictionaryFlags() { return Impl<>::instance().getDictionaryFlags(); } -inline AccessFlags AccessFlags::allFlagsGrantableOnGlobalLevel() { return Impl<>::instance().getAllFlagsGrantableOnGlobalLevel(); } -inline AccessFlags AccessFlags::allFlagsGrantableOnDatabaseLevel() { return Impl<>::instance().getAllFlagsGrantableOnDatabaseLevel(); } -inline AccessFlags AccessFlags::allFlagsGrantableOnTableLevel() { return Impl<>::instance().getAllFlagsGrantableOnTableLevel(); } -inline AccessFlags AccessFlags::allFlagsGrantableOnColumnLevel() { return Impl<>::instance().getAllFlagsGrantableOnColumnLevel(); } - -inline AccessFlags operator |(AccessType left, AccessType right) { return AccessFlags(left) | right; } -inline AccessFlags operator &(AccessType left, AccessType right) { return AccessFlags(left) & right; } -inline AccessFlags operator -(AccessType left, AccessType right) { return AccessFlags(left) - right; } -inline AccessFlags operator ~(AccessType x) { return ~AccessFlags(x); } - -} diff --git a/src/Access/AccessRights.h b/src/Access/AccessRights.h index 2657b66d824..c3f75b8c303 100644 --- a/src/Access/AccessRights.h +++ b/src/Access/AccessRights.h @@ -1,7 +1,8 @@ #pragma once #include -#include +#include +#include #include #include diff --git a/src/Access/AllowedClientHosts.h b/src/Access/AllowedClientHosts.h deleted file mode 100644 index 30c0dac076e..00000000000 --- a/src/Access/AllowedClientHosts.h +++ /dev/null @@ -1,388 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fs = std::filesystem; - -namespace DB -{ - -using Strings = std::vector; - -/// Represents lists of hosts a user is allowed to connect to server from. 
-class AllowedClientHosts -{ -public: - using IPAddress = Poco::Net::IPAddress; - - class IPSubnet - { - public: - IPSubnet() {} - IPSubnet(const IPAddress & prefix_, const IPAddress & mask_) { set(prefix_, mask_); } - IPSubnet(const IPAddress & prefix_, size_t num_prefix_bits) { set(prefix_, num_prefix_bits); } - explicit IPSubnet(const IPAddress & address) { set(address); } - explicit IPSubnet(const String & str); - - const IPAddress & getPrefix() const { return prefix; } - const IPAddress & getMask() const { return mask; } - bool isMaskAllBitsOne() const; - String toString() const; - - friend bool operator ==(const IPSubnet & lhs, const IPSubnet & rhs) { return (lhs.prefix == rhs.prefix) && (lhs.mask == rhs.mask); } - friend bool operator !=(const IPSubnet & lhs, const IPSubnet & rhs) { return !(lhs == rhs); } - - private: - void set(const IPAddress & prefix_, const IPAddress & mask_); - void set(const IPAddress & prefix_, size_t num_prefix_bits); - void set(const IPAddress & address); - - IPAddress prefix; - IPAddress mask; - }; - - struct AnyHostTag {}; - - AllowedClientHosts() {} - AllowedClientHosts(AnyHostTag) { addAnyHost(); } - ~AllowedClientHosts() {} - - AllowedClientHosts(const AllowedClientHosts & src) = default; - AllowedClientHosts & operator =(const AllowedClientHosts & src) = default; - AllowedClientHosts(AllowedClientHosts && src) = default; - AllowedClientHosts & operator =(AllowedClientHosts && src) = default; - - /// Removes all contained addresses. This will disallow all hosts. - void clear(); - - bool empty() const; - - /// Allows exact IP address. - /// For example, 213.180.204.3 or 2a02:6b8::3 - void addAddress(const IPAddress & address); - void addAddress(const String & address) { addAddress(IPAddress(address)); } - void removeAddress(const IPAddress & address); - void removeAddress(const String & address) { removeAddress(IPAddress{address}); } - const std::vector & getAddresses() const { return addresses; } - - /// Allows an IP subnet. - /// For example, 312.234.1.1/255.255.255.0 or 2a02:6b8::3/64 - void addSubnet(const IPSubnet & subnet); - void addSubnet(const String & subnet) { addSubnet(IPSubnet{subnet}); } - void addSubnet(const IPAddress & prefix, const IPAddress & mask) { addSubnet(IPSubnet{prefix, mask}); } - void addSubnet(const IPAddress & prefix, size_t num_prefix_bits) { addSubnet(IPSubnet{prefix, num_prefix_bits}); } - void removeSubnet(const IPSubnet & subnet); - void removeSubnet(const String & subnet) { removeSubnet(IPSubnet{subnet}); } - void removeSubnet(const IPAddress & prefix, const IPAddress & mask) { removeSubnet(IPSubnet{prefix, mask}); } - void removeSubnet(const IPAddress & prefix, size_t num_prefix_bits) { removeSubnet(IPSubnet{prefix, num_prefix_bits}); } - const std::vector & getSubnets() const { return subnets; } - - /// Allows an exact host name. The `contains()` function will check that the provided address equals to one of that host's addresses. - void addName(const String & name); - void removeName(const String & name); - const std::vector & getNames() const { return names; } - - /// Allows the host names matching a regular expression. - void addNameRegexp(const String & name_regexp); - void removeNameRegexp(const String & name_regexp); - const std::vector & getNameRegexps() const { return name_regexps; } - - /// Allows IP addresses or host names using LIKE pattern. - /// This pattern can contain % and _ wildcard characters. - /// For example, addLikePattern("%") will allow all addresses. 
- void addLikePattern(const String & pattern); - void removeLikePattern(const String & like_pattern); - const std::vector & getLikePatterns() const { return like_patterns; } - - /// Allows local host. - void addLocalHost(); - void removeLocalHost(); - bool containsLocalHost() const { return local_host;} - - /// Allows any host. - void addAnyHost(); - bool containsAnyHost() const { return any_host;} - - void add(const AllowedClientHosts & other); - void remove(const AllowedClientHosts & other); - - /// Checks if the provided address is in the list. Returns false if not. - bool contains(const IPAddress & address) const; - - friend bool operator ==(const AllowedClientHosts & lhs, const AllowedClientHosts & rhs); - friend bool operator !=(const AllowedClientHosts & lhs, const AllowedClientHosts & rhs) { return !(lhs == rhs); } - -private: - std::vector addresses; - std::vector subnets; - Strings names; - Strings name_regexps; - Strings like_patterns; - bool any_host = false; - bool local_host = false; -}; - - -inline void AllowedClientHosts::IPSubnet::set(const IPAddress & prefix_, const IPAddress & mask_) -{ - prefix = prefix_; - mask = mask_; - - if (prefix.family() != mask.family()) - { - if (prefix.family() == IPAddress::IPv4) - prefix = IPAddress("::ffff:" + prefix.toString()); - - if (mask.family() == IPAddress::IPv4) - mask = IPAddress(96, IPAddress::IPv6) | IPAddress("::ffff:" + mask.toString()); - } - - prefix = prefix & mask; - - if (prefix.family() == IPAddress::IPv4) - { - if ((prefix & IPAddress{8, IPAddress::IPv4}) == IPAddress{"127.0.0.0"}) - { - // 127.XX.XX.XX -> 127.0.0.1 - prefix = IPAddress{"127.0.0.1"}; - mask = IPAddress{32, IPAddress::IPv4}; - } - } - else - { - if ((prefix & IPAddress{104, IPAddress::IPv6}) == IPAddress{"::ffff:127.0.0.0"}) - { - // ::ffff:127.XX.XX.XX -> ::1 - prefix = IPAddress{"::1"}; - mask = IPAddress{128, IPAddress::IPv6}; - } - } -} - -inline void AllowedClientHosts::IPSubnet::set(const IPAddress & prefix_, size_t num_prefix_bits) -{ - set(prefix_, IPAddress(num_prefix_bits, prefix_.family())); -} - -inline void AllowedClientHosts::IPSubnet::set(const IPAddress & address) -{ - set(address, address.length() * 8); -} - -inline AllowedClientHosts::IPSubnet::IPSubnet(const String & str) -{ - size_t slash = str.find('/'); - if (slash == String::npos) - { - set(IPAddress(str)); - return; - } - - IPAddress new_prefix{String{str, 0, slash}}; - String mask_str(str, slash + 1, str.length() - slash - 1); - bool only_digits = (mask_str.find_first_not_of("0123456789") == std::string::npos); - if (only_digits) - set(new_prefix, std::stoul(mask_str)); - else - set(new_prefix, IPAddress{mask_str}); -} - -inline String AllowedClientHosts::IPSubnet::toString() const -{ - unsigned int prefix_length = mask.prefixLength(); - if (isMaskAllBitsOne()) - return prefix.toString(); - else if (IPAddress{prefix_length, mask.family()} == mask) - return fs::path(prefix.toString()) / std::to_string(prefix_length); - else - return fs::path(prefix.toString()) / mask.toString(); -} - -inline bool AllowedClientHosts::IPSubnet::isMaskAllBitsOne() const -{ - return mask == IPAddress(mask.length() * 8, mask.family()); -} - - -inline void AllowedClientHosts::clear() -{ - addresses = {}; - subnets = {}; - names = {}; - name_regexps = {}; - like_patterns = {}; - any_host = false; - local_host = false; -} - -inline bool AllowedClientHosts::empty() const -{ - return !any_host && !local_host && addresses.empty() && subnets.empty() && names.empty() && name_regexps.empty() && 
like_patterns.empty(); -} - -inline void AllowedClientHosts::addAddress(const IPAddress & address) -{ - if (address.isLoopback()) - local_host = true; - else if (boost::range::find(addresses, address) == addresses.end()) - addresses.push_back(address); -} - -inline void AllowedClientHosts::removeAddress(const IPAddress & address) -{ - if (address.isLoopback()) - local_host = false; - else - boost::range::remove_erase(addresses, address); -} - -inline void AllowedClientHosts::addSubnet(const IPSubnet & subnet) -{ - if (subnet.getMask().isWildcard()) - any_host = true; - else if (subnet.isMaskAllBitsOne()) - addAddress(subnet.getPrefix()); - else if (boost::range::find(subnets, subnet) == subnets.end()) - subnets.push_back(subnet); -} - -inline void AllowedClientHosts::removeSubnet(const IPSubnet & subnet) -{ - if (subnet.getMask().isWildcard()) - any_host = false; - else if (subnet.isMaskAllBitsOne()) - removeAddress(subnet.getPrefix()); - else - boost::range::remove_erase(subnets, subnet); -} - -inline void AllowedClientHosts::addName(const String & name) -{ - if (boost::iequals(name, "localhost")) - local_host = true; - else if (boost::range::find(names, name) == names.end()) - names.push_back(name); -} - -inline void AllowedClientHosts::removeName(const String & name) -{ - if (boost::iequals(name, "localhost")) - local_host = false; - else - boost::range::remove_erase(names, name); -} - -inline void AllowedClientHosts::addNameRegexp(const String & name_regexp) -{ - if (boost::iequals(name_regexp, "localhost")) - local_host = true; - else if (name_regexp == ".*") - any_host = true; - else if (boost::range::find(name_regexps, name_regexp) == name_regexps.end()) - name_regexps.push_back(name_regexp); -} - -inline void AllowedClientHosts::removeNameRegexp(const String & name_regexp) -{ - if (boost::iequals(name_regexp, "localhost")) - local_host = false; - else if (name_regexp == ".*") - any_host = false; - else - boost::range::remove_erase(name_regexps, name_regexp); -} - -inline void AllowedClientHosts::addLikePattern(const String & pattern) -{ - if (boost::iequals(pattern, "localhost") || (pattern == "127.0.0.1") || (pattern == "::1")) - local_host = true; - else if ((pattern == "%") || (pattern == "0.0.0.0/0") || (pattern == "::/0")) - any_host = true; - else if (boost::range::find(like_patterns, pattern) == name_regexps.end()) - like_patterns.push_back(pattern); -} - -inline void AllowedClientHosts::removeLikePattern(const String & pattern) -{ - if (boost::iequals(pattern, "localhost") || (pattern == "127.0.0.1") || (pattern == "::1")) - local_host = false; - else if ((pattern == "%") || (pattern == "0.0.0.0/0") || (pattern == "::/0")) - any_host = false; - else - boost::range::remove_erase(like_patterns, pattern); -} - -inline void AllowedClientHosts::addLocalHost() -{ - local_host = true; -} - -inline void AllowedClientHosts::removeLocalHost() -{ - local_host = false; -} - -inline void AllowedClientHosts::addAnyHost() -{ - clear(); - any_host = true; -} - -inline void AllowedClientHosts::add(const AllowedClientHosts & other) -{ - if (other.containsAnyHost()) - { - addAnyHost(); - return; - } - if (other.containsLocalHost()) - addLocalHost(); - for (const IPAddress & address : other.getAddresses()) - addAddress(address); - for (const IPSubnet & subnet : other.getSubnets()) - addSubnet(subnet); - for (const String & name : other.getNames()) - addName(name); - for (const String & name_regexp : other.getNameRegexps()) - addNameRegexp(name_regexp); - for (const String & like_pattern : 
other.getLikePatterns()) - addLikePattern(like_pattern); -} - -inline void AllowedClientHosts::remove(const AllowedClientHosts & other) -{ - if (other.containsAnyHost()) - { - clear(); - return; - } - if (other.containsLocalHost()) - removeLocalHost(); - for (const IPAddress & address : other.getAddresses()) - removeAddress(address); - for (const IPSubnet & subnet : other.getSubnets()) - removeSubnet(subnet); - for (const String & name : other.getNames()) - removeName(name); - for (const String & name_regexp : other.getNameRegexps()) - removeNameRegexp(name_regexp); - for (const String & like_pattern : other.getLikePatterns()) - removeLikePattern(like_pattern); -} - - -inline bool operator ==(const AllowedClientHosts & lhs, const AllowedClientHosts & rhs) -{ - return (lhs.any_host == rhs.any_host) && (lhs.local_host == rhs.local_host) && (lhs.addresses == rhs.addresses) - && (lhs.subnets == rhs.subnets) && (lhs.names == rhs.names) && (lhs.name_regexps == rhs.name_regexps) - && (lhs.like_patterns == rhs.like_patterns); -} - -} diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index f07bdb10578..794c0a0d5d5 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -17,12 +18,12 @@ namespace ErrorCodes namespace { - using Digest = Authentication::Digest; - using Util = Authentication::Util; + using Digest = AuthenticationData::Digest; + using Util = AuthenticationData::Util; bool checkPasswordPlainText(const String & password, const Digest & password_plaintext) { - return (Util::encodePlainText(password) == password_plaintext); + return (Util::stringToDigest(password) == password_plaintext); } bool checkPasswordDoubleSHA1(const std::string_view & password, const Digest & password_double_sha1) @@ -67,76 +68,76 @@ namespace } -bool Authentication::areCredentialsValid(const Credentials & credentials, const ExternalAuthenticators & external_authenticators) const +bool Authentication::areCredentialsValid(const Credentials & credentials, const AuthenticationData & auth_data, const ExternalAuthenticators & external_authenticators) { if (!credentials.isReady()) return false; if (const auto * gss_acceptor_context = typeid_cast(&credentials)) { - switch (type) + switch (auth_data.getType()) { - case NO_PASSWORD: - case PLAINTEXT_PASSWORD: - case SHA256_PASSWORD: - case DOUBLE_SHA1_PASSWORD: - case LDAP: - throw Require("ClickHouse Basic Authentication"); + case AuthenticationType::NO_PASSWORD: + case AuthenticationType::PLAINTEXT_PASSWORD: + case AuthenticationType::SHA256_PASSWORD: + case AuthenticationType::DOUBLE_SHA1_PASSWORD: + case AuthenticationType::LDAP: + throw Authentication::Require("ClickHouse Basic Authentication"); - case KERBEROS: - return external_authenticators.checkKerberosCredentials(kerberos_realm, *gss_acceptor_context); + case AuthenticationType::KERBEROS: + return external_authenticators.checkKerberosCredentials(auth_data.getKerberosRealm(), *gss_acceptor_context); - case MAX_TYPE: + case AuthenticationType::MAX: break; } } if (const auto * mysql_credentials = typeid_cast(&credentials)) { - switch (type) + switch (auth_data.getType()) { - case NO_PASSWORD: + case AuthenticationType::NO_PASSWORD: return true; // N.B. even if the password is not empty! 
- case PLAINTEXT_PASSWORD: - return checkPasswordPlainTextMySQL(mysql_credentials->getScramble(), mysql_credentials->getScrambledPassword(), password_hash); + case AuthenticationType::PLAINTEXT_PASSWORD: + return checkPasswordPlainTextMySQL(mysql_credentials->getScramble(), mysql_credentials->getScrambledPassword(), auth_data.getPasswordHashBinary()); - case DOUBLE_SHA1_PASSWORD: - return checkPasswordDoubleSHA1MySQL(mysql_credentials->getScramble(), mysql_credentials->getScrambledPassword(), password_hash); + case AuthenticationType::DOUBLE_SHA1_PASSWORD: + return checkPasswordDoubleSHA1MySQL(mysql_credentials->getScramble(), mysql_credentials->getScrambledPassword(), auth_data.getPasswordHashBinary()); - case SHA256_PASSWORD: - case LDAP: - case KERBEROS: - throw Require("ClickHouse Basic Authentication"); + case AuthenticationType::SHA256_PASSWORD: + case AuthenticationType::LDAP: + case AuthenticationType::KERBEROS: + throw Authentication::Require("ClickHouse Basic Authentication"); - case MAX_TYPE: + case AuthenticationType::MAX: break; } } if (const auto * basic_credentials = typeid_cast(&credentials)) { - switch (type) + switch (auth_data.getType()) { - case NO_PASSWORD: + case AuthenticationType::NO_PASSWORD: return true; // N.B. even if the password is not empty! - case PLAINTEXT_PASSWORD: - return checkPasswordPlainText(basic_credentials->getPassword(), password_hash); + case AuthenticationType::PLAINTEXT_PASSWORD: + return checkPasswordPlainText(basic_credentials->getPassword(), auth_data.getPasswordHashBinary()); - case SHA256_PASSWORD: - return checkPasswordSHA256(basic_credentials->getPassword(), password_hash); + case AuthenticationType::SHA256_PASSWORD: + return checkPasswordSHA256(basic_credentials->getPassword(), auth_data.getPasswordHashBinary()); - case DOUBLE_SHA1_PASSWORD: - return checkPasswordDoubleSHA1(basic_credentials->getPassword(), password_hash); + case AuthenticationType::DOUBLE_SHA1_PASSWORD: + return checkPasswordDoubleSHA1(basic_credentials->getPassword(), auth_data.getPasswordHashBinary()); - case LDAP: - return external_authenticators.checkLDAPCredentials(ldap_server_name, *basic_credentials); + case AuthenticationType::LDAP: + return external_authenticators.checkLDAPCredentials(auth_data.getLDAPServerName(), *basic_credentials); - case KERBEROS: - throw Require(kerberos_realm); + case AuthenticationType::KERBEROS: + throw Authentication::Require(auth_data.getKerberosRealm()); - case MAX_TYPE: + case AuthenticationType::MAX: break; } } @@ -144,7 +145,7 @@ bool Authentication::areCredentialsValid(const Credentials & credentials, const if ([[maybe_unused]] const auto * always_allow_credentials = typeid_cast(&credentials)) return true; - throw Exception("areCredentialsValid(): authentication type " + toString(type) + " not supported", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("areCredentialsValid(): authentication type " + toString(auth_data.getType()) + " not supported", ErrorCodes::NOT_IMPLEMENTED); } } diff --git a/src/Access/Authentication.h b/src/Access/Authentication.h index e43d3793ee0..000ba8ca324 100644 --- a/src/Access/Authentication.h +++ b/src/Access/Authentication.h @@ -1,60 +1,26 @@ #pragma once -#include +#include #include -#include -#include -#include -#include +#include namespace DB { namespace ErrorCodes { - extern const int SUPPORT_IS_DISABLED; extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; - extern const int NOT_IMPLEMENTED; } class Credentials; class ExternalAuthenticators; -/// Authentication type and encrypted 
password for checking when a user logins. -class Authentication + +/// TODO: Try to move this checking to Credentials. +struct Authentication { -public: - enum Type - { - /// User doesn't have to enter password. - NO_PASSWORD, - - /// Password is stored as is. - PLAINTEXT_PASSWORD, - - /// Password is encrypted in SHA256 hash. - SHA256_PASSWORD, - - /// SHA1(SHA1(password)). - /// This kind of hash is used by the `mysql_native_password` authentication plugin. - DOUBLE_SHA1_PASSWORD, - - /// Password is checked by a [remote] LDAP server. Connection will be made at each authentication attempt. - LDAP, - - /// Kerberos authentication performed through GSS-API negotiation loop. - KERBEROS, - - MAX_TYPE, - }; - - struct TypeInfo - { - const char * const raw_name; - const String name; /// Lowercased with underscores, e.g. "sha256_password". - static const TypeInfo & get(Type type_); - }; + /// Checks the credentials (passwords, readiness, etc.) + static bool areCredentialsValid(const Credentials & credentials, const AuthenticationData & auth_data, const ExternalAuthenticators & external_authenticators); // A signaling class used to communicate requirements for credentials. template @@ -67,110 +33,9 @@ public: private: const String realm; }; - - using Digest = std::vector; - - Authentication(Authentication::Type type_ = NO_PASSWORD) : type(type_) {} - Authentication(const Authentication & src) = default; - Authentication & operator =(const Authentication & src) = default; - Authentication(Authentication && src) = default; - Authentication & operator =(Authentication && src) = default; - - Type getType() const { return type; } - - /// Sets the password and encrypt it using the authentication type set in the constructor. - void setPassword(const String & password_); - - /// Returns the password. Allowed to use only for Type::PLAINTEXT_PASSWORD. - String getPassword() const; - - /// Sets the password as a string of hexadecimal digits. - void setPasswordHashHex(const String & hash); - String getPasswordHashHex() const; - - /// Sets the password in binary form. - void setPasswordHashBinary(const Digest & hash); - const Digest & getPasswordHashBinary() const { return password_hash; } - - /// Sets the server name for authentication type LDAP. - const String & getLDAPServerName() const; - void setLDAPServerName(const String & name); - - /// Sets the realm name for authentication type KERBEROS. - const String & getKerberosRealm() const; - void setKerberosRealm(const String & realm); - - /// Checks the credentials (passwords, readiness, etc.) 
- bool areCredentialsValid(const Credentials & credentials, const ExternalAuthenticators & external_authenticators) const; - - friend bool operator ==(const Authentication & lhs, const Authentication & rhs) { return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash); } - friend bool operator !=(const Authentication & lhs, const Authentication & rhs) { return !(lhs == rhs); } - - struct Util - { - static Digest encodePlainText(const std::string_view & text) { return Digest(text.data(), text.data() + text.size()); } - static Digest encodeSHA256(const std::string_view & text); - static Digest encodeSHA1(const std::string_view & text); - static Digest encodeSHA1(const Digest & text) { return encodeSHA1(std::string_view{reinterpret_cast(text.data()), text.size()}); } - static Digest encodeDoubleSHA1(const std::string_view & text) { return encodeSHA1(encodeSHA1(text)); } - static Digest encodeDoubleSHA1(const Digest & text) { return encodeSHA1(encodeSHA1(text)); } - }; - -private: - Type type = Type::NO_PASSWORD; - Digest password_hash; - String ldap_server_name; - String kerberos_realm; }; -inline const Authentication::TypeInfo & Authentication::TypeInfo::get(Type type_) -{ - static constexpr auto make_info = [](const char * raw_name_) - { - String init_name = raw_name_; - boost::to_lower(init_name); - return TypeInfo{raw_name_, std::move(init_name)}; - }; - - switch (type_) - { - case NO_PASSWORD: - { - static const auto info = make_info("NO_PASSWORD"); - return info; - } - case PLAINTEXT_PASSWORD: - { - static const auto info = make_info("PLAINTEXT_PASSWORD"); - return info; - } - case SHA256_PASSWORD: - { - static const auto info = make_info("SHA256_PASSWORD"); - return info; - } - case DOUBLE_SHA1_PASSWORD: - { - static const auto info = make_info("DOUBLE_SHA1_PASSWORD"); - return info; - } - case LDAP: - { - static const auto info = make_info("LDAP"); - return info; - } - case KERBEROS: - { - static const auto info = make_info("KERBEROS"); - return info; - } - case MAX_TYPE: - break; - } - throw Exception("Unknown authentication type: " + std::to_string(static_cast(type_)), ErrorCodes::LOGICAL_ERROR); -} - template Authentication::Require::Require(const String & realm_) : Exception("Credentials required", ErrorCodes::BAD_ARGUMENTS) @@ -184,148 +49,4 @@ const String & Authentication::Require::getRealm() const return realm; } -inline String toString(Authentication::Type type_) -{ - return Authentication::TypeInfo::get(type_).raw_name; -} - - -inline Authentication::Digest Authentication::Util::encodeSHA256(const std::string_view & text [[maybe_unused]]) -{ -#if USE_SSL - Digest hash; - hash.resize(32); - ::DB::encodeSHA256(text, hash.data()); - return hash; -#else - throw DB::Exception( - "SHA256 passwords support is disabled, because ClickHouse was built without SSL library", - DB::ErrorCodes::SUPPORT_IS_DISABLED); -#endif -} - -inline Authentication::Digest Authentication::Util::encodeSHA1(const std::string_view & text) -{ - Poco::SHA1Engine engine; - engine.update(text.data(), text.size()); - return engine.digest(); -} - - -inline void Authentication::setPassword(const String & password_) -{ - switch (type) - { - case PLAINTEXT_PASSWORD: - return setPasswordHashBinary(Util::encodePlainText(password_)); - - case SHA256_PASSWORD: - return setPasswordHashBinary(Util::encodeSHA256(password_)); - - case DOUBLE_SHA1_PASSWORD: - return setPasswordHashBinary(Util::encodeDoubleSHA1(password_)); - - case NO_PASSWORD: - case LDAP: - case KERBEROS: - throw Exception("Cannot specify 
password for authentication type " + toString(type), ErrorCodes::LOGICAL_ERROR); - - case MAX_TYPE: - break; - } - throw Exception("setPassword(): authentication type " + toString(type) + " not supported", ErrorCodes::NOT_IMPLEMENTED); -} - - -inline String Authentication::getPassword() const -{ - if (type != PLAINTEXT_PASSWORD) - throw Exception("Cannot decode the password", ErrorCodes::LOGICAL_ERROR); - return String(password_hash.data(), password_hash.data() + password_hash.size()); -} - - -inline void Authentication::setPasswordHashHex(const String & hash) -{ - Digest digest; - digest.resize(hash.size() / 2); - boost::algorithm::unhex(hash.begin(), hash.end(), digest.data()); - setPasswordHashBinary(digest); -} - -inline String Authentication::getPasswordHashHex() const -{ - if (type == LDAP || type == KERBEROS) - throw Exception("Cannot get password hex hash for authentication type " + toString(type), ErrorCodes::LOGICAL_ERROR); - - String hex; - hex.resize(password_hash.size() * 2); - boost::algorithm::hex(password_hash.begin(), password_hash.end(), hex.data()); - return hex; -} - - -inline void Authentication::setPasswordHashBinary(const Digest & hash) -{ - switch (type) - { - case PLAINTEXT_PASSWORD: - { - password_hash = hash; - return; - } - - case SHA256_PASSWORD: - { - if (hash.size() != 32) - throw Exception( - "Password hash for the 'SHA256_PASSWORD' authentication type has length " + std::to_string(hash.size()) - + " but must be exactly 32 bytes.", - ErrorCodes::BAD_ARGUMENTS); - password_hash = hash; - return; - } - - case DOUBLE_SHA1_PASSWORD: - { - if (hash.size() != 20) - throw Exception( - "Password hash for the 'DOUBLE_SHA1_PASSWORD' authentication type has length " + std::to_string(hash.size()) - + " but must be exactly 20 bytes.", - ErrorCodes::BAD_ARGUMENTS); - password_hash = hash; - return; - } - - case NO_PASSWORD: - case LDAP: - case KERBEROS: - throw Exception("Cannot specify password binary hash for authentication type " + toString(type), ErrorCodes::LOGICAL_ERROR); - - case MAX_TYPE: - break; - } - throw Exception("setPasswordHashBinary(): authentication type " + toString(type) + " not supported", ErrorCodes::NOT_IMPLEMENTED); -} - -inline const String & Authentication::getLDAPServerName() const -{ - return ldap_server_name; -} - -inline void Authentication::setLDAPServerName(const String & name) -{ - ldap_server_name = name; -} - -inline const String & Authentication::getKerberosRealm() const -{ - return kerberos_realm; -} - -inline void Authentication::setKerberosRealm(const String & realm) -{ - kerberos_realm = realm; -} - } diff --git a/src/Access/Common/AccessFlags.cpp b/src/Access/Common/AccessFlags.cpp new file mode 100644 index 00000000000..e7dddbdcba2 --- /dev/null +++ b/src/Access/Common/AccessFlags.cpp @@ -0,0 +1,378 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int UNKNOWN_ACCESS_TYPE; + extern const int LOGICAL_ERROR; +} + +namespace +{ + using Flags = std::bitset; + + class Helper + { + public: + static const Helper & instance() + { + static const Helper res; + return res; + } + + Flags accessTypeToFlags(AccessType type) const + { + return access_type_to_flags_mapping[static_cast(type)]; + } + + Flags keywordToFlags(const std::string_view & keyword) const + { + auto it = keyword_to_flags_map.find(keyword); + if (it == keyword_to_flags_map.end()) + { + String uppercased_keyword{keyword}; + boost::to_upper(uppercased_keyword); + it = 
keyword_to_flags_map.find(uppercased_keyword); + if (it == keyword_to_flags_map.end()) + throw Exception("Unknown access type: " + String(keyword), ErrorCodes::UNKNOWN_ACCESS_TYPE); + } + return it->second; + } + + Flags keywordsToFlags(const std::vector & keywords) const + { + Flags res; + for (const auto & keyword : keywords) + res |= keywordToFlags(keyword); + return res; + } + + Flags keywordsToFlags(const Strings & keywords) const + { + Flags res; + for (const auto & keyword : keywords) + res |= keywordToFlags(keyword); + return res; + } + + std::vector flagsToAccessTypes(const Flags & flags_) const + { + std::vector access_types; + flagsToAccessTypesRec(flags_, access_types, *all_node); + return access_types; + } + + std::vector flagsToKeywords(const Flags & flags_) const + { + std::vector keywords; + flagsToKeywordsRec(flags_, keywords, *all_node); + return keywords; + } + + String flagsToString(const Flags & flags_) const + { + auto keywords = flagsToKeywords(flags_); + if (keywords.empty()) + return "USAGE"; + String str; + for (const auto & keyword : keywords) + { + if (!str.empty()) + str += ", "; + str += keyword; + } + return str; + } + + const Flags & getAllFlags() const { return all_flags; } + const Flags & getGlobalFlags() const { return all_flags_for_target[GLOBAL]; } + const Flags & getDatabaseFlags() const { return all_flags_for_target[DATABASE]; } + const Flags & getTableFlags() const { return all_flags_for_target[TABLE]; } + const Flags & getColumnFlags() const { return all_flags_for_target[COLUMN]; } + const Flags & getDictionaryFlags() const { return all_flags_for_target[DICTIONARY]; } + const Flags & getAllFlagsGrantableOnGlobalLevel() const { return getAllFlags(); } + const Flags & getAllFlagsGrantableOnDatabaseLevel() const { return all_flags_grantable_on_database_level; } + const Flags & getAllFlagsGrantableOnTableLevel() const { return all_flags_grantable_on_table_level; } + const Flags & getAllFlagsGrantableOnColumnLevel() const { return getColumnFlags(); } + + private: + enum NodeType + { + UNKNOWN = -2, + GROUP = -1, + GLOBAL, + DATABASE, + TABLE, + VIEW = TABLE, + COLUMN, + DICTIONARY, + }; + + struct Node; + using NodePtr = std::unique_ptr; + + struct Node + { + const String keyword; + NodeType node_type = UNKNOWN; + AccessType access_type = AccessType::NONE; + Strings aliases; + Flags flags; + std::vector children; + + explicit Node(String keyword_) : keyword(std::move(keyword_)) {} + Node(String keyword_, NodeType node_type_) : keyword(std::move(keyword_)), node_type(node_type_) {} + + void setFlag(size_t flag) { flags.set(flag); } + + void addChild(NodePtr child) + { + flags |= child->flags; + children.push_back(std::move(child)); + } + }; + + static String replaceUnderscoreWithSpace(const std::string_view & str) + { + String res{str}; + boost::replace_all(res, "_", " "); + return res; + } + + static Strings splitAliases(const std::string_view & str) + { + Strings aliases; + boost::split(aliases, str, boost::is_any_of(",")); + for (auto & alias : aliases) + boost::trim(alias); + return aliases; + } + + static void makeNode( + AccessType access_type, + const std::string_view & name, + const std::string_view & aliases, + NodeType node_type, + const std::string_view & parent_group_name, + std::unordered_map & nodes, + std::unordered_map & owned_nodes, + size_t & next_flag) + { + NodePtr node; + auto keyword = replaceUnderscoreWithSpace(name); + auto it = owned_nodes.find(keyword); + if (it != owned_nodes.end()) + { + node = std::move(it->second); + 
owned_nodes.erase(it); + } + else + { + if (nodes.count(keyword)) + throw Exception(keyword + " declared twice", ErrorCodes::LOGICAL_ERROR); + node = std::make_unique(keyword, node_type); + nodes[node->keyword] = node.get(); + } + + node->access_type = access_type; + node->node_type = node_type; + node->aliases = splitAliases(aliases); + if (node_type != GROUP) + node->setFlag(next_flag++); + + bool has_parent_group = (parent_group_name != std::string_view{"NONE"}); + if (!has_parent_group) + { + std::string_view keyword_as_string_view = node->keyword; + owned_nodes[keyword_as_string_view] = std::move(node); + return; + } + + auto parent_keyword = replaceUnderscoreWithSpace(parent_group_name); + auto it_parent = nodes.find(parent_keyword); + if (it_parent == nodes.end()) + { + auto parent_node = std::make_unique(parent_keyword); + it_parent = nodes.emplace(parent_node->keyword, parent_node.get()).first; + assert(!owned_nodes.count(parent_node->keyword)); + std::string_view parent_keyword_as_string_view = parent_node->keyword; + owned_nodes[parent_keyword_as_string_view] = std::move(parent_node); + } + it_parent->second->addChild(std::move(node)); + } + + void makeNodes() + { + std::unordered_map owned_nodes; + std::unordered_map nodes; + size_t next_flag = 0; + +# define MAKE_ACCESS_FLAGS_NODE(name, aliases, node_type, parent_group_name) \ + makeNode(AccessType::name, #name, aliases, node_type, #parent_group_name, nodes, owned_nodes, next_flag); + + APPLY_FOR_ACCESS_TYPES(MAKE_ACCESS_FLAGS_NODE) + +# undef MAKE_ACCESS_FLAGS_NODE + + if (!owned_nodes.count("NONE")) + throw Exception("'NONE' not declared", ErrorCodes::LOGICAL_ERROR); + if (!owned_nodes.count("ALL")) + throw Exception("'ALL' not declared", ErrorCodes::LOGICAL_ERROR); + + all_node = std::move(owned_nodes["ALL"]); + none_node = std::move(owned_nodes["NONE"]); + owned_nodes.erase("ALL"); + owned_nodes.erase("NONE"); + + if (!owned_nodes.empty()) + { + const auto & unused_node = *(owned_nodes.begin()->second); + if (unused_node.node_type == UNKNOWN) + throw Exception("Parent group '" + unused_node.keyword + "' not found", ErrorCodes::LOGICAL_ERROR); + else + throw Exception("Access type '" + unused_node.keyword + "' should have parent group", ErrorCodes::LOGICAL_ERROR); + } + } + + void makeKeywordToFlagsMap(Node * start_node = nullptr) + { + if (!start_node) + { + makeKeywordToFlagsMap(none_node.get()); + start_node = all_node.get(); + } + + start_node->aliases.emplace_back(start_node->keyword); + for (auto & alias : start_node->aliases) + { + boost::to_upper(alias); + keyword_to_flags_map[alias] = start_node->flags; + } + + for (auto & child : start_node->children) + makeKeywordToFlagsMap(child.get()); + } + + void makeAccessTypeToFlagsMapping(Node * start_node = nullptr) + { + if (!start_node) + { + makeAccessTypeToFlagsMapping(none_node.get()); + start_node = all_node.get(); + } + + size_t index = static_cast(start_node->access_type); + access_type_to_flags_mapping.resize(std::max(index + 1, access_type_to_flags_mapping.size())); + access_type_to_flags_mapping[index] = start_node->flags; + + for (auto & child : start_node->children) + makeAccessTypeToFlagsMapping(child.get()); + } + + void collectAllFlags(const Node * start_node = nullptr) + { + if (!start_node) + { + start_node = all_node.get(); + all_flags = start_node->flags; + } + if (start_node->node_type != GROUP) + { + assert(static_cast(start_node->node_type) < std::size(all_flags_for_target)); + all_flags_for_target[start_node->node_type] |= start_node->flags; + } + 
for (const auto & child : start_node->children) + collectAllFlags(child.get()); + + all_flags_grantable_on_table_level = all_flags_for_target[TABLE] | all_flags_for_target[DICTIONARY] | all_flags_for_target[COLUMN]; + all_flags_grantable_on_database_level = all_flags_for_target[DATABASE] | all_flags_grantable_on_table_level; + } + + Helper() + { + makeNodes(); + makeKeywordToFlagsMap(); + makeAccessTypeToFlagsMapping(); + collectAllFlags(); + } + + static void flagsToAccessTypesRec(const Flags & flags_, std::vector<AccessType> & access_types, const Node & start_node) + { + Flags matching_flags = (flags_ & start_node.flags); + if (matching_flags.any()) + { + if (matching_flags == start_node.flags) + { + access_types.push_back(start_node.access_type); + } + else + { + for (const auto & child : start_node.children) + flagsToAccessTypesRec(flags_, access_types, *child); + } + } + } + + static void flagsToKeywordsRec(const Flags & flags_, std::vector & keywords, const Node & start_node) + { + Flags matching_flags = (flags_ & start_node.flags); + if (matching_flags.any()) + { + if (matching_flags == start_node.flags) + { + keywords.push_back(start_node.keyword); + } + else + { + for (const auto & child : start_node.children) + flagsToKeywordsRec(flags_, keywords, *child); + } + } + } + + NodePtr all_node; + NodePtr none_node; + std::unordered_map keyword_to_flags_map; + std::vector<Flags> access_type_to_flags_mapping; + Flags all_flags; + Flags all_flags_for_target[static_cast<size_t>(DICTIONARY) + 1]; + Flags all_flags_grantable_on_database_level; + Flags all_flags_grantable_on_table_level; + }; +} + + +AccessFlags::AccessFlags(AccessType type) : flags(Helper::instance().accessTypeToFlags(type)) {} +AccessFlags::AccessFlags(const std::string_view & keyword) : flags(Helper::instance().keywordToFlags(keyword)) {} +AccessFlags::AccessFlags(const std::vector & keywords) : flags(Helper::instance().keywordsToFlags(keywords)) {} +AccessFlags::AccessFlags(const Strings & keywords) : flags(Helper::instance().keywordsToFlags(keywords)) {} +String AccessFlags::toString() const { return Helper::instance().flagsToString(flags); } +std::vector<AccessType> AccessFlags::toAccessTypes() const { return Helper::instance().flagsToAccessTypes(flags); } +std::vector AccessFlags::toKeywords() const { return Helper::instance().flagsToKeywords(flags); } +AccessFlags AccessFlags::allFlags() { return Helper::instance().getAllFlags(); } +AccessFlags AccessFlags::allGlobalFlags() { return Helper::instance().getGlobalFlags(); } +AccessFlags AccessFlags::allDatabaseFlags() { return Helper::instance().getDatabaseFlags(); } +AccessFlags AccessFlags::allTableFlags() { return Helper::instance().getTableFlags(); } +AccessFlags AccessFlags::allColumnFlags() { return Helper::instance().getColumnFlags(); } +AccessFlags AccessFlags::allDictionaryFlags() { return Helper::instance().getDictionaryFlags(); } +AccessFlags AccessFlags::allFlagsGrantableOnGlobalLevel() { return Helper::instance().getAllFlagsGrantableOnGlobalLevel(); } +AccessFlags AccessFlags::allFlagsGrantableOnDatabaseLevel() { return Helper::instance().getAllFlagsGrantableOnDatabaseLevel(); } +AccessFlags AccessFlags::allFlagsGrantableOnTableLevel() { return Helper::instance().getAllFlagsGrantableOnTableLevel(); } +AccessFlags AccessFlags::allFlagsGrantableOnColumnLevel() { return Helper::instance().getAllFlagsGrantableOnColumnLevel(); } + +AccessFlags operator |(AccessType left, AccessType right) { return AccessFlags(left) | right; } +AccessFlags operator &(AccessType left, AccessType right) { return
AccessFlags(left) & right; } +AccessFlags operator -(AccessType left, AccessType right) { return AccessFlags(left) - right; } +AccessFlags operator ~(AccessType x) { return ~AccessFlags(x); } + +} diff --git a/src/Access/Common/AccessFlags.h b/src/Access/Common/AccessFlags.h new file mode 100644 index 00000000000..5a5452c50ea --- /dev/null +++ b/src/Access/Common/AccessFlags.h @@ -0,0 +1,120 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ +using Strings = std::vector; + +/// Represents a combination of access types which can be granted globally, on databases, tables, columns, etc. +/// For example "SELECT, CREATE USER" is an access type. +class AccessFlags +{ +public: + AccessFlags(AccessType type); + + /// The same as AccessFlags(AccessType::NONE). + AccessFlags() = default; + + /// Constructs from a string like "SELECT". + AccessFlags(const std::string_view & keyword); + + /// Constructs from a list of strings like "SELECT, UPDATE, INSERT". + AccessFlags(const std::vector & keywords); + AccessFlags(const Strings & keywords); + + AccessFlags(const AccessFlags & src) = default; + AccessFlags(AccessFlags && src) = default; + AccessFlags & operator =(const AccessFlags & src) = default; + AccessFlags & operator =(AccessFlags && src) = default; + + /// Returns the access type which contains two specified access types. + AccessFlags & operator |=(const AccessFlags & other) { flags |= other.flags; return *this; } + friend AccessFlags operator |(const AccessFlags & left, const AccessFlags & right) { return AccessFlags(left) |= right; } + + /// Returns the access type which contains the common part of two access types. + AccessFlags & operator &=(const AccessFlags & other) { flags &= other.flags; return *this; } + friend AccessFlags operator &(const AccessFlags & left, const AccessFlags & right) { return AccessFlags(left) &= right; } + + /// Returns the access type which contains only the part of the first access type which is not the part of the second access type. + /// (lhs - rhs) is the same as (lhs & ~rhs). + AccessFlags & operator -=(const AccessFlags & other) { flags &= ~other.flags; return *this; } + friend AccessFlags operator -(const AccessFlags & left, const AccessFlags & right) { return AccessFlags(left) -= right; } + + AccessFlags operator ~() const { AccessFlags res; res.flags = ~flags; return res; } + + bool isEmpty() const { return flags.none(); } + explicit operator bool() const { return !isEmpty(); } + bool contains(const AccessFlags & other) const { return (flags & other.flags) == other.flags; } + + friend bool operator ==(const AccessFlags & left, const AccessFlags & right) { return left.flags == right.flags; } + friend bool operator !=(const AccessFlags & left, const AccessFlags & right) { return !(left == right); } + friend bool operator <(const AccessFlags & left, const AccessFlags & right) { return memcmp(&left.flags, &right.flags, sizeof(Flags)) < 0; } + friend bool operator >(const AccessFlags & left, const AccessFlags & right) { return right < left; } + friend bool operator <=(const AccessFlags & left, const AccessFlags & right) { return !(right < left); } + friend bool operator >=(const AccessFlags & left, const AccessFlags & right) { return !(left < right); } + + void clear() { flags.reset(); } + + /// Returns a comma-separated list of keywords, like "SELECT, CREATE USER, UPDATE". + String toString() const; + + /// Returns a list of access types. + std::vector toAccessTypes() const; + + /// Returns a list of keywords. 
+ std::vector toKeywords() const; + + /// Returns all the flags. + /// These are the same as (allGlobalFlags() | allDatabaseFlags() | allTableFlags() | allColumnsFlags() | allDictionaryFlags()). + static AccessFlags allFlags(); + + /// Returns all the global flags. + static AccessFlags allGlobalFlags(); + + /// Returns all the flags related to a database. + static AccessFlags allDatabaseFlags(); + + /// Returns all the flags related to a table. + static AccessFlags allTableFlags(); + + /// Returns all the flags related to a column. + static AccessFlags allColumnFlags(); + + /// Returns all the flags related to a dictionary. + static AccessFlags allDictionaryFlags(); + + /// Returns all the flags which could be granted on the global level. + /// The same as allFlags(). + static AccessFlags allFlagsGrantableOnGlobalLevel(); + + /// Returns all the flags which could be granted on the database level. + /// Returns allDatabaseFlags() | allTableFlags() | allDictionaryFlags() | allColumnFlags(). + static AccessFlags allFlagsGrantableOnDatabaseLevel(); + + /// Returns all the flags which could be granted on the table level. + /// Returns allTableFlags() | allDictionaryFlags() | allColumnFlags(). + static AccessFlags allFlagsGrantableOnTableLevel(); + + /// Returns all the flags which could be granted on the global level. + /// The same as allColumnFlags(). + static AccessFlags allFlagsGrantableOnColumnLevel(); + + static constexpr size_t SIZE = 128; +private: + using Flags = std::bitset; + Flags flags; + + AccessFlags(const Flags & flags_) : flags(flags_) {} +}; + +AccessFlags operator |(AccessType left, AccessType right); +AccessFlags operator &(AccessType left, AccessType right); +AccessFlags operator -(AccessType left, AccessType right); +AccessFlags operator ~(AccessType x); + +} diff --git a/src/Access/AccessRightsElement.cpp b/src/Access/Common/AccessRightsElement.cpp similarity index 62% rename from src/Access/AccessRightsElement.cpp rename to src/Access/Common/AccessRightsElement.cpp index 823019ffebd..9913fc02f4a 100644 --- a/src/Access/AccessRightsElement.cpp +++ b/src/Access/Common/AccessRightsElement.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -145,10 +145,85 @@ namespace } +AccessRightsElement::AccessRightsElement(AccessFlags access_flags_, const std::string_view & database_) + : access_flags(access_flags_), database(database_), any_database(false) +{ +} + +AccessRightsElement::AccessRightsElement(AccessFlags access_flags_, const std::string_view & database_, const std::string_view & table_) + : access_flags(access_flags_), database(database_), table(table_), any_database(false), any_table(false) +{ +} + +AccessRightsElement::AccessRightsElement( + AccessFlags access_flags_, const std::string_view & database_, const std::string_view & table_, const std::string_view & column_) + : access_flags(access_flags_) + , database(database_) + , table(table_) + , columns({String{column_}}) + , any_database(false) + , any_table(false) + , any_column(false) +{ +} + +AccessRightsElement::AccessRightsElement( + AccessFlags access_flags_, + const std::string_view & database_, + const std::string_view & table_, + const std::vector & columns_) + : access_flags(access_flags_), database(database_), table(table_), any_database(false), any_table(false), any_column(false) +{ + columns.resize(columns_.size()); + for (size_t i = 0; i != columns_.size(); ++i) + columns[i] = String{columns_[i]}; +} + +AccessRightsElement::AccessRightsElement( + AccessFlags access_flags_, const std::string_view 
& database_, const std::string_view & table_, const Strings & columns_) + : access_flags(access_flags_) + , database(database_) + , table(table_) + , columns(columns_) + , any_database(false) + , any_table(false) + , any_column(false) +{ +} + +void AccessRightsElement::eraseNonGrantable() +{ + if (!any_column) + access_flags &= AccessFlags::allFlagsGrantableOnColumnLevel(); + else if (!any_table) + access_flags &= AccessFlags::allFlagsGrantableOnTableLevel(); + else if (!any_database) + access_flags &= AccessFlags::allFlagsGrantableOnDatabaseLevel(); + else + access_flags &= AccessFlags::allFlagsGrantableOnGlobalLevel(); +} + +void AccessRightsElement::replaceEmptyDatabase(const String & current_database) +{ + if (isEmptyDatabase()) + database = current_database; +} + String AccessRightsElement::toString() const { return toStringImpl(*this, true); } String AccessRightsElement::toStringWithoutOptions() const { return toStringImpl(*this, false); } -String AccessRightsElements::toString() const { return toStringImpl(*this, true); } -String AccessRightsElements::toStringWithoutOptions() const { return toStringImpl(*this, false); } + + +bool AccessRightsElements::empty() const { return std::all_of(begin(), end(), [](const AccessRightsElement & e) { return e.empty(); }); } + +bool AccessRightsElements::sameDatabaseAndTable() const +{ + return (size() < 2) || std::all_of(std::next(begin()), end(), [this](const AccessRightsElement & e) { return e.sameDatabaseAndTable(front()); }); +} + +bool AccessRightsElements::sameOptions() const +{ + return (size() < 2) || std::all_of(std::next(begin()), end(), [this](const AccessRightsElement & e) { return e.sameOptions(front()); }); +} void AccessRightsElements::eraseNonGrantable() { @@ -159,4 +234,13 @@ void AccessRightsElements::eraseNonGrantable() }); } +void AccessRightsElements::replaceEmptyDatabase(const String & current_database) +{ + for (auto & element : *this) + element.replaceEmptyDatabase(current_database); +} + +String AccessRightsElements::toString() const { return toStringImpl(*this, true); } +String AccessRightsElements::toStringWithoutOptions() const { return toStringImpl(*this, false); } + } diff --git a/src/Access/AccessRightsElement.h b/src/Access/Common/AccessRightsElement.h similarity index 57% rename from src/Access/AccessRightsElement.h rename to src/Access/Common/AccessRightsElement.h index c46a4b54e6e..4fb58b39185 100644 --- a/src/Access/AccessRightsElement.h +++ b/src/Access/Common/AccessRightsElement.h @@ -1,6 +1,7 @@ #pragma once -#include +#include +#include namespace DB @@ -27,51 +28,19 @@ struct AccessRightsElement AccessRightsElement(AccessFlags access_flags_) : access_flags(access_flags_) {} - AccessRightsElement(AccessFlags access_flags_, const std::string_view & database_) - : access_flags(access_flags_), database(database_), any_database(false) - { - } - - AccessRightsElement(AccessFlags access_flags_, const std::string_view & database_, const std::string_view & table_) - : access_flags(access_flags_), database(database_), table(table_), any_database(false), any_table(false) - { - } - + AccessRightsElement(AccessFlags access_flags_, const std::string_view & database_); + AccessRightsElement(AccessFlags access_flags_, const std::string_view & database_, const std::string_view & table_); AccessRightsElement( - AccessFlags access_flags_, const std::string_view & database_, const std::string_view & table_, const std::string_view & column_) - : access_flags(access_flags_) - , database(database_) - , table(table_) - , 
columns({String{column_}}) - , any_database(false) - , any_table(false) - , any_column(false) - { - } + AccessFlags access_flags_, const std::string_view & database_, const std::string_view & table_, const std::string_view & column_); AccessRightsElement( AccessFlags access_flags_, const std::string_view & database_, const std::string_view & table_, - const std::vector & columns_) - : access_flags(access_flags_), database(database_), table(table_), any_database(false), any_table(false), any_column(false) - { - columns.resize(columns_.size()); - for (size_t i = 0; i != columns_.size(); ++i) - columns[i] = String{columns_[i]}; - } + const std::vector & columns_); AccessRightsElement( - AccessFlags access_flags_, const std::string_view & database_, const std::string_view & table_, const Strings & columns_) - : access_flags(access_flags_) - , database(database_) - , table(table_) - , columns(columns_) - , any_database(false) - , any_table(false) - , any_column(false) - { - } + AccessFlags access_flags_, const std::string_view & database_, const std::string_view & table_, const Strings & columns_); bool empty() const { return !access_flags || (!any_column && columns.empty()); } @@ -91,26 +60,12 @@ struct AccessRightsElement } /// Resets flags which cannot be granted. - void eraseNonGrantable() - { - if (!any_column) - access_flags &= AccessFlags::allFlagsGrantableOnColumnLevel(); - else if (!any_table) - access_flags &= AccessFlags::allFlagsGrantableOnTableLevel(); - else if (!any_database) - access_flags &= AccessFlags::allFlagsGrantableOnDatabaseLevel(); - else - access_flags &= AccessFlags::allFlagsGrantableOnGlobalLevel(); - } + void eraseNonGrantable(); bool isEmptyDatabase() const { return !any_database && database.empty(); } /// If the database is empty, replaces it with `current_database`. Otherwise does nothing. - void replaceEmptyDatabase(const String & current_database) - { - if (isEmptyDatabase()) - database = current_database; - } + void replaceEmptyDatabase(const String & current_database); /// Returns a human-readable representation like "GRANT SELECT, UPDATE(x, y) ON db.table". String toString() const; @@ -125,27 +80,15 @@ public: using Base = std::vector; using Base::Base; - bool empty() const { return std::all_of(begin(), end(), [](const AccessRightsElement & e) { return e.empty(); }); } - - bool sameDatabaseAndTable() const - { - return (size() < 2) || std::all_of(std::next(begin()), end(), [this](const AccessRightsElement & e) { return e.sameDatabaseAndTable(front()); }); - } - - bool sameOptions() const - { - return (size() < 2) || std::all_of(std::next(begin()), end(), [this](const AccessRightsElement & e) { return e.sameOptions(front()); }); - } + bool empty() const; + bool sameDatabaseAndTable() const; + bool sameOptions() const; /// Resets flags which cannot be granted. void eraseNonGrantable(); /// If the database is empty, replaces it with `current_database`. Otherwise does nothing. - void replaceEmptyDatabase(const String & current_database) - { - for (auto & element : *this) - element.replaceEmptyDatabase(current_database); - } + void replaceEmptyDatabase(const String & current_database); /// Returns a human-readable representation like "GRANT SELECT, UPDATE(x, y) ON db.table". 
String toString() const; diff --git a/src/Access/Common/AccessType.cpp b/src/Access/Common/AccessType.cpp new file mode 100644 index 00000000000..d44d70d78b2 --- /dev/null +++ b/src/Access/Common/AccessType.cpp @@ -0,0 +1,56 @@ +#include +#include +#include + + +namespace DB +{ + +namespace +{ + using Strings = std::vector; + + class AccessTypeToStringConverter + { + public: + static const AccessTypeToStringConverter & instance() + { + static const AccessTypeToStringConverter res; + return res; + } + + std::string_view convert(AccessType type) const + { + return access_type_to_string_mapping[static_cast(type)]; + } + + private: + AccessTypeToStringConverter() + { +#define ACCESS_TYPE_TO_STRING_CONVERTER_ADD_TO_MAPPING(name, aliases, node_type, parent_group_name) \ + addToMapping(AccessType::name, #name); + + APPLY_FOR_ACCESS_TYPES(ACCESS_TYPE_TO_STRING_CONVERTER_ADD_TO_MAPPING) + +#undef ACCESS_TYPE_TO_STRING_CONVERTER_ADD_TO_MAPPING + } + + void addToMapping(AccessType type, const std::string_view & str) + { + String str2{str}; + boost::replace_all(str2, "_", " "); + size_t index = static_cast(type); + access_type_to_string_mapping.resize(std::max(index + 1, access_type_to_string_mapping.size())); + access_type_to_string_mapping[index] = str2; + } + + Strings access_type_to_string_mapping; + }; +} + +std::string_view toString(AccessType type) +{ + return AccessTypeToStringConverter::instance().convert(type); +} + +} diff --git a/src/Access/AccessType.h b/src/Access/Common/AccessType.h similarity index 89% rename from src/Access/AccessType.h rename to src/Access/Common/AccessType.h index aa7dcbb006b..cb6c326cb84 100644 --- a/src/Access/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -1,17 +1,11 @@ #pragma once #include -#include -#include -#include -#include namespace DB { -using Strings = std::vector; - /// Represents an access type which can be granted on databases, tables, columns, etc. 
enum class AccessType { @@ -198,48 +192,6 @@ enum class AccessType #undef DECLARE_ACCESS_TYPE_ENUM_CONST }; - -namespace impl -{ - template - class AccessTypeToStringConverter - { - public: - static const AccessTypeToStringConverter & instance() - { - static const AccessTypeToStringConverter res; - return res; - } - - std::string_view convert(AccessType type) const - { - return access_type_to_string_mapping[static_cast(type)]; - } - - private: - AccessTypeToStringConverter() - { -#define ACCESS_TYPE_TO_STRING_CONVERTER_ADD_TO_MAPPING(name, aliases, node_type, parent_group_name) \ - addToMapping(AccessType::name, #name); - - APPLY_FOR_ACCESS_TYPES(ACCESS_TYPE_TO_STRING_CONVERTER_ADD_TO_MAPPING) - -#undef ACCESS_TYPE_TO_STRING_CONVERTER_ADD_TO_MAPPING - } - - void addToMapping(AccessType type, const std::string_view & str) - { - String str2{str}; - boost::replace_all(str2, "_", " "); - size_t index = static_cast(type); - access_type_to_string_mapping.resize(std::max(index + 1, access_type_to_string_mapping.size())); - access_type_to_string_mapping[index] = str2; - } - - Strings access_type_to_string_mapping; - }; -} - -inline std::string_view toString(AccessType type) { return impl::AccessTypeToStringConverter<>::instance().convert(type); } +std::string_view toString(AccessType type); } diff --git a/src/Access/AllowedClientHosts.cpp b/src/Access/Common/AllowedClientHosts.cpp similarity index 58% rename from src/Access/AllowedClientHosts.cpp rename to src/Access/Common/AllowedClientHosts.cpp index f306b1386dd..62cef97e90e 100644 --- a/src/Access/AllowedClientHosts.cpp +++ b/src/Access/Common/AllowedClientHosts.cpp @@ -1,13 +1,19 @@ -#include +#include #include #include #include #include #include #include +#include #include -#include +#include +#include #include +#include +#include + +namespace fs = std::filesystem; namespace DB @@ -186,6 +192,262 @@ namespace } +void AllowedClientHosts::IPSubnet::set(const IPAddress & prefix_, const IPAddress & mask_) +{ + prefix = prefix_; + mask = mask_; + + if (prefix.family() != mask.family()) + { + if (prefix.family() == IPAddress::IPv4) + prefix = IPAddress("::ffff:" + prefix.toString()); + + if (mask.family() == IPAddress::IPv4) + mask = IPAddress(96, IPAddress::IPv6) | IPAddress("::ffff:" + mask.toString()); + } + + prefix = prefix & mask; + + if (prefix.family() == IPAddress::IPv4) + { + if ((prefix & IPAddress{8, IPAddress::IPv4}) == IPAddress{"127.0.0.0"}) + { + // 127.XX.XX.XX -> 127.0.0.1 + prefix = IPAddress{"127.0.0.1"}; + mask = IPAddress{32, IPAddress::IPv4}; + } + } + else + { + if ((prefix & IPAddress{104, IPAddress::IPv6}) == IPAddress{"::ffff:127.0.0.0"}) + { + // ::ffff:127.XX.XX.XX -> ::1 + prefix = IPAddress{"::1"}; + mask = IPAddress{128, IPAddress::IPv6}; + } + } +} + +void AllowedClientHosts::IPSubnet::set(const IPAddress & prefix_, size_t num_prefix_bits) +{ + set(prefix_, IPAddress(num_prefix_bits, prefix_.family())); +} + +void AllowedClientHosts::IPSubnet::set(const IPAddress & address) +{ + set(address, address.length() * 8); +} + +AllowedClientHosts::IPSubnet::IPSubnet(const String & str) +{ + size_t slash = str.find('/'); + if (slash == String::npos) + { + set(IPAddress(str)); + return; + } + + IPAddress new_prefix{String{str, 0, slash}}; + String mask_str(str, slash + 1, str.length() - slash - 1); + bool only_digits = (mask_str.find_first_not_of("0123456789") == std::string::npos); + if (only_digits) + set(new_prefix, std::stoul(mask_str)); + else + set(new_prefix, IPAddress{mask_str}); +} + +String 
AllowedClientHosts::IPSubnet::toString() const +{ + unsigned int prefix_length = mask.prefixLength(); + if (isMaskAllBitsOne()) + return prefix.toString(); + else if (IPAddress{prefix_length, mask.family()} == mask) + return fs::path(prefix.toString()) / std::to_string(prefix_length); + else + return fs::path(prefix.toString()) / mask.toString(); +} + +bool AllowedClientHosts::IPSubnet::isMaskAllBitsOne() const +{ + return mask == IPAddress(mask.length() * 8, mask.family()); +} + + +void AllowedClientHosts::clear() +{ + addresses = {}; + subnets = {}; + names = {}; + name_regexps = {}; + like_patterns = {}; + any_host = false; + local_host = false; +} + +bool AllowedClientHosts::empty() const +{ + return !any_host && !local_host && addresses.empty() && subnets.empty() && names.empty() && name_regexps.empty() && like_patterns.empty(); +} + +void AllowedClientHosts::addAddress(const IPAddress & address) +{ + if (address.isLoopback()) + local_host = true; + else if (boost::range::find(addresses, address) == addresses.end()) + addresses.push_back(address); +} + +void AllowedClientHosts::removeAddress(const IPAddress & address) +{ + if (address.isLoopback()) + local_host = false; + else + boost::range::remove_erase(addresses, address); +} + +void AllowedClientHosts::addSubnet(const IPSubnet & subnet) +{ + if (subnet.getMask().isWildcard()) + any_host = true; + else if (subnet.isMaskAllBitsOne()) + addAddress(subnet.getPrefix()); + else if (boost::range::find(subnets, subnet) == subnets.end()) + subnets.push_back(subnet); +} + +void AllowedClientHosts::removeSubnet(const IPSubnet & subnet) +{ + if (subnet.getMask().isWildcard()) + any_host = false; + else if (subnet.isMaskAllBitsOne()) + removeAddress(subnet.getPrefix()); + else + boost::range::remove_erase(subnets, subnet); +} + +void AllowedClientHosts::addName(const String & name) +{ + if (boost::iequals(name, "localhost")) + local_host = true; + else if (boost::range::find(names, name) == names.end()) + names.push_back(name); +} + +void AllowedClientHosts::removeName(const String & name) +{ + if (boost::iequals(name, "localhost")) + local_host = false; + else + boost::range::remove_erase(names, name); +} + +void AllowedClientHosts::addNameRegexp(const String & name_regexp) +{ + if (boost::iequals(name_regexp, "localhost")) + local_host = true; + else if (name_regexp == ".*") + any_host = true; + else if (boost::range::find(name_regexps, name_regexp) == name_regexps.end()) + name_regexps.push_back(name_regexp); +} + +void AllowedClientHosts::removeNameRegexp(const String & name_regexp) +{ + if (boost::iequals(name_regexp, "localhost")) + local_host = false; + else if (name_regexp == ".*") + any_host = false; + else + boost::range::remove_erase(name_regexps, name_regexp); +} + +void AllowedClientHosts::addLikePattern(const String & pattern) +{ + if (boost::iequals(pattern, "localhost") || (pattern == "127.0.0.1") || (pattern == "::1")) + local_host = true; + else if ((pattern == "%") || (pattern == "0.0.0.0/0") || (pattern == "::/0")) + any_host = true; + else if (boost::range::find(like_patterns, pattern) == name_regexps.end()) + like_patterns.push_back(pattern); +} + +void AllowedClientHosts::removeLikePattern(const String & pattern) +{ + if (boost::iequals(pattern, "localhost") || (pattern == "127.0.0.1") || (pattern == "::1")) + local_host = false; + else if ((pattern == "%") || (pattern == "0.0.0.0/0") || (pattern == "::/0")) + any_host = false; + else + boost::range::remove_erase(like_patterns, pattern); +} + +void 
AllowedClientHosts::addLocalHost() +{ + local_host = true; +} + +void AllowedClientHosts::removeLocalHost() +{ + local_host = false; +} + +void AllowedClientHosts::addAnyHost() +{ + clear(); + any_host = true; +} + +void AllowedClientHosts::add(const AllowedClientHosts & other) +{ + if (other.containsAnyHost()) + { + addAnyHost(); + return; + } + if (other.containsLocalHost()) + addLocalHost(); + for (const IPAddress & address : other.getAddresses()) + addAddress(address); + for (const IPSubnet & subnet : other.getSubnets()) + addSubnet(subnet); + for (const String & name : other.getNames()) + addName(name); + for (const String & name_regexp : other.getNameRegexps()) + addNameRegexp(name_regexp); + for (const String & like_pattern : other.getLikePatterns()) + addLikePattern(like_pattern); +} + +void AllowedClientHosts::remove(const AllowedClientHosts & other) +{ + if (other.containsAnyHost()) + { + clear(); + return; + } + if (other.containsLocalHost()) + removeLocalHost(); + for (const IPAddress & address : other.getAddresses()) + removeAddress(address); + for (const IPSubnet & subnet : other.getSubnets()) + removeSubnet(subnet); + for (const String & name : other.getNames()) + removeName(name); + for (const String & name_regexp : other.getNameRegexps()) + removeNameRegexp(name_regexp); + for (const String & like_pattern : other.getLikePatterns()) + removeLikePattern(like_pattern); +} + + +bool operator ==(const AllowedClientHosts & lhs, const AllowedClientHosts & rhs) +{ + return (lhs.any_host == rhs.any_host) && (lhs.local_host == rhs.local_host) && (lhs.addresses == rhs.addresses) + && (lhs.subnets == rhs.subnets) && (lhs.names == rhs.names) && (lhs.name_regexps == rhs.name_regexps) + && (lhs.like_patterns == rhs.like_patterns); +} + + bool AllowedClientHosts::contains(const IPAddress & client_address) const { if (any_host) diff --git a/src/Access/Common/AllowedClientHosts.h b/src/Access/Common/AllowedClientHosts.h new file mode 100644 index 00000000000..6a6e3d73eef --- /dev/null +++ b/src/Access/Common/AllowedClientHosts.h @@ -0,0 +1,125 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ +using Strings = std::vector; + +/// Represents lists of hosts a user is allowed to connect to server from. 
+class AllowedClientHosts +{ +public: + using IPAddress = Poco::Net::IPAddress; + + class IPSubnet + { + public: + IPSubnet() {} + IPSubnet(const IPAddress & prefix_, const IPAddress & mask_) { set(prefix_, mask_); } + IPSubnet(const IPAddress & prefix_, size_t num_prefix_bits) { set(prefix_, num_prefix_bits); } + explicit IPSubnet(const IPAddress & address) { set(address); } + explicit IPSubnet(const String & str); + + const IPAddress & getPrefix() const { return prefix; } + const IPAddress & getMask() const { return mask; } + bool isMaskAllBitsOne() const; + String toString() const; + + friend bool operator ==(const IPSubnet & lhs, const IPSubnet & rhs) { return (lhs.prefix == rhs.prefix) && (lhs.mask == rhs.mask); } + friend bool operator !=(const IPSubnet & lhs, const IPSubnet & rhs) { return !(lhs == rhs); } + + private: + void set(const IPAddress & prefix_, const IPAddress & mask_); + void set(const IPAddress & prefix_, size_t num_prefix_bits); + void set(const IPAddress & address); + + IPAddress prefix; + IPAddress mask; + }; + + struct AnyHostTag {}; + + AllowedClientHosts() {} + AllowedClientHosts(AnyHostTag) { addAnyHost(); } + ~AllowedClientHosts() {} + + AllowedClientHosts(const AllowedClientHosts & src) = default; + AllowedClientHosts & operator =(const AllowedClientHosts & src) = default; + AllowedClientHosts(AllowedClientHosts && src) = default; + AllowedClientHosts & operator =(AllowedClientHosts && src) = default; + + /// Removes all contained addresses. This will disallow all hosts. + void clear(); + + bool empty() const; + + /// Allows exact IP address. + /// For example, 213.180.204.3 or 2a02:6b8::3 + void addAddress(const IPAddress & address); + void addAddress(const String & address) { addAddress(IPAddress(address)); } + void removeAddress(const IPAddress & address); + void removeAddress(const String & address) { removeAddress(IPAddress{address}); } + const std::vector & getAddresses() const { return addresses; } + + /// Allows an IP subnet. + /// For example, 312.234.1.1/255.255.255.0 or 2a02:6b8::3/64 + void addSubnet(const IPSubnet & subnet); + void addSubnet(const String & subnet) { addSubnet(IPSubnet{subnet}); } + void addSubnet(const IPAddress & prefix, const IPAddress & mask) { addSubnet(IPSubnet{prefix, mask}); } + void addSubnet(const IPAddress & prefix, size_t num_prefix_bits) { addSubnet(IPSubnet{prefix, num_prefix_bits}); } + void removeSubnet(const IPSubnet & subnet); + void removeSubnet(const String & subnet) { removeSubnet(IPSubnet{subnet}); } + void removeSubnet(const IPAddress & prefix, const IPAddress & mask) { removeSubnet(IPSubnet{prefix, mask}); } + void removeSubnet(const IPAddress & prefix, size_t num_prefix_bits) { removeSubnet(IPSubnet{prefix, num_prefix_bits}); } + const std::vector & getSubnets() const { return subnets; } + + /// Allows an exact host name. The `contains()` function will check that the provided address equals to one of that host's addresses. + void addName(const String & name); + void removeName(const String & name); + const std::vector & getNames() const { return names; } + + /// Allows the host names matching a regular expression. + void addNameRegexp(const String & name_regexp); + void removeNameRegexp(const String & name_regexp); + const std::vector & getNameRegexps() const { return name_regexps; } + + /// Allows IP addresses or host names using LIKE pattern. + /// This pattern can contain % and _ wildcard characters. + /// For example, addLikePattern("%") will allow all addresses. 
+ void addLikePattern(const String & pattern); + void removeLikePattern(const String & like_pattern); + const std::vector & getLikePatterns() const { return like_patterns; } + + /// Allows local host. + void addLocalHost(); + void removeLocalHost(); + bool containsLocalHost() const { return local_host;} + + /// Allows any host. + void addAnyHost(); + bool containsAnyHost() const { return any_host;} + + void add(const AllowedClientHosts & other); + void remove(const AllowedClientHosts & other); + + /// Checks if the provided address is in the list. Returns false if not. + bool contains(const IPAddress & address) const; + + friend bool operator ==(const AllowedClientHosts & lhs, const AllowedClientHosts & rhs); + friend bool operator !=(const AllowedClientHosts & lhs, const AllowedClientHosts & rhs) { return !(lhs == rhs); } + +private: + std::vector addresses; + std::vector subnets; + Strings names; + Strings name_regexps; + Strings like_patterns; + bool any_host = false; + bool local_host = false; +}; + +} diff --git a/src/Access/Common/AuthenticationData.cpp b/src/Access/Common/AuthenticationData.cpp new file mode 100644 index 00000000000..012e7546270 --- /dev/null +++ b/src/Access/Common/AuthenticationData.cpp @@ -0,0 +1,196 @@ +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int SUPPORT_IS_DISABLED; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; +} + + +const AuthenticationTypeInfo & AuthenticationTypeInfo::get(AuthenticationType type_) +{ + static constexpr auto make_info = [](const char * raw_name_) + { + String init_name = raw_name_; + boost::to_lower(init_name); + return AuthenticationTypeInfo{raw_name_, std::move(init_name)}; + }; + + switch (type_) + { + case AuthenticationType::NO_PASSWORD: + { + static const auto info = make_info("NO_PASSWORD"); + return info; + } + case AuthenticationType::PLAINTEXT_PASSWORD: + { + static const auto info = make_info("PLAINTEXT_PASSWORD"); + return info; + } + case AuthenticationType::SHA256_PASSWORD: + { + static const auto info = make_info("SHA256_PASSWORD"); + return info; + } + case AuthenticationType::DOUBLE_SHA1_PASSWORD: + { + static const auto info = make_info("DOUBLE_SHA1_PASSWORD"); + return info; + } + case AuthenticationType::LDAP: + { + static const auto info = make_info("LDAP"); + return info; + } + case AuthenticationType::KERBEROS: + { + static const auto info = make_info("KERBEROS"); + return info; + } + case AuthenticationType::MAX: + break; + } + throw Exception("Unknown authentication type: " + std::to_string(static_cast(type_)), ErrorCodes::LOGICAL_ERROR); +} + + +AuthenticationData::Digest AuthenticationData::Util::encodeSHA256(const std::string_view & text [[maybe_unused]]) +{ +#if USE_SSL + Digest hash; + hash.resize(32); + ::DB::encodeSHA256(text, hash.data()); + return hash; +#else + throw DB::Exception( + "SHA256 passwords support is disabled, because ClickHouse was built without SSL library", + DB::ErrorCodes::SUPPORT_IS_DISABLED); +#endif +} + + +AuthenticationData::Digest AuthenticationData::Util::encodeSHA1(const std::string_view & text) +{ + Poco::SHA1Engine engine; + engine.update(text.data(), text.size()); + return engine.digest(); +} + + +bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs) +{ + return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash) + && (lhs.ldap_server_name == rhs.ldap_server_name) && (lhs.kerberos_realm == 
rhs.kerberos_realm); +} + + +void AuthenticationData::setPassword(const String & password_) +{ + switch (type) + { + case AuthenticationType::PLAINTEXT_PASSWORD: + return setPasswordHashBinary(Util::stringToDigest(password_)); + + case AuthenticationType::SHA256_PASSWORD: + return setPasswordHashBinary(Util::encodeSHA256(password_)); + + case AuthenticationType::DOUBLE_SHA1_PASSWORD: + return setPasswordHashBinary(Util::encodeDoubleSHA1(password_)); + + case AuthenticationType::NO_PASSWORD: + case AuthenticationType::LDAP: + case AuthenticationType::KERBEROS: + throw Exception("Cannot specify password for authentication type " + toString(type), ErrorCodes::LOGICAL_ERROR); + + case AuthenticationType::MAX: + break; + } + throw Exception("setPassword(): authentication type " + toString(type) + " not supported", ErrorCodes::NOT_IMPLEMENTED); +} + + +String AuthenticationData::getPassword() const +{ + if (type != AuthenticationType::PLAINTEXT_PASSWORD) + throw Exception("Cannot decode the password", ErrorCodes::LOGICAL_ERROR); + return String(password_hash.data(), password_hash.data() + password_hash.size()); +} + + +void AuthenticationData::setPasswordHashHex(const String & hash) +{ + Digest digest; + digest.resize(hash.size() / 2); + boost::algorithm::unhex(hash.begin(), hash.end(), digest.data()); + setPasswordHashBinary(digest); +} + + +String AuthenticationData::getPasswordHashHex() const +{ + if (type == AuthenticationType::LDAP || type == AuthenticationType::KERBEROS) + throw Exception("Cannot get password hex hash for authentication type " + toString(type), ErrorCodes::LOGICAL_ERROR); + + String hex; + hex.resize(password_hash.size() * 2); + boost::algorithm::hex(password_hash.begin(), password_hash.end(), hex.data()); + return hex; +} + + +void AuthenticationData::setPasswordHashBinary(const Digest & hash) +{ + switch (type) + { + case AuthenticationType::PLAINTEXT_PASSWORD: + { + password_hash = hash; + return; + } + + case AuthenticationType::SHA256_PASSWORD: + { + if (hash.size() != 32) + throw Exception( + "Password hash for the 'SHA256_PASSWORD' authentication type has length " + std::to_string(hash.size()) + + " but must be exactly 32 bytes.", + ErrorCodes::BAD_ARGUMENTS); + password_hash = hash; + return; + } + + case AuthenticationType::DOUBLE_SHA1_PASSWORD: + { + if (hash.size() != 20) + throw Exception( + "Password hash for the 'DOUBLE_SHA1_PASSWORD' authentication type has length " + std::to_string(hash.size()) + + " but must be exactly 20 bytes.", + ErrorCodes::BAD_ARGUMENTS); + password_hash = hash; + return; + } + + case AuthenticationType::NO_PASSWORD: + case AuthenticationType::LDAP: + case AuthenticationType::KERBEROS: + throw Exception("Cannot specify password binary hash for authentication type " + toString(type), ErrorCodes::LOGICAL_ERROR); + + case AuthenticationType::MAX: + break; + } + throw Exception("setPasswordHashBinary(): authentication type " + toString(type) + " not supported", ErrorCodes::NOT_IMPLEMENTED); +} + +} diff --git a/src/Access/Common/AuthenticationData.h b/src/Access/Common/AuthenticationData.h new file mode 100644 index 00000000000..8b390fd0900 --- /dev/null +++ b/src/Access/Common/AuthenticationData.h @@ -0,0 +1,102 @@ +#pragma once + +#include +#include + +namespace DB +{ + +enum class AuthenticationType +{ + /// User doesn't have to enter password. + NO_PASSWORD, + + /// Password is stored as is. + PLAINTEXT_PASSWORD, + + /// Password is encrypted in SHA256 hash. + SHA256_PASSWORD, + + /// SHA1(SHA1(password)). 
+ /// This kind of hash is used by the `mysql_native_password` authentication plugin. + DOUBLE_SHA1_PASSWORD, + + /// Password is checked by a [remote] LDAP server. Connection will be made at each authentication attempt. + LDAP, + + /// Kerberos authentication performed through GSS-API negotiation loop. + KERBEROS, + + MAX, +}; + +struct AuthenticationTypeInfo +{ + const char * const raw_name; + const String name; /// Lowercased with underscores, e.g. "sha256_password". + static const AuthenticationTypeInfo & get(AuthenticationType type_); +}; + +inline String toString(AuthenticationType type_) +{ + return AuthenticationTypeInfo::get(type_).raw_name; +} + + +/// Stores data for checking password when a user logins. +class AuthenticationData +{ +public: + using Digest = std::vector; + + AuthenticationData(AuthenticationType type_ = AuthenticationType::NO_PASSWORD) : type(type_) {} + AuthenticationData(const AuthenticationData & src) = default; + AuthenticationData & operator =(const AuthenticationData & src) = default; + AuthenticationData(AuthenticationData && src) = default; + AuthenticationData & operator =(AuthenticationData && src) = default; + + AuthenticationType getType() const { return type; } + + /// Sets the password and encrypt it using the authentication type set in the constructor. + void setPassword(const String & password_); + + /// Returns the password. Allowed to use only for Type::PLAINTEXT_PASSWORD. + String getPassword() const; + + /// Sets the password as a string of hexadecimal digits. + void setPasswordHashHex(const String & hash); + String getPasswordHashHex() const; + + /// Sets the password in binary form. + void setPasswordHashBinary(const Digest & hash); + const Digest & getPasswordHashBinary() const { return password_hash; } + + /// Sets the server name for authentication type LDAP. + const String & getLDAPServerName() const { return ldap_server_name; } + void setLDAPServerName(const String & name) { ldap_server_name = name; } + + /// Sets the realm name for authentication type KERBEROS. + const String & getKerberosRealm() const { return kerberos_realm; } + void setKerberosRealm(const String & realm) { kerberos_realm = realm; } + + friend bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs); + friend bool operator !=(const AuthenticationData & lhs, const AuthenticationData & rhs) { return !(lhs == rhs); } + + struct Util + { + static Digest stringToDigest(const std::string_view & text) { return Digest(text.data(), text.data() + text.size()); } + static Digest encodeSHA256(const std::string_view & text); + static Digest encodeSHA1(const std::string_view & text); + static Digest encodeSHA1(const Digest & text) { return encodeSHA1(std::string_view{reinterpret_cast(text.data()), text.size()}); } + static Digest encodeDoubleSHA1(const std::string_view & text) { return encodeSHA1(encodeSHA1(text)); } + static Digest encodeDoubleSHA1(const Digest & text) { return encodeSHA1(encodeSHA1(text)); } + }; + +private: + AuthenticationType type = AuthenticationType::NO_PASSWORD; + Digest password_hash; + String ldap_server_name; + String kerberos_realm; +}; + +} diff --git a/src/Access/Common/CMakeLists.txt b/src/Access/Common/CMakeLists.txt new file mode 100644 index 00000000000..6a7682ec4bd --- /dev/null +++ b/src/Access/Common/CMakeLists.txt @@ -0,0 +1,5 @@ +include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") + +add_headers_and_sources(clickhouse_common_access .) 
+add_library(clickhouse_common_access ${clickhouse_common_access_headers} ${clickhouse_common_access_sources}) +target_link_libraries(clickhouse_common_access PUBLIC clickhouse_common_io) diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index a186ffb3495..e9164b4ae44 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -142,13 +142,13 @@ namespace } -ContextAccess::ContextAccess(const AccessControlManager & manager_, const Params & params_) - : manager(&manager_) +ContextAccess::ContextAccess(const AccessControl & access_control_, const Params & params_) + : access_control(&access_control_) , params(params_) { std::lock_guard lock{mutex}; - subscription_for_user_change = manager->subscribeForChanges( + subscription_for_user_change = access_control->subscribeForChanges( *params.user_id, [this](const UUID &, const AccessEntityPtr & entity) { UserPtr changed_user = entity ? typeid_cast(entity) : nullptr; @@ -156,7 +156,7 @@ ContextAccess::ContextAccess(const AccessControlManager & manager_, const Params setUser(changed_user); }); - setUser(manager->read(*params.user_id)); + setUser(access_control->read(*params.user_id)); } @@ -194,7 +194,7 @@ void ContextAccess::setUser(const UserPtr & user_) const } subscription_for_roles_changes.reset(); - enabled_roles = manager->getEnabledRoles(current_roles, current_roles_with_admin_option); + enabled_roles = access_control->getEnabledRoles(current_roles, current_roles_with_admin_option); subscription_for_roles_changes = enabled_roles->subscribeForChanges([this](const std::shared_ptr & roles_info_) { std::lock_guard lock{mutex}; @@ -209,11 +209,11 @@ void ContextAccess::setRolesInfo(const std::shared_ptr & { assert(roles_info_); roles_info = roles_info_; - enabled_row_policies = manager->getEnabledRowPolicies( + enabled_row_policies = access_control->getEnabledRowPolicies( *params.user_id, roles_info->enabled_roles); - enabled_quota = manager->getEnabledQuota( + enabled_quota = access_control->getEnabledQuota( *params.user_id, user_name, roles_info->enabled_roles, params.address, params.forwarded_address, params.quota_key); - enabled_settings = manager->getEnabledSettings( + enabled_settings = access_control->getEnabledSettings( *params.user_id, user->settings, roles_info->enabled_roles, roles_info->settings_from_enabled_roles); calculateAccessRights(); } @@ -327,7 +327,7 @@ std::shared_ptr ContextAccess::getDefaultProfileInfo std::lock_guard lock{mutex}; if (enabled_settings) return enabled_settings->getInfo(); - static const auto everything_by_default = std::make_shared(*manager); + static const auto everything_by_default = std::make_shared(*access_control); return everything_by_default; } @@ -609,7 +609,7 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const template bool ContextAccess::checkAdminOptionImpl(const UUID & role_id) const { - return checkAdminOptionImplHelper(to_array(role_id), [this](const UUID & id, size_t) { return manager->tryReadName(id); }); + return checkAdminOptionImplHelper(to_array(role_id), [this](const UUID & id, size_t) { return access_control->tryReadName(id); }); } template @@ -627,7 +627,7 @@ bool ContextAccess::checkAdminOptionImpl(const UUID & role_id, const std::unorde template bool ContextAccess::checkAdminOptionImpl(const std::vector & role_ids) const { - return checkAdminOptionImplHelper(role_ids, [this](const UUID & id, size_t) { return manager->tryReadName(id); }); 
+ return checkAdminOptionImplHelper(role_ids, [this](const UUID & id, size_t) { return access_control->tryReadName(id); }); } template diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index 8f5f5a096e7..a7c91faf43b 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace Poco { class Logger; } @@ -25,7 +26,7 @@ struct QuotaUsage; struct Settings; struct SettingsProfilesInfo; class SettingsChanges; -class AccessControlManager; +class AccessControl; class IAST; using ASTPtr = std::shared_ptr; @@ -155,9 +156,9 @@ public: static std::shared_ptr getFullAccess(); private: - friend class AccessControlManager; + friend class AccessControl; ContextAccess() {} - ContextAccess(const AccessControlManager & manager_, const Params & params_); + ContextAccess(const AccessControl & access_control_, const Params & params_); void setUser(const UserPtr & user_) const; void setRolesInfo(const std::shared_ptr & roles_info_) const; @@ -203,7 +204,7 @@ private: template bool checkAdminOptionImplHelper(const Container & role_ids, const GetNameFunction & get_name_function) const; - const AccessControlManager * manager = nullptr; + const AccessControl * access_control = nullptr; const Params params; bool is_full_access = false; mutable Poco::Logger * trace_log = nullptr; diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index e3d773f4d91..247bcc1ee89 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -1,19 +1,19 @@ #include -#include -#include -#include -#include -#include #include #include #include #include #include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index aade1fdd6f1..a0ad5d4ec79 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -1,6 +1,7 @@ #include -#include +#include #include +#include #include #include #include @@ -495,7 +496,7 @@ bool IAccessStorage::areCredentialsValidImpl( if (credentials.getUserName() != user.getName()) return false; - return user.authentication.areCredentialsValid(credentials, external_authenticators); + return Authentication::areCredentialsValid(credentials, user.auth_data, external_authenticators); } diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index 8a612982c79..92c9b15612b 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -27,10 +27,10 @@ namespace ErrorCodes } -LDAPAccessStorage::LDAPAccessStorage(const String & storage_name_, AccessControlManager * access_control_manager_, const Poco::Util::AbstractConfiguration & config, const String & prefix) +LDAPAccessStorage::LDAPAccessStorage(const String & storage_name_, AccessControl * access_control_, const Poco::Util::AbstractConfiguration & config, const String & prefix) : IAccessStorage(storage_name_) { - setConfiguration(access_control_manager_, config, prefix); + setConfiguration(access_control_, config, prefix); } @@ -40,7 +40,7 @@ String LDAPAccessStorage::getLDAPServerName() const } -void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_manager_, const Poco::Util::AbstractConfiguration & config, const String & prefix) +void 
LDAPAccessStorage::setConfiguration(AccessControl * access_control_, const Poco::Util::AbstractConfiguration & config, const String & prefix) { std::scoped_lock lock(mutex); @@ -80,7 +80,7 @@ void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_m } } - access_control_manager = access_control_manager_; + access_control = access_control_; ldap_server_name = ldap_server_name_cfg; role_search_params.swap(role_search_params_cfg); common_role_names.swap(common_roles_cfg); @@ -91,7 +91,7 @@ void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_m granted_role_names.clear(); granted_role_ids.clear(); - role_change_subscription = access_control_manager->subscribeForChanges( + role_change_subscription = access_control->subscribeForChanges( [this] (const UUID & id, const AccessEntityPtr & entity) { return this->processRoleChange(id, entity); @@ -215,7 +215,7 @@ void LDAPAccessStorage::assignRolesNoLock(User & user, const LDAPClient::SearchR auto it = granted_role_ids.find(role_name); if (it == granted_role_ids.end()) { - if (const auto role_id = access_control_manager->find(role_name)) + if (const auto role_id = access_control->find(role_name)) { granted_role_names.insert_or_assign(*role_id, role_name); it = granted_role_ids.insert_or_assign(role_name, *role_id).first; @@ -527,8 +527,8 @@ UUID LDAPAccessStorage::loginImpl(const Credentials & credentials, const Poco::N // User does not exist, so we create one, and will add it if authentication is successful. auto user = std::make_shared(); user->setName(credentials.getUserName()); - user->authentication = Authentication(Authentication::Type::LDAP); - user->authentication.setLDAPServerName(ldap_server_name); + user->auth_data = AuthenticationData(AuthenticationType::LDAP); + user->auth_data.setLDAPServerName(ldap_server_name); if (!isAddressAllowedImpl(*user, address)) throwAddressNotAllowed(address); @@ -555,8 +555,8 @@ UUID LDAPAccessStorage::getIDOfLoggedUserImpl(const String & user_name) const // User does not exist, so we create one, and add it pretending that the authentication is successful. auto user = std::make_shared(); user->setName(user_name); - user->authentication = Authentication(Authentication::Type::LDAP); - user->authentication.setLDAPServerName(ldap_server_name); + user->auth_data = AuthenticationData(AuthenticationType::LDAP); + user->auth_data.setLDAPServerName(ldap_server_name); LDAPClient::SearchResultsList external_roles; diff --git a/src/Access/LDAPAccessStorage.h b/src/Access/LDAPAccessStorage.h index 6cfff07b436..c1512117186 100644 --- a/src/Access/LDAPAccessStorage.h +++ b/src/Access/LDAPAccessStorage.h @@ -22,7 +22,7 @@ namespace Poco namespace DB { -class AccessControlManager; +class AccessControl; /// Implementation of IAccessStorage which allows attaching users from a remote LDAP server. /// Currently, any user name will be treated as a name of an existing remote user, @@ -32,7 +32,7 @@ class LDAPAccessStorage : public IAccessStorage public: static constexpr char STORAGE_TYPE[] = "ldap"; - explicit LDAPAccessStorage(const String & storage_name_, AccessControlManager * access_control_manager_, const Poco::Util::AbstractConfiguration & config, const String & prefix); + explicit LDAPAccessStorage(const String & storage_name_, AccessControl * access_control_, const Poco::Util::AbstractConfiguration & config, const String & prefix); virtual ~LDAPAccessStorage() override = default; String getLDAPServerName() const; @@ -59,7 +59,7 @@ private: // IAccessStorage implementations. 
virtual UUID getIDOfLoggedUserImpl(const String & user_name) const override; private: - void setConfiguration(AccessControlManager * access_control_manager_, const Poco::Util::AbstractConfiguration & config, const String & prefix); + void setConfiguration(AccessControl * access_control_, const Poco::Util::AbstractConfiguration & config, const String & prefix); void processRoleChange(const UUID & id, const AccessEntityPtr & entity); void applyRoleChangeNoLock(bool grant, const UUID & role_id, const String & role_name); @@ -71,7 +71,7 @@ private: const ExternalAuthenticators & external_authenticators, LDAPClient::SearchResultsList & role_search_results) const; mutable std::recursive_mutex mutex; - AccessControlManager * access_control_manager = nullptr; + AccessControl * access_control = nullptr; String ldap_server_name; LDAPClient::RoleSearchParamsList role_search_params; std::set common_role_names; // role name that should be granted to all users at all times diff --git a/src/Access/QuotaCache.cpp b/src/Access/QuotaCache.cpp index f742751d5fd..e5fa9114a51 100644 --- a/src/Access/QuotaCache.cpp +++ b/src/Access/QuotaCache.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -172,8 +172,8 @@ boost::shared_ptr QuotaCache::QuotaInfo::rebuildI } -QuotaCache::QuotaCache(const AccessControlManager & access_control_manager_) - : access_control_manager(access_control_manager_) +QuotaCache::QuotaCache(const AccessControl & access_control_) + : access_control(access_control_) { } @@ -215,7 +215,7 @@ void QuotaCache::ensureAllQuotasRead() return; all_quotas_read = true; - subscription = access_control_manager.subscribeForChanges( + subscription = access_control.subscribeForChanges( [&](const UUID & id, const AccessEntityPtr & entity) { if (entity) @@ -224,9 +224,9 @@ void QuotaCache::ensureAllQuotasRead() quotaRemoved(id); }); - for (const UUID & quota_id : access_control_manager.findAll()) + for (const UUID & quota_id : access_control.findAll()) { - auto quota = access_control_manager.tryRead(quota_id); + auto quota = access_control.tryRead(quota_id); if (quota) all_quotas.emplace(quota_id, QuotaInfo(quota, quota_id)); } diff --git a/src/Access/QuotaCache.h b/src/Access/QuotaCache.h index e991399d2e9..487c7a26487 100644 --- a/src/Access/QuotaCache.h +++ b/src/Access/QuotaCache.h @@ -10,14 +10,14 @@ namespace DB { -class AccessControlManager; +class AccessControl; /// Stores information how much amount of resources have been consumed and how much are left. 
class QuotaCache { public: - QuotaCache(const AccessControlManager & access_control_manager_); + QuotaCache(const AccessControl & access_control_); ~QuotaCache(); std::shared_ptr getEnabledQuota( @@ -56,7 +56,7 @@ private: void chooseQuotaToConsume(); void chooseQuotaToConsumeFor(EnabledQuota & enabled_quota); - const AccessControlManager & access_control_manager; + const AccessControl & access_control; mutable std::mutex mutex; std::unordered_map all_quotas; bool all_quotas_read = false; diff --git a/src/Access/RoleCache.cpp b/src/Access/RoleCache.cpp index e9c88868e8c..f0e1435e299 100644 --- a/src/Access/RoleCache.cpp +++ b/src/Access/RoleCache.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include @@ -56,8 +56,8 @@ namespace } -RoleCache::RoleCache(const AccessControlManager & manager_) - : manager(manager_), cache(600000 /* 10 minutes */) {} +RoleCache::RoleCache(const AccessControl & access_control_) + : access_control(access_control_), cache(600000 /* 10 minutes */) {} RoleCache::~RoleCache() = default; @@ -136,7 +136,7 @@ RolePtr RoleCache::getRole(const UUID & role_id) if (role_from_cache) return role_from_cache->first; - auto subscription = manager.subscribeForChanges(role_id, + auto subscription = access_control.subscribeForChanges(role_id, [this, role_id](const UUID &, const AccessEntityPtr & entity) { auto changed_role = entity ? typeid_cast(entity) : nullptr; @@ -146,7 +146,7 @@ RolePtr RoleCache::getRole(const UUID & role_id) roleRemoved(role_id); }); - auto role = manager.tryRead(role_id); + auto role = access_control.tryRead(role_id); if (role) { auto cache_value = Poco::SharedPtr>( diff --git a/src/Access/RoleCache.h b/src/Access/RoleCache.h index 0a1d03c1a80..42f4eec5b49 100644 --- a/src/Access/RoleCache.h +++ b/src/Access/RoleCache.h @@ -9,14 +9,14 @@ namespace DB { -class AccessControlManager; +class AccessControl; struct Role; using RolePtr = std::shared_ptr; class RoleCache { public: - RoleCache(const AccessControlManager & manager_); + RoleCache(const AccessControl & access_control_); ~RoleCache(); std::shared_ptr getEnabledRoles( @@ -30,7 +30,7 @@ private: void roleChanged(const UUID & role_id, const RolePtr & changed_role); void roleRemoved(const UUID & role_id); - const AccessControlManager & manager; + const AccessControl & access_control; Poco::ExpireCache> cache; std::map> enabled_roles; mutable std::mutex mutex; diff --git a/src/Access/RolesOrUsersSet.cpp b/src/Access/RolesOrUsersSet.cpp index ebd4f0f7a40..810198eeb98 100644 --- a/src/Access/RolesOrUsersSet.cpp +++ b/src/Access/RolesOrUsersSet.cpp @@ -1,9 +1,9 @@ #include -#include +#include +#include +#include #include #include -#include -#include #include #include #include @@ -53,40 +53,40 @@ RolesOrUsersSet::RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const std::opti init(ast, nullptr, current_user_id); } -RolesOrUsersSet::RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const AccessControlManager & manager) +RolesOrUsersSet::RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const AccessControl & access_control) { - init(ast, &manager); + init(ast, &access_control); } -RolesOrUsersSet::RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const AccessControlManager & manager, const std::optional & current_user_id) +RolesOrUsersSet::RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const AccessControl & access_control, const std::optional & current_user_id) { - init(ast, &manager, current_user_id); + init(ast, &access_control, current_user_id); } -void RolesOrUsersSet::init(const 
ASTRolesOrUsersSet & ast, const AccessControlManager * manager, const std::optional & current_user_id) +void RolesOrUsersSet::init(const ASTRolesOrUsersSet & ast, const AccessControl * access_control, const std::optional & current_user_id) { all = ast.all; - auto name_to_id = [&ast, manager](const String & name) -> UUID + auto name_to_id = [&ast, access_control](const String & name) -> UUID { if (ast.id_mode) return parse(name); - assert(manager); + assert(access_control); if (ast.allow_users && ast.allow_roles) { - auto id = manager->find(name); + auto id = access_control->find(name); if (id) return *id; - return manager->getID(name); + return access_control->getID(name); } else if (ast.allow_users) { - return manager->getID(name); + return access_control->getID(name); } else { assert(ast.allow_roles); - return manager->getID(name); + return access_control->getID(name); } }; @@ -147,7 +147,7 @@ std::shared_ptr RolesOrUsersSet::toAST() const } -std::shared_ptr RolesOrUsersSet::toASTWithNames(const AccessControlManager & manager) const +std::shared_ptr RolesOrUsersSet::toASTWithNames(const AccessControl & access_control) const { auto ast = std::make_shared(); ast->all = all; @@ -157,7 +157,7 @@ std::shared_ptr RolesOrUsersSet::toASTWithNames(const Access ast->names.reserve(ids.size()); for (const UUID & id : ids) { - auto name = manager.tryReadName(id); + auto name = access_control.tryReadName(id); if (name) ast->names.emplace_back(std::move(*name)); } @@ -169,7 +169,7 @@ std::shared_ptr RolesOrUsersSet::toASTWithNames(const Access ast->except_names.reserve(except_ids.size()); for (const UUID & except_id : except_ids) { - auto except_name = manager.tryReadName(except_id); + auto except_name = access_control.tryReadName(except_id); if (except_name) ast->except_names.emplace_back(std::move(*except_name)); } @@ -187,9 +187,9 @@ String RolesOrUsersSet::toString() const } -String RolesOrUsersSet::toStringWithNames(const AccessControlManager & manager) const +String RolesOrUsersSet::toStringWithNames(const AccessControl & access_control) const { - auto ast = toASTWithNames(manager); + auto ast = toASTWithNames(access_control); return serializeAST(*ast); } @@ -253,25 +253,25 @@ bool RolesOrUsersSet::match(const UUID & user_id, const boost::container::flat_s std::vector RolesOrUsersSet::getMatchingIDs() const { if (all) - throw Exception("getAllMatchingIDs() can't get ALL ids without manager", ErrorCodes::LOGICAL_ERROR); + throw Exception("getAllMatchingIDs() can't get ALL ids without access_control", ErrorCodes::LOGICAL_ERROR); std::vector res; boost::range::set_difference(ids, except_ids, std::back_inserter(res)); return res; } -std::vector RolesOrUsersSet::getMatchingIDs(const AccessControlManager & manager) const +std::vector RolesOrUsersSet::getMatchingIDs(const AccessControl & access_control) const { if (!all) return getMatchingIDs(); std::vector res; - for (const UUID & id : manager.findAll()) + for (const UUID & id : access_control.findAll()) { if (match(id)) res.push_back(id); } - for (const UUID & id : manager.findAll()) + for (const UUID & id : access_control.findAll()) { if (match(id)) res.push_back(id); diff --git a/src/Access/RolesOrUsersSet.h b/src/Access/RolesOrUsersSet.h index 871bb0c0758..1d5842e31a6 100644 --- a/src/Access/RolesOrUsersSet.h +++ b/src/Access/RolesOrUsersSet.h @@ -10,7 +10,7 @@ namespace DB { class ASTRolesOrUsersSet; -class AccessControlManager; +class AccessControl; /// Represents a set of users/roles like @@ -31,18 +31,18 @@ struct RolesOrUsersSet 
RolesOrUsersSet(const UUID & id); RolesOrUsersSet(const std::vector & ids_); - /// The constructor from AST requires the AccessControlManager if `ast.id_mode == false`. + /// The constructor from AST requires the AccessControl if `ast.id_mode == false`. RolesOrUsersSet(const ASTRolesOrUsersSet & ast); RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const std::optional & current_user_id); - RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const AccessControlManager & manager); - RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const AccessControlManager & manager, const std::optional & current_user_id); + RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const AccessControl & access_control); + RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const AccessControl & access_control, const std::optional & current_user_id); std::shared_ptr toAST() const; - std::shared_ptr toASTWithNames(const AccessControlManager & manager) const; + std::shared_ptr toASTWithNames(const AccessControl & access_control) const; String toString() const; - String toStringWithNames(const AccessControlManager & manager) const; - Strings toStringsWithNames(const AccessControlManager & manager) const; + String toStringWithNames(const AccessControl & access_control) const; + Strings toStringsWithNames(const AccessControl & access_control) const; bool empty() const; void clear(); @@ -57,7 +57,7 @@ struct RolesOrUsersSet std::vector getMatchingIDs() const; /// Returns a list of matching users and roles. - std::vector getMatchingIDs(const AccessControlManager & manager) const; + std::vector getMatchingIDs(const AccessControl & access_control) const; friend bool operator ==(const RolesOrUsersSet & lhs, const RolesOrUsersSet & rhs); friend bool operator !=(const RolesOrUsersSet & lhs, const RolesOrUsersSet & rhs) { return !(lhs == rhs); } @@ -67,7 +67,7 @@ struct RolesOrUsersSet boost::container::flat_set except_ids; private: - void init(const ASTRolesOrUsersSet & ast, const AccessControlManager * manager = nullptr, const std::optional & current_user_id = {}); + void init(const ASTRolesOrUsersSet & ast, const AccessControl * access_control = nullptr, const std::optional & current_user_id = {}); }; } diff --git a/src/Access/RowPolicyCache.cpp b/src/Access/RowPolicyCache.cpp index c8402dbdbd4..b5b6dd99438 100644 --- a/src/Access/RowPolicyCache.cpp +++ b/src/Access/RowPolicyCache.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -92,8 +92,8 @@ void RowPolicyCache::PolicyInfo::setPolicy(const RowPolicyPtr & policy_) } -RowPolicyCache::RowPolicyCache(const AccessControlManager & access_control_manager_) - : access_control_manager(access_control_manager_) +RowPolicyCache::RowPolicyCache(const AccessControl & access_control_) + : access_control(access_control_) { } @@ -131,7 +131,7 @@ void RowPolicyCache::ensureAllRowPoliciesRead() return; all_policies_read = true; - subscription = access_control_manager.subscribeForChanges( + subscription = access_control.subscribeForChanges( [&](const UUID & id, const AccessEntityPtr & entity) { if (entity) @@ -140,9 +140,9 @@ void RowPolicyCache::ensureAllRowPoliciesRead() rowPolicyRemoved(id); }); - for (const UUID & id : access_control_manager.findAll()) + for (const UUID & id : access_control.findAll()) { - auto quota = access_control_manager.tryRead(id); + auto quota = access_control.tryRead(id); if (quota) all_policies.emplace(id, PolicyInfo(quota)); } diff --git a/src/Access/RowPolicyCache.h b/src/Access/RowPolicyCache.h index 959eb989d4c..6834def58b6 100644 
--- a/src/Access/RowPolicyCache.h +++ b/src/Access/RowPolicyCache.h @@ -9,13 +9,13 @@ namespace DB { -class AccessControlManager; +class AccessControl; /// Stores read and parsed row policies. class RowPolicyCache { public: - RowPolicyCache(const AccessControlManager & access_control_manager_); + RowPolicyCache(const AccessControl & access_control_); ~RowPolicyCache(); std::shared_ptr getEnabledRowPolicies(const UUID & user_id, const boost::container::flat_set & enabled_roles); @@ -38,7 +38,7 @@ private: void mixConditions(); void mixConditionsFor(EnabledRowPolicies & enabled); - const AccessControlManager & access_control_manager; + const AccessControl & access_control; std::unordered_map all_policies; bool all_policies_read = false; scope_guard subscription; diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp index 316f869fc79..be59fc13136 100644 --- a/src/Access/SettingsConstraints.cpp +++ b/src/Access/SettingsConstraints.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -18,7 +18,7 @@ namespace ErrorCodes } -SettingsConstraints::SettingsConstraints(const AccessControlManager & manager_) : manager(&manager_) +SettingsConstraints::SettingsConstraints(const AccessControl & access_control_) : access_control(&access_control_) { } @@ -200,8 +200,8 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh }; if (reaction == THROW_ON_VIOLATION) - manager->checkSettingNameIsAllowed(setting_name); - else if (!manager->isSettingNameAllowed(setting_name)) + access_control->checkSettingNameIsAllowed(setting_name); + else if (!access_control->isSettingNameAllowed(setting_name)) return false; Field current_value, new_value; diff --git a/src/Access/SettingsConstraints.h b/src/Access/SettingsConstraints.h index 4259fe15e25..ec0421e060d 100644 --- a/src/Access/SettingsConstraints.h +++ b/src/Access/SettingsConstraints.h @@ -14,7 +14,7 @@ namespace DB struct Settings; struct SettingChange; class SettingsChanges; -class AccessControlManager; +class AccessControl; /** Checks if specified changes of settings are allowed or not. 
@@ -51,7 +51,7 @@ class AccessControlManager; class SettingsConstraints { public: - SettingsConstraints(const AccessControlManager & manager_); + SettingsConstraints(const AccessControl & access_control_); SettingsConstraints(const SettingsConstraints & src); SettingsConstraints & operator =(const SettingsConstraints & src); SettingsConstraints(SettingsConstraints && src); @@ -109,7 +109,7 @@ private: const Constraint * tryGetConstraint(const std::string_view & setting_name) const; std::unordered_map constraints; - const AccessControlManager * manager = nullptr; + const AccessControl * access_control = nullptr; }; } diff --git a/src/Access/SettingsConstraintsAndProfileIDs.h b/src/Access/SettingsConstraintsAndProfileIDs.h index 5538a10555e..c1f3f59af6b 100644 --- a/src/Access/SettingsConstraintsAndProfileIDs.h +++ b/src/Access/SettingsConstraintsAndProfileIDs.h @@ -15,7 +15,7 @@ struct SettingsConstraintsAndProfileIDs std::vector current_profiles; std::vector enabled_profiles; - SettingsConstraintsAndProfileIDs(const AccessControlManager & manager_) : constraints(manager_) {} + SettingsConstraintsAndProfileIDs(const AccessControl & access_control_) : constraints(access_control_) {} }; } diff --git a/src/Access/SettingsProfileElement.cpp b/src/Access/SettingsProfileElement.cpp index b42bcd1c279..ea6edef94a6 100644 --- a/src/Access/SettingsProfileElement.cpp +++ b/src/Access/SettingsProfileElement.cpp @@ -1,12 +1,12 @@ #include #include -#include +#include #include -#include #include #include #include #include +#include #include @@ -17,19 +17,19 @@ SettingsProfileElement::SettingsProfileElement(const ASTSettingsProfileElement & init(ast, nullptr); } -SettingsProfileElement::SettingsProfileElement(const ASTSettingsProfileElement & ast, const AccessControlManager & manager) +SettingsProfileElement::SettingsProfileElement(const ASTSettingsProfileElement & ast, const AccessControl & access_control) { - init(ast, &manager); + init(ast, &access_control); } -void SettingsProfileElement::init(const ASTSettingsProfileElement & ast, const AccessControlManager * manager) +void SettingsProfileElement::init(const ASTSettingsProfileElement & ast, const AccessControl * access_control) { - auto name_to_id = [id_mode{ast.id_mode}, manager](const String & name_) -> UUID + auto name_to_id = [id_mode{ast.id_mode}, access_control](const String & name_) -> UUID { if (id_mode) return parse(name_); - assert(manager); - return manager->getID(name_); + assert(access_control); + return access_control->getID(name_); }; if (!ast.parent_profile.empty()) @@ -40,8 +40,8 @@ void SettingsProfileElement::init(const ASTSettingsProfileElement & ast, const A setting_name = ast.setting_name; /// Optionally check if a setting with that name is allowed. 
- if (manager) - manager->checkSettingNameIsAllowed(setting_name); + if (access_control) + access_control->checkSettingNameIsAllowed(setting_name); value = ast.value; min_value = ast.min_value; @@ -76,13 +76,13 @@ std::shared_ptr SettingsProfileElement::toAST() const } -std::shared_ptr SettingsProfileElement::toASTWithNames(const AccessControlManager & manager) const +std::shared_ptr SettingsProfileElement::toASTWithNames(const AccessControl & access_control) const { auto ast = std::make_shared(); if (parent_profile) { - auto parent_profile_name = manager.tryReadName(*parent_profile); + auto parent_profile_name = access_control.tryReadName(*parent_profile); if (parent_profile_name) ast->parent_profile = *parent_profile_name; } @@ -103,10 +103,10 @@ SettingsProfileElements::SettingsProfileElements(const ASTSettingsProfileElement emplace_back(*ast_element); } -SettingsProfileElements::SettingsProfileElements(const ASTSettingsProfileElements & ast, const AccessControlManager & manager) +SettingsProfileElements::SettingsProfileElements(const ASTSettingsProfileElements & ast, const AccessControl & access_control) { for (const auto & ast_element : ast.elements) - emplace_back(*ast_element, manager); + emplace_back(*ast_element, access_control); } @@ -118,11 +118,11 @@ std::shared_ptr SettingsProfileElements::toAST() con return res; } -std::shared_ptr SettingsProfileElements::toASTWithNames(const AccessControlManager & manager) const +std::shared_ptr SettingsProfileElements::toASTWithNames(const AccessControl & access_control) const { auto res = std::make_shared(); for (const auto & element : *this) - res->elements.push_back(element.toASTWithNames(manager)); + res->elements.push_back(element.toASTWithNames(access_control)); return res; } @@ -155,9 +155,9 @@ SettingsChanges SettingsProfileElements::toSettingsChanges() const return res; } -SettingsConstraints SettingsProfileElements::toSettingsConstraints(const AccessControlManager & manager) const +SettingsConstraints SettingsProfileElements::toSettingsConstraints(const AccessControl & access_control) const { - SettingsConstraints res{manager}; + SettingsConstraints res{access_control}; for (const auto & elem : *this) { if (!elem.setting_name.empty()) diff --git a/src/Access/SettingsProfileElement.h b/src/Access/SettingsProfileElement.h index d0e2343e726..aadc5fc9e3f 100644 --- a/src/Access/SettingsProfileElement.h +++ b/src/Access/SettingsProfileElement.h @@ -13,7 +13,7 @@ class SettingsChanges; class SettingsConstraints; class ASTSettingsProfileElement; class ASTSettingsProfileElements; -class AccessControlManager; +class AccessControl; struct SettingsProfileElement @@ -35,14 +35,14 @@ struct SettingsProfileElement SettingsProfileElement() {} - /// The constructor from AST requires the AccessControlManager if `ast.id_mode == false`. + /// The constructor from AST requires the AccessControl if `ast.id_mode == false`. 
SettingsProfileElement(const ASTSettingsProfileElement & ast); - SettingsProfileElement(const ASTSettingsProfileElement & ast, const AccessControlManager & manager); + SettingsProfileElement(const ASTSettingsProfileElement & ast, const AccessControl & access_control); std::shared_ptr toAST() const; - std::shared_ptr toASTWithNames(const AccessControlManager & manager) const; + std::shared_ptr toASTWithNames(const AccessControl & access_control) const; private: - void init(const ASTSettingsProfileElement & ast, const AccessControlManager * manager); + void init(const ASTSettingsProfileElement & ast, const AccessControl * access_control); }; @@ -51,17 +51,17 @@ class SettingsProfileElements : public std::vector public: SettingsProfileElements() {} - /// The constructor from AST requires the AccessControlManager if `ast.id_mode == false`. + /// The constructor from AST requires the AccessControl if `ast.id_mode == false`. SettingsProfileElements(const ASTSettingsProfileElements & ast); - SettingsProfileElements(const ASTSettingsProfileElements & ast, const AccessControlManager & manager); + SettingsProfileElements(const ASTSettingsProfileElements & ast, const AccessControl & access_control); std::shared_ptr toAST() const; - std::shared_ptr toASTWithNames(const AccessControlManager & manager) const; + std::shared_ptr toASTWithNames(const AccessControl & access_control) const; void merge(const SettingsProfileElements & other); Settings toSettings() const; SettingsChanges toSettingsChanges() const; - SettingsConstraints toSettingsConstraints(const AccessControlManager & manager) const; + SettingsConstraints toSettingsConstraints(const AccessControl & access_control) const; std::vector toProfileIDs() const; }; diff --git a/src/Access/SettingsProfilesCache.cpp b/src/Access/SettingsProfilesCache.cpp index 3cd73720c3e..2a3dedbbd7a 100644 --- a/src/Access/SettingsProfilesCache.cpp +++ b/src/Access/SettingsProfilesCache.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -12,8 +12,8 @@ namespace ErrorCodes extern const int THERE_IS_NO_PROFILE; } -SettingsProfilesCache::SettingsProfilesCache(const AccessControlManager & manager_) - : manager(manager_) {} +SettingsProfilesCache::SettingsProfilesCache(const AccessControl & access_control_) + : access_control(access_control_) {} SettingsProfilesCache::~SettingsProfilesCache() = default; @@ -25,7 +25,7 @@ void SettingsProfilesCache::ensureAllProfilesRead() return; all_profiles_read = true; - subscription = manager.subscribeForChanges( + subscription = access_control.subscribeForChanges( [&](const UUID & id, const AccessEntityPtr & entity) { if (entity) @@ -34,9 +34,9 @@ void SettingsProfilesCache::ensureAllProfilesRead() profileRemoved(id); }); - for (const UUID & id : manager.findAll()) + for (const UUID & id : access_control.findAll()) { - auto profile = manager.tryRead(id); + auto profile = access_control.tryRead(id); if (profile) { all_profiles.emplace(id, profile); @@ -138,11 +138,11 @@ void SettingsProfilesCache::mergeSettingsAndConstraintsFor(EnabledSettings & ena merged_settings.merge(enabled.params.settings_from_enabled_roles); merged_settings.merge(enabled.params.settings_from_user); - auto info = std::make_shared(manager); + auto info = std::make_shared(access_control); info->profiles = enabled.params.settings_from_user.toProfileIDs(); substituteProfiles(merged_settings, info->profiles_with_implicit, info->names_of_profiles); info->settings = merged_settings.toSettingsChanges(); - info->constraints = 
merged_settings.toSettingsConstraints(manager); + info->constraints = merged_settings.toSettingsConstraints(access_control); enabled.setInfo(std::move(info)); } @@ -225,13 +225,13 @@ std::shared_ptr SettingsProfilesCache::getSettingsPr SettingsProfileElements elements = all_profiles[profile_id]->elements; - auto info = std::make_shared(manager); + auto info = std::make_shared(access_control); info->profiles.push_back(profile_id); info->profiles_with_implicit.push_back(profile_id); substituteProfiles(elements, info->profiles_with_implicit, info->names_of_profiles); info->settings = elements.toSettingsChanges(); - info->constraints.merge(elements.toSettingsConstraints(manager)); + info->constraints.merge(elements.toSettingsConstraints(access_control)); profile_infos_cache.add(profile_id, info); return info; diff --git a/src/Access/SettingsProfilesCache.h b/src/Access/SettingsProfilesCache.h index e5ba70b9df8..da852275ff5 100644 --- a/src/Access/SettingsProfilesCache.h +++ b/src/Access/SettingsProfilesCache.h @@ -9,7 +9,7 @@ namespace DB { -class AccessControlManager; +class AccessControl; struct SettingsProfile; using SettingsProfilePtr = std::shared_ptr; struct SettingsProfilesInfo; @@ -18,7 +18,7 @@ struct SettingsProfilesInfo; class SettingsProfilesCache { public: - SettingsProfilesCache(const AccessControlManager & manager_); + SettingsProfilesCache(const AccessControl & access_control_); ~SettingsProfilesCache(); void setDefaultProfileName(const String & default_profile_name); @@ -39,7 +39,7 @@ private: void mergeSettingsAndConstraintsFor(EnabledSettings & enabled) const; void substituteProfiles(SettingsProfileElements & elements, std::vector & substituted_profiles, std::unordered_map & names_of_substituted_profiles) const; - const AccessControlManager & manager; + const AccessControl & access_control; std::unordered_map all_profiles; std::unordered_map profiles_by_name; bool all_profiles_read = false; diff --git a/src/Access/SettingsProfilesInfo.cpp b/src/Access/SettingsProfilesInfo.cpp index 7efb16dd865..d8b139020e8 100644 --- a/src/Access/SettingsProfilesInfo.cpp +++ b/src/Access/SettingsProfilesInfo.cpp @@ -29,7 +29,7 @@ bool operator==(const SettingsProfilesInfo & lhs, const SettingsProfilesInfo & r std::shared_ptr SettingsProfilesInfo::getConstraintsAndProfileIDs(const std::shared_ptr & previous) const { - auto res = std::make_shared(manager); + auto res = std::make_shared(access_control); res->current_profiles = profiles; if (previous) diff --git a/src/Access/SettingsProfilesInfo.h b/src/Access/SettingsProfilesInfo.h index f554ba8d89e..b7f9b1da28d 100644 --- a/src/Access/SettingsProfilesInfo.h +++ b/src/Access/SettingsProfilesInfo.h @@ -29,7 +29,7 @@ struct SettingsProfilesInfo /// Names of all the profiles in `profiles`. 
std::unordered_map names_of_profiles; - SettingsProfilesInfo(const AccessControlManager & manager_) : constraints(manager_), manager(manager_) {} + SettingsProfilesInfo(const AccessControl & access_control_) : constraints(access_control_), access_control(access_control_) {} std::shared_ptr getConstraintsAndProfileIDs( const std::shared_ptr & previous = nullptr) const; @@ -47,7 +47,7 @@ struct SettingsProfilesInfo } private: - const AccessControlManager & manager; + const AccessControl & access_control; }; } diff --git a/src/Access/User.cpp b/src/Access/User.cpp index e21b48e11a0..d7c7f5c7ada 100644 --- a/src/Access/User.cpp +++ b/src/Access/User.cpp @@ -9,7 +9,7 @@ bool User::equal(const IAccessEntity & other) const if (!IAccessEntity::equal(other)) return false; const auto & other_user = typeid_cast(other); - return (authentication == other_user.authentication) && (allowed_client_hosts == other_user.allowed_client_hosts) + return (auth_data == other_user.auth_data) && (allowed_client_hosts == other_user.allowed_client_hosts) && (access == other_user.access) && (granted_roles == other_user.granted_roles) && (default_roles == other_user.default_roles) && (settings == other_user.settings) && (grantees == other_user.grantees) && (default_database == other_user.default_database); } diff --git a/src/Access/User.h b/src/Access/User.h index 6b61d5afdea..34badd5f847 100644 --- a/src/Access/User.h +++ b/src/Access/User.h @@ -2,8 +2,8 @@ #include #include -#include -#include +#include +#include #include #include #include @@ -15,7 +15,7 @@ namespace DB */ struct User : public IAccessEntity { - Authentication authentication; + AuthenticationData auth_data; AllowedClientHosts allowed_client_hosts = AllowedClientHosts::AnyHostTag{}; AccessRights access; GrantedRoles granted_roles; diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 7c5baa92b27..2d202c5094d 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -75,18 +75,18 @@ namespace if (has_password_plaintext) { - user->authentication = Authentication{Authentication::PLAINTEXT_PASSWORD}; - user->authentication.setPassword(config.getString(user_config + ".password")); + user->auth_data = AuthenticationData{AuthenticationType::PLAINTEXT_PASSWORD}; + user->auth_data.setPassword(config.getString(user_config + ".password")); } else if (has_password_sha256_hex) { - user->authentication = Authentication{Authentication::SHA256_PASSWORD}; - user->authentication.setPasswordHashHex(config.getString(user_config + ".password_sha256_hex")); + user->auth_data = AuthenticationData{AuthenticationType::SHA256_PASSWORD}; + user->auth_data.setPasswordHashHex(config.getString(user_config + ".password_sha256_hex")); } else if (has_password_double_sha1_hex) { - user->authentication = Authentication{Authentication::DOUBLE_SHA1_PASSWORD}; - user->authentication.setPasswordHashHex(config.getString(user_config + ".password_double_sha1_hex")); + user->auth_data = AuthenticationData{AuthenticationType::DOUBLE_SHA1_PASSWORD}; + user->auth_data.setPasswordHashHex(config.getString(user_config + ".password_double_sha1_hex")); } else if (has_ldap) { @@ -98,15 +98,15 @@ namespace if (ldap_server_name.empty()) throw Exception("LDAP server name cannot be empty for user " + user_name + ".", ErrorCodes::BAD_ARGUMENTS); - user->authentication = Authentication{Authentication::LDAP}; - user->authentication.setLDAPServerName(ldap_server_name); + user->auth_data = 
AuthenticationData{AuthenticationType::LDAP}; + user->auth_data.setLDAPServerName(ldap_server_name); } else if (has_kerberos) { const auto realm = config.getString(user_config + ".kerberos.realm", ""); - user->authentication = Authentication{Authentication::KERBEROS}; - user->authentication.setKerberosRealm(realm); + user->auth_data = AuthenticationData{AuthenticationType::KERBEROS}; + user->auth_data.setKerberosRealm(realm); } const auto profile_name_config = user_config + ".profile"; diff --git a/src/AggregateFunctions/AggregateFunctionMap.cpp b/src/AggregateFunctions/AggregateFunctionMap.cpp new file mode 100644 index 00000000000..1808e1c2ffa --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionMap.cpp @@ -0,0 +1,135 @@ +#include "AggregateFunctionMap.h" +#include "AggregateFunctions/AggregateFunctionCombinatorFactory.h" +#include "Functions/FunctionHelpers.h" + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class AggregateFunctionCombinatorMap final : public IAggregateFunctionCombinator +{ +public: + String getName() const override { return "Map"; } + + DataTypes transformArguments(const DataTypes & arguments) const override + { + if (arguments.empty()) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Incorrect number of arguments for aggregate function with " + getName() + " suffix"); + + const auto * map_type = checkAndGetDataType(arguments[0].get()); + if (map_type) + { + if (arguments.size() > 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, getName() + " combinator takes only one map argument"); + + return DataTypes({map_type->getValueType()}); + } + + // we need this part just to pass to redirection for mapped arrays + auto check_func = [](DataTypePtr t) { return t->getTypeId() == TypeIndex::Array; }; + + const auto * tup_type = checkAndGetDataType(arguments[0].get()); + if (tup_type) + { + const auto & types = tup_type->getElements(); + bool arrays_match = arguments.size() == 1 && types.size() >= 2 && std::all_of(types.begin(), types.end(), check_func); + if (arrays_match) + { + const auto * val_array_type = assert_cast(types[1].get()); + return DataTypes({val_array_type->getNestedType()}); + } + } + else + { + bool arrays_match = arguments.size() >= 2 && std::all_of(arguments.begin(), arguments.end(), check_func); + if (arrays_match) + { + const auto * val_array_type = assert_cast(arguments[1].get()); + return DataTypes({val_array_type->getNestedType()}); + } + } + + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function " + getName() + " requires map as argument"); + } + + AggregateFunctionPtr transformAggregateFunction( + const AggregateFunctionPtr & nested_function, + const AggregateFunctionProperties &, + const DataTypes & arguments, + const Array & params) const override + { + const auto * map_type = checkAndGetDataType(arguments[0].get()); + if (map_type) + { + const auto & key_type = map_type->getKeyType(); + + switch (key_type->getTypeId()) + { + case TypeIndex::Enum8: + case TypeIndex::Int8: + return std::make_shared>(nested_function, arguments); + case TypeIndex::Enum16: + case TypeIndex::Int16: + return std::make_shared>(nested_function, arguments); + case TypeIndex::Int32: + return std::make_shared>(nested_function, arguments); + case TypeIndex::Int64: + return std::make_shared>(nested_function, arguments); + case TypeIndex::Int128: + return std::make_shared>(nested_function, arguments); + case 
TypeIndex::Int256: + return std::make_shared>(nested_function, arguments); + case TypeIndex::UInt8: + return std::make_shared>(nested_function, arguments); + case TypeIndex::Date: + case TypeIndex::UInt16: + return std::make_shared>(nested_function, arguments); + case TypeIndex::DateTime: + case TypeIndex::UInt32: + return std::make_shared>(nested_function, arguments); + case TypeIndex::UInt64: + return std::make_shared>(nested_function, arguments); + case TypeIndex::UInt128: + return std::make_shared>(nested_function, arguments); + case TypeIndex::UInt256: + return std::make_shared>(nested_function, arguments); + case TypeIndex::UUID: + return std::make_shared>(nested_function, arguments); + case TypeIndex::FixedString: + case TypeIndex::String: + return std::make_shared>(nested_function, arguments); + default: + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Map key type " + key_type->getName() + " is not supported by combinator " + getName()); + } + } + else + { + // in case of tuple of arrays or just arrays (checked in transformArguments), try to redirect to sum/min/max-MappedArrays to implement old behavior + auto nested_func_name = nested_function->getName(); + if (nested_func_name == "sum" || nested_func_name == "min" || nested_func_name == "max") + { + AggregateFunctionProperties out_properties; + auto & aggr_func_factory = AggregateFunctionFactory::instance(); + return aggr_func_factory.get(nested_func_name + "MappedArrays", arguments, params, out_properties); + } + else + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregation '" + nested_func_name + "Map' is not implemented for mapped arrays"); + } + } +}; + +void registerAggregateFunctionCombinatorMap(AggregateFunctionCombinatorFactory & factory) +{ + factory.registerCombinator(std::make_shared()); +} + +} diff --git a/src/AggregateFunctions/AggregateFunctionMap.h b/src/AggregateFunctions/AggregateFunctionMap.h new file mode 100644 index 00000000000..e5367ac4f9f --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionMap.h @@ -0,0 +1,247 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "base/types.h" +#include +#include "AggregateFunctions/AggregateFunctionFactory.h" + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +template +struct AggregateFunctionMapCombinatorData +{ + using SearchType = KeyType; + std::unordered_map merged_maps; + + static void writeKey(KeyType key, WriteBuffer & buf) { writeBinary(key, buf); } + static void readKey(KeyType & key, ReadBuffer & buf) { readBinary(key, buf); } +}; + +template <> +struct AggregateFunctionMapCombinatorData +{ + struct StringHash + { + using hash_type = std::hash; + using is_transparent = void; + + size_t operator()(std::string_view str) const { return hash_type{}(str); } + }; + +#ifdef __cpp_lib_generic_unordered_lookup + using SearchType = std::string_view; +#else + using SearchType = std::string; +#endif + std::unordered_map> merged_maps; + + static void writeKey(String key, WriteBuffer & buf) + { + writeVarUInt(key.size(), buf); + writeString(key, buf); + } + static void readKey(String & key, ReadBuffer & buf) + { + UInt64 size; + readVarUInt(size, buf); + key.resize(size); + buf.readStrict(key.data(), size); + } +}; + +template +class AggregateFunctionMap final + : public 
IAggregateFunctionDataHelper, AggregateFunctionMap> +{ +private: + DataTypePtr key_type; + AggregateFunctionPtr nested_func; + + using Data = AggregateFunctionMapCombinatorData; + using Base = IAggregateFunctionDataHelper>; + +public: + AggregateFunctionMap(AggregateFunctionPtr nested, const DataTypes & types) : Base(types, nested->getParameters()), nested_func(nested) + { + if (types.empty()) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function " + getName() + " requires at least one argument"); + + if (types.size() > 1) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function " + getName() + " requires only one map argument"); + + const auto * map_type = checkAndGetDataType(types[0].get()); + if (!map_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function " + getName() + " requires map as argument"); + + key_type = map_type->getKeyType(); + } + + String getName() const override { return nested_func->getName() + "Map"; } + + DataTypePtr getReturnType() const override { return std::make_shared(DataTypes{key_type, nested_func->getReturnType()}); } + + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override + { + const auto & map_column = assert_cast(*columns[0]); + const auto & map_nested_tuple = map_column.getNestedData(); + const IColumn::Offsets & map_array_offsets = map_column.getNestedColumn().getOffsets(); + + const size_t offset = map_array_offsets[row_num - 1]; + const size_t size = (map_array_offsets[row_num] - offset); + + const auto & key_column = map_nested_tuple.getColumn(0); + const auto & val_column = map_nested_tuple.getColumn(1); + + auto & merged_maps = this->data(place).merged_maps; + + for (size_t i = 0; i < size; ++i) + { + typename Data::SearchType key; + + if constexpr (std::is_same::value) + { + StringRef key_ref; + if (key_type->getTypeId() == TypeIndex::FixedString) + key_ref = assert_cast(key_column).getDataAt(offset + i); + else + key_ref = assert_cast(key_column).getDataAt(offset + i); + +#ifdef __cpp_lib_generic_unordered_lookup + key = static_cast(key_ref); +#else + key = key_ref.toString(); +#endif + } + else + { + key = assert_cast &>(key_column).getData()[offset + i]; + } + + AggregateDataPtr nested_place; + auto it = merged_maps.find(key); + + if (it == merged_maps.end()) + { + // create a new place for each key + nested_place = arena->alignedAlloc(nested_func->sizeOfData(), nested_func->alignOfData()); + nested_func->create(nested_place); + merged_maps.emplace(key, nested_place); + } + else + nested_place = it->second; + + const IColumn * nested_columns[1] = {&val_column}; + nested_func->add(nested_place, nested_columns, offset + i, arena); + } + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override + { + auto & merged_maps = this->data(place).merged_maps; + const auto & rhs_maps = this->data(rhs).merged_maps; + + for (const auto & elem : rhs_maps) + { + const auto & it = merged_maps.find(elem.first); + + if (it != merged_maps.end()) + { + nested_func->merge(it->second, elem.second, arena); + } + else + merged_maps[elem.first] = elem.second; + } + } + + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + { + auto & merged_maps = this->data(place).merged_maps; + writeVarUInt(merged_maps.size(), buf); + + for (const auto & elem : merged_maps) + { + this->data(place).writeKey(elem.first, buf); + nested_func->serialize(elem.second, buf); + } + } + + 
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override + { + auto & merged_maps = this->data(place).merged_maps; + UInt64 size; + + readVarUInt(size, buf); + for (UInt64 i = 0; i < size; ++i) + { + KeyType key; + AggregateDataPtr nested_place; + + this->data(place).readKey(key, buf); + nested_place = arena->alignedAlloc(nested_func->sizeOfData(), nested_func->alignOfData()); + nested_func->create(nested_place); + merged_maps.emplace(key, nested_place); + nested_func->deserialize(nested_place, buf, arena); + } + } + + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override + { + auto & map_column = assert_cast(to); + auto & nested_column = map_column.getNestedColumn(); + auto & nested_data_column = map_column.getNestedData(); + + auto & key_column = nested_data_column.getColumn(0); + auto & val_column = nested_data_column.getColumn(1); + + auto & merged_maps = this->data(place).merged_maps; + + // sort the keys + std::vector keys; + keys.reserve(merged_maps.size()); + for (auto & it : merged_maps) + { + keys.push_back(it.first); + } + std::sort(keys.begin(), keys.end()); + + // insert using sorted keys to result column + for (auto & key : keys) + { + key_column.insert(key); + nested_func->insertResultInto(merged_maps[key], val_column, arena); + } + + IColumn::Offsets & res_offsets = nested_column.getOffsets(); + res_offsets.push_back(val_column.size()); + } + + bool allocatesMemoryInArena() const override { return true; } + + AggregateFunctionPtr getNestedFunction() const override { return nested_func; } +}; + +} diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.cpp b/src/AggregateFunctions/AggregateFunctionSumMap.cpp index 245b089108b..32c51d5f746 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.cpp +++ b/src/AggregateFunctions/AggregateFunctionSumMap.cpp @@ -145,9 +145,20 @@ struct MaxMapDispatchOnTupleArgument void registerAggregateFunctionSumMap(AggregateFunctionFactory & factory) { - factory.registerFunction("sumMap", createAggregateFunctionMap< + // these functions used to be called *Map, with now these names occupied by + // Map combinator, which redirects calls here if was called with + // array or tuple arguments. 
+ factory.registerFunction("sumMappedArrays", createAggregateFunctionMap< SumMapVariants::DispatchOnTupleArgument>); + factory.registerFunction("minMappedArrays", + createAggregateFunctionMap); + + factory.registerFunction("maxMappedArrays", + createAggregateFunctionMap); + + // these functions could be renamed to *MappedArrays too, but it would + // break backward compatibility factory.registerFunction("sumMapWithOverflow", createAggregateFunctionMap< SumMapVariants::DispatchOnTupleArgument>); @@ -157,12 +168,6 @@ void registerAggregateFunctionSumMap(AggregateFunctionFactory & factory) factory.registerFunction("sumMapFilteredWithOverflow", createAggregateFunctionMap< SumMapVariants::DispatchOnTupleArgument>); - - factory.registerFunction("minMap", - createAggregateFunctionMap); - - factory.registerFunction("maxMap", - createAggregateFunctionMap); } } diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index e573cf08c50..4fad93facbc 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -377,7 +377,17 @@ public: assertNoParameters(getName(), params_); } - String getName() const override { return "sumMap"; } + String getName() const override + { + if constexpr (overflow) + { + return "sumMapWithOverflow"; + } + else + { + return "sumMap"; + } + } bool keepKey(const T &) const { return true; } }; diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp index bffc344062e..79a418ac69f 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -65,6 +65,7 @@ void registerAggregateFunctionCombinatorNull(AggregateFunctionCombinatorFactory void registerAggregateFunctionCombinatorOrFill(AggregateFunctionCombinatorFactory &); void registerAggregateFunctionCombinatorResample(AggregateFunctionCombinatorFactory &); void registerAggregateFunctionCombinatorDistinct(AggregateFunctionCombinatorFactory &); +void registerAggregateFunctionCombinatorMap(AggregateFunctionCombinatorFactory & factory); void registerWindowFunctions(AggregateFunctionFactory & factory); @@ -134,6 +135,7 @@ void registerAggregateFunctions() registerAggregateFunctionCombinatorOrFill(factory); registerAggregateFunctionCombinatorResample(factory); registerAggregateFunctionCombinatorDistinct(factory); + registerAggregateFunctionCombinatorMap(factory); } } diff --git a/src/Bridge/XDBCBridgeHelper.h b/src/Bridge/XDBCBridgeHelper.h index 80d9610f837..d321c1f23de 100644 --- a/src/Bridge/XDBCBridgeHelper.h +++ b/src/Bridge/XDBCBridgeHelper.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 87e6cc86d94..0a7c3123f9d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -166,6 +166,7 @@ endif() target_link_libraries (clickhouse_common_io PRIVATE jemalloc) +add_subdirectory(Access/Common) add_subdirectory(Common/ZooKeeper) add_subdirectory(Common/Config) @@ -197,6 +198,7 @@ add_object_library(clickhouse_databases Databases) add_object_library(clickhouse_databases_mysql Databases/MySQL) add_object_library(clickhouse_disks Disks) add_object_library(clickhouse_interpreters Interpreters) +add_object_library(clickhouse_interpreters_access Interpreters/Access) add_object_library(clickhouse_interpreters_mysql Interpreters/MySQL) add_object_library(clickhouse_interpreters_clusterproxy 
Interpreters/ClusterProxy) add_object_library(clickhouse_interpreters_jit Interpreters/JIT) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 6f2b178067f..85913b3925f 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -10,10 +10,15 @@ #include #include #include +#include "Common/Exception.h" +#include "Common/getNumberOfPhysicalCPUCores.h" +#include "Common/tests/gtest_global_context.h" +#include "Common/typeid_cast.h" #include "Columns/ColumnString.h" #include "Columns/ColumnsNumber.h" #include "Core/Block.h" #include "Core/Protocol.h" +#include "Formats/FormatFactory.h" #include #include @@ -77,6 +82,7 @@ namespace ErrorCodes extern const int INVALID_USAGE_OF_INPUT; extern const int CANNOT_SET_SIGNAL_HANDLER; extern const int UNRECOGNIZED_ARGUMENTS; + extern const int LOGICAL_ERROR; } } @@ -842,6 +848,13 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_description, ASTPtr parsed_query) { + /// Get columns description from variable or (if it was empty) create it from sample. + auto columns_description_for_query = columns_description.empty() ? ColumnsDescription(sample.getNamesAndTypesList()) : columns_description; + if (columns_description_for_query.empty()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column description is empty and it can't be built from sample from table. Cannot execute query."); + } + /// If INSERT data must be sent. auto * parsed_insert_query = parsed_query->as(); if (!parsed_insert_query) @@ -863,7 +876,8 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des /// Get name of this file (path to file) const auto & in_file_node = parsed_insert_query->infile->as(); const auto in_file = in_file_node.value.safeGet(); - + /// Get name of table + const auto table_name = parsed_insert_query->table_id.getTableName(); std::string compression_method; /// Compression method can be specified in query if (parsed_insert_query->compression) @@ -872,13 +886,35 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des compression_method = compression_method_node.value.safeGet(); } - /// Otherwise, it will be detected from file name automatically (by chooseCompressionMethod) - /// Buffer for reading from file is created and wrapped with appropriate compression method - auto in_buffer = wrapReadBufferWithCompressionMethod(std::make_unique(in_file), chooseCompressionMethod(in_file, compression_method)); + /// Create temporary storage file, to support globs and parallel reading + StorageFile::CommonArguments args{ + WithContext(global_context), + parsed_insert_query->table_id, + parsed_insert_query->format, + getFormatSettings(global_context), + compression_method, + columns_description_for_query, + ConstraintsDescription{}, + String{}, + }; + StoragePtr storage = StorageFile::create(in_file, global_context->getUserFilesPath(), args); + storage->startup(); + SelectQueryInfo query_info; try { - sendDataFrom(*in_buffer, sample, columns_description, parsed_query); + sendDataFromPipe( + storage->read( + sample.getNames(), + storage->getInMemoryMetadataPtr(), + query_info, + global_context, + {}, + global_context->getSettingsRef().max_block_size, + getNumberOfPhysicalCPUCores() + ), + parsed_query + ); } catch (Exception & e) { @@ -892,7 +928,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des ReadBufferFromMemory 
data_in(parsed_insert_query->data, parsed_insert_query->end - parsed_insert_query->data); try { - sendDataFrom(data_in, sample, columns_description, parsed_query); + sendDataFrom(data_in, sample, columns_description_for_query, parsed_query); } catch (Exception & e) { @@ -917,7 +953,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des /// Send data read from stdin. try { - sendDataFrom(std_in, sample, columns_description, parsed_query); + sendDataFrom(std_in, sample, columns_description_for_query, parsed_query); } catch (Exception & e) { @@ -952,6 +988,11 @@ void ClientBase::sendDataFrom(ReadBuffer & buf, Block & sample, const ColumnsDes }); } + sendDataFromPipe(std::move(pipe), parsed_query); +} + +void ClientBase::sendDataFromPipe(Pipe&& pipe, ASTPtr parsed_query) +{ QueryPipeline pipeline(std::move(pipe)); PullingAsyncPipelineExecutor executor(pipeline); @@ -1612,9 +1653,13 @@ void ClientBase::init(int argc, char ** argv) ("disable_suggestion,A", "Disable loading suggestion data. Note that suggestion data is loaded asynchronously through a second connection to ClickHouse server. Also it is reasonable to disable suggestion if you want to paste a query with TAB characters. Shorthand option -A is for those who get used to mysql client.") ("time,t", "print query execution time to stderr in non-interactive mode (for benchmarks)") + ("echo", "in batch mode, print query before execution") ("verbose", "print query and other debugging info") + ("log-level", po::value(), "log level") + ("server_logs_file", po::value(), "put server logs into specified file") + ("multiline,m", "multiline") ("multiquery,n", "multiquery") @@ -1630,6 +1675,8 @@ void ClientBase::init(int argc, char ** argv) ("hardware-utilization", "print hardware utilization information in progress bar") ("print-profile-events", po::value(&profile_events.print)->zero_tokens(), "Printing ProfileEvents packets") ("profile-events-delay-ms", po::value()->default_value(profile_events.delay_ms), "Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet)") + + ("interactive", "Process queries-file or --query query and start interactive mode") ; addOptions(options_description); @@ -1699,8 +1746,13 @@ void ClientBase::init(int argc, char ** argv) config().setString("history_file", options["history_file"].as()); if (options.count("verbose")) config().setBool("verbose", true); + if (options.count("interactive")) + config().setBool("interactive", true); + if (options.count("log-level")) Poco::Logger::root().setLevel(options["log-level"].as()); + if (options.count("server_logs_file")) + server_logs_file = options["server_logs_file"].as(); if (options.count("hardware-utilization")) progress_indication.print_hardware_utilization = true; diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 93906946616..40fc6cacd31 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -10,6 +10,8 @@ #include #include #include +#include +#include namespace po = boost::program_options; @@ -57,6 +59,7 @@ protected: virtual bool executeMultiQuery(const String & all_queries_text) = 0; virtual void connect() = 0; + virtual void prepareForInteractive() = 0; virtual void processError(const String & query) const = 0; virtual String getName() const = 0; @@ -120,6 +123,7 @@ private: void sendData(Block & sample, const ColumnsDescription & columns_description, ASTPtr parsed_query); void sendDataFrom(ReadBuffer & buf, Block & sample, const ColumnsDescription & 
columns_description, ASTPtr parsed_query); + void sendDataFromPipe(Pipe && pipe, ASTPtr parsed_query); void sendExternalTables(ASTPtr parsed_query); void initBlockOutputStream(const Block & block, ASTPtr parsed_query); @@ -138,6 +142,7 @@ private: protected: bool is_interactive = false; /// Use either interactive line editing interface or batch mode. bool is_multiquery = false; + bool delayed_interactive = false; bool echo_queries = false; /// Print queries before execution in batch mode. bool ignore_error = false; /// In case of errors, don't print error message, continue to next query. Only applicable for non-interactive mode. diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 55d3a2d6a5f..4f476b57c27 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -70,7 +70,10 @@ void LocalConnection::sendQuery( query_context = session.makeQueryContext(); query_context->setCurrentQueryId(query_id); if (send_progress) + { query_context->setProgressCallback([this] (const Progress & value) { return this->updateProgress(value); }); + query_context->setFileProgressCallback([this](const FileProgress & value) { this->updateProgress(Progress(value)); }); + } CurrentThread::QueryScope query_scope_holder(query_context); diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 3cd914a89bb..ec2bd025627 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -593,6 +593,7 @@ M(623, CAPN_PROTO_BAD_CAST) \ M(624, BAD_FILE_TYPE) \ M(625, IO_SETUP_ERROR) \ + M(626, CANNOT_SKIP_UNKNOWN_FIELD) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/FileChecker.cpp b/src/Common/FileChecker.cpp index 64c13ceb69c..4de5a92a1b8 100644 --- a/src/Common/FileChecker.cpp +++ b/src/Common/FileChecker.cpp @@ -38,6 +38,11 @@ void FileChecker::setPath(const String & file_info_path_) files_info_path = file_info_path_; } +String FileChecker::getPath() const +{ + return files_info_path; +} + void FileChecker::update(const String & full_file_path) { bool exists = disk->exists(full_file_path); diff --git a/src/Common/FileChecker.h b/src/Common/FileChecker.h index 325e9325267..a0ea449393e 100644 --- a/src/Common/FileChecker.h +++ b/src/Common/FileChecker.h @@ -13,7 +13,9 @@ class FileChecker { public: FileChecker(DiskPtr disk_, const String & file_info_path_); + void setPath(const String & file_info_path_); + String getPath() const; void update(const String & full_file_path); void setEmpty(const String & full_file_path); diff --git a/src/Common/IntervalKind.cpp b/src/Common/IntervalKind.cpp index ee4b67469ce..69b56be48ac 100644 --- a/src/Common/IntervalKind.cpp +++ b/src/Common/IntervalKind.cpp @@ -25,7 +25,6 @@ Int32 IntervalKind::toAvgSeconds() const __builtin_unreachable(); } - IntervalKind IntervalKind::fromAvgSeconds(Int64 num_seconds) { if (num_seconds) diff --git a/src/Common/IntervalKind.h b/src/Common/IntervalKind.h index 85372ffa9ab..f1b5dce0792 100644 --- a/src/Common/IntervalKind.h +++ b/src/Common/IntervalKind.h @@ -58,4 +58,15 @@ struct IntervalKind /// For example, `IntervalKind::tryParseString('second', result)` returns `result` equals `IntervalKind::Kind::Second`. 
static bool tryParseString(const std::string & kind, IntervalKind::Kind & result); }; + +#define FOR_EACH_INTERVAL_KIND(M) \ + M(Second) \ + M(Minute) \ + M(Hour) \ + M(Day) \ + M(Week) \ + M(Month) \ + M(Quarter) \ + M(Year) + } diff --git a/src/Common/remapExecutable.cpp b/src/Common/remapExecutable.cpp index a774b1028d5..0c1cb03457f 100644 --- a/src/Common/remapExecutable.cpp +++ b/src/Common/remapExecutable.cpp @@ -1,3 +1,5 @@ +#include "remapExecutable.h" + #if defined(__linux__) && defined(__amd64__) && defined(__SSE2__) && !defined(SANITIZER) && defined(NDEBUG) && !defined(SPLIT_SHARED_LIBRARIES) #include @@ -11,8 +13,6 @@ #include #include -#include "remapExecutable.h" - namespace DB { @@ -136,10 +136,11 @@ __attribute__((__noinline__)) void remapToHugeStep1(void * begin, size_t size) } -void remapExecutable() +size_t remapExecutable() { auto [begin, size] = getMappedArea(reinterpret_cast(remapExecutable)); remapToHugeStep1(begin, size); + return size; } } @@ -149,7 +150,7 @@ void remapExecutable() namespace DB { -void remapExecutable() {} +size_t remapExecutable() { return 0; } } diff --git a/src/Common/remapExecutable.h b/src/Common/remapExecutable.h index bad5f7adb78..b5af9c82f84 100644 --- a/src/Common/remapExecutable.h +++ b/src/Common/remapExecutable.h @@ -1,8 +1,12 @@ #pragma once + +#include + namespace DB { /// This function tries to reallocate the code of the running program in a more efficient way. -void remapExecutable(); +/// @return size of remapped area. +size_t remapExecutable(); } diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index eabb1f2c5a6..8dbbf8d64fb 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -380,6 +380,14 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin min_log_id = last_commited_log_index; max_log_id = last_commited_log_index == 0 ? 0 : last_commited_log_index - 1; } + else if (last_commited_log_index != 0 && max_log_id < last_commited_log_index - 1) /// If we have more fresh snapshot than our logs + { + LOG_WARNING(log, "Our most fresh log_id {} is smaller than stored data in snapshot {}. It can indicate data loss. 
Removing outdated logs.", max_log_id, last_commited_log_index - 1); + + removeAllLogs(); + min_log_id = last_commited_log_index; + max_log_id = last_commited_log_index - 1; + } else if (last_log_is_not_complete) /// if it's complete just start new one { assert(last_log_read_result != std::nullopt); diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index b04af1422dc..01a6d577f57 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1466,7 +1466,6 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite) changelog2.append(entry); changelog2.end_of_append_batch(0, 0); } - } TEST_P(CoordinationTest, TestStorageSnapshotDifferentCompressions) @@ -1515,6 +1514,33 @@ TEST_P(CoordinationTest, TestStorageSnapshotDifferentCompressions) } +TEST_P(CoordinationTest, TestLogGap) +{ + using namespace Coordination; + auto test_params = GetParam(); + ChangelogDirTest logs("./logs"); + DB::KeeperLogStore changelog("./logs", 100, true, test_params.enable_compression); + + changelog.init(0, 3); + for (size_t i = 1; i < 55; ++i) + { + std::shared_ptr request = std::make_shared(); + request->path = "/hello_" + std::to_string(i); + auto entry = getLogEntryFromZKRequest(0, 1, request); + changelog.append(entry); + changelog.end_of_append_batch(0, 0); + } + + DB::KeeperLogStore changelog1("./logs", 100, true, test_params.enable_compression); + changelog1.init(61, 3); + + /// Logs discarded + EXPECT_FALSE(fs::exists("./logs/changelog_1_100.bin" + test_params.extension)); + EXPECT_EQ(changelog1.start_index(), 61); + EXPECT_EQ(changelog1.next_slot(), 61); +} + + INSTANTIATE_TEST_SUITE_P(CoordinationTestSuite, CoordinationTest, ::testing::ValuesIn(std::initializer_list{ diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 4aaa735e52d..168ee346626 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -582,6 +582,17 @@ DataTypes Block::getDataTypes() const return res; } +Names Block::getDataTypeNames() const +{ + Names res; + res.reserve(columns()); + + for (const auto & elem : data) + res.push_back(elem.type->getName()); + + return res; +} + bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs) { diff --git a/src/Core/Block.h b/src/Core/Block.h index e0a032094f6..c0c9391e3b2 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -90,6 +90,7 @@ public: NamesAndTypesList getNamesAndTypesList() const; Names getNames() const; DataTypes getDataTypes() const; + Names getDataTypeNames() const; /// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0. 
size_t rows() const; diff --git a/src/Core/PostgreSQLProtocol.h b/src/Core/PostgreSQLProtocol.h index 7d34e23eed2..dd26bf41b4a 100644 --- a/src/Core/PostgreSQLProtocol.h +++ b/src/Core/PostgreSQLProtocol.h @@ -825,7 +825,7 @@ public: Messaging::MessageTransport & mt, const Poco::Net::SocketAddress & address) = 0; - virtual Authentication::Type getType() const = 0; + virtual AuthenticationType getType() const = 0; virtual ~AuthenticationMethod() = default; }; @@ -842,9 +842,9 @@ public: return setPassword(user_name, "", session, mt, address); } - Authentication::Type getType() const override + AuthenticationType getType() const override { - return Authentication::Type::NO_PASSWORD; + return AuthenticationType::NO_PASSWORD; } }; @@ -873,9 +873,9 @@ public: ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); } - Authentication::Type getType() const override + AuthenticationType getType() const override { - return Authentication::Type::PLAINTEXT_PASSWORD; + return AuthenticationType::PLAINTEXT_PASSWORD; } }; @@ -883,7 +883,7 @@ class AuthenticationManager { private: Poco::Logger * log = &Poco::Logger::get("AuthenticationManager"); - std::unordered_map> type_to_method = {}; + std::unordered_map> type_to_method = {}; public: AuthenticationManager(const std::vector> & auth_methods) @@ -900,7 +900,7 @@ public: Messaging::MessageTransport & mt, const Poco::Net::SocketAddress & address) { - const Authentication::Type user_auth_type = session.getAuthenticationTypeOrLogInFailure(user_name); + const AuthenticationType user_auth_type = session.getAuthenticationTypeOrLogInFailure(user_name); if (type_to_method.find(user_auth_type) != type_to_method.end()) { type_to_method[user_auth_type]->authenticate(user_name, session, mt, address); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index e6f685a0650..3863046b511 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -459,7 +459,8 @@ class IColumn; M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \ M(Bool, enable_global_with_statement, true, "Propagate WITH statements to UNION queries and all subqueries", 0) \ M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \ - M(Bool, optimize_syntax_fuse_functions, false, "Fuse aggregate functions (`sum, avg, count` with identical arguments into one `sumCount`, quantile-family functions with the same argument into `quantiles*(...)[...]`)", 0) \ + M(Bool, optimize_syntax_fuse_functions, false, "Allow apply syntax optimisation: fuse aggregate functions", 0) \ + M(Bool, optimize_fuse_sum_count_avg, false, "Fuse functions `sum, avg, count` with identical arguments into one `sumCount` (`optimize_syntax_fuse_functions should be enabled)", 0) \ M(Bool, flatten_nested, true, "If true, columns of type Nested will be flatten to separate array columns instead of one array of tuples", 0) \ M(Bool, asterisk_include_materialized_columns, false, "Include MATERIALIZED columns for wildcard query", 0) \ M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \ @@ -511,6 +512,8 @@ class IColumn; M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ M(Bool, remote_filesystem_read_prefetch, true, "Should use prefetching when reading data from remote filesystem.", 0) \ M(Int64, read_priority, 0, "Priority to read data from local filesystem. 
Only supported for 'pread_threadpool' method.", 0) \ + M(UInt64, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem, (20 * 8192), "If at least as many lines are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \ + M(UInt64, merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \ \ M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \ M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. Makes sense only for inserts via HTTP protocol. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \ @@ -540,7 +543,6 @@ class IColumn; M(HandleKafkaErrorMode, handle_kafka_error_mode, HandleKafkaErrorMode::DEFAULT, "Obsolete setting, does nothing.", 0) \ M(Bool, database_replicated_ddl_output, true, "Obsolete setting, does nothing.", 0) \ M(UInt64, replication_alter_columns_timeout, 60, "Obsolete setting, does nothing.", 0) \ - M(Bool, optimize_fuse_sum_count_avg, false, "Obsolete, use optimize_syntax_fuse_functions", 0) \ M(UInt64, odbc_max_field_size, 0, "Obsolete setting, does nothing.", 0) \ /** The section above is for obsolete settings. Do not add anything there. */ @@ -553,13 +555,14 @@ class IColumn; M(Bool, format_csv_allow_single_quotes, true, "If it is set to true, allow strings in single quotes.", 0) \ M(Bool, format_csv_allow_double_quotes, true, "If it is set to true, allow strings in double quotes.", 0) \ M(Bool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \ - M(Bool, input_format_csv_unquoted_null_literal_as_null, false, "Consider unquoted NULL literal as \\N", 0) \ M(Bool, input_format_csv_enum_as_number, false, "Treat inserted enum values in CSV formats as enum indices \\N", 0) \ M(Bool, input_format_csv_arrays_as_nested_csv, false, R"(When reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Example: "[""Hello"", ""world"", ""42"""" TV""]". 
Braces around array can be omitted.)", 0) \ - M(Bool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, CSVWithNames, TSVWithNames and TSKV formats).", 0) \ - M(Bool, input_format_with_names_use_header, true, "For TSVWithNames and CSVWithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.", 0) \ + M(Bool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, -WithNames, -WithNamesAndTypes and TSKV formats).", 0) \ + M(Bool, input_format_with_names_use_header, true, "For -WithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.", 0) \ + M(Bool, input_format_with_types_use_header, true, "For -WithNamesAndTypes input formats this controls whether format parser should check if data types from the input match data types from the header.", 0) \ M(Bool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).", 0) \ - M(Bool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, CSV and TSV formats).", IMPORTANT) \ + M(Bool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, -WithNames, -WithNamesAndTypes formats).", IMPORTANT) \ + M(Bool, input_format_csv_empty_as_default, true, "Treat empty fields in CSV input as default values.", 0) \ M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \ M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices \\N", 0) \ M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \ @@ -575,10 +578,8 @@ class IColumn; M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \ M(Bool, input_format_avro_allow_missing_fields, false, "For Avro/AvroConfluent format: when field is not found in schema use default value instead of error", 0) \ M(URI, format_avro_schema_registry_url, "", "For AvroConfluent format: Confluent Schema Registry URL.", 0) \ - M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ \ M(Bool, output_format_json_quote_64bit_integers, true, "Controls quoting of 64-bit integers in JSON output format.", 0) \ - \ M(Bool, output_format_json_quote_denormals, false, "Enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format.", 0) \ \ M(Bool, output_format_json_escape_forward_slashes, true, "Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped.", 0) \ @@ -593,9 +594,11 @@ class IColumn; M(UInt64, output_format_parquet_row_group_size, 1000000, "Row group size in rows.", 0) \ M(String, output_format_avro_codec, "", "Compression codec used for output. 
Possible values: 'null', 'deflate', 'snappy'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ + M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ + M(UInt64, output_format_avro_rows_in_file, 1, "Max rows in a file (if permitted by storage)", 0) \ M(Bool, output_format_tsv_crlf_end_of_line, false, "If it is set true, end of line in TSV format will be \\r\\n instead of \\n.", 0) \ - M(String, output_format_csv_null_representation, "\\N", "Custom NULL representation in CSV format", 0) \ - M(String, output_format_tsv_null_representation, "\\N", "Custom NULL representation in TSV format", 0) \ + M(String, format_csv_null_representation, "\\N", "Custom NULL representation in CSV format", 0) \ + M(String, format_tsv_null_representation, "\\N", "Custom NULL representation in TSV format", 0) \ M(Bool, output_format_decimal_trailing_zeros, false, "Output trailing zeros when printing Decimal values. E.g. 1.230000 instead of 1.23.", 0) \ \ M(UInt64, input_format_allow_errors_num, 0, "Maximum absolute amount of errors while reading text formats (like CSV, TSV). In case of error, if at least absolute or relative amount of errors is lower than corresponding value, will skip until next line and continue.", 0) \ diff --git a/src/Core/SortDescription.h b/src/Core/SortDescription.h index e1653b9102b..db15f3a54db 100644 --- a/src/Core/SortDescription.h +++ b/src/Core/SortDescription.h @@ -6,6 +6,7 @@ #include #include #include +#include class Collator; @@ -27,7 +28,11 @@ struct FillColumnDescription /// Range [FROM, TO) respects sorting direction Field fill_from; /// Fill value >= FILL_FROM Field fill_to; /// Fill value + STEP < FILL_TO - Field fill_step; /// Default = 1 or -1 according to direction + Field fill_step; /// Default = +1 or -1 according to direction + std::optional step_kind; + + using StepFunction = std::function; + StepFunction step_func; }; /// Description of the sorting rule by one column. diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index 023629fc699..dc0411331e6 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -31,7 +31,8 @@ void DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(const Aggreg /// TODO Make it sane. 
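// Illustrative sketch (not from this change): the check below is a plain linear scan over a
// name whitelist. A standalone equivalent, including the three map-combinator names this
// change adds to the list:
#include <algorithm>
#include <string>
#include <vector>

bool isSupportedSimpleAggregate(const std::string & function_name)
{
    static const std::vector<std::string> supported{
        "any", "anyLast", "min", "max", "sum", "sumWithOverflow",
        "groupBitAnd", "groupBitOr", "groupBitXor",
        "sumMap", "minMap", "maxMap", "groupArrayArray", "groupUniqArrayArray",
        "sumMappedArrays", "minMappedArrays", "maxMappedArrays"};
    return std::find(supported.begin(), supported.end(), function_name) != supported.end();
}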
static const std::vector supported_functions{"any", "anyLast", "min", "max", "sum", "sumWithOverflow", "groupBitAnd", "groupBitOr", "groupBitXor", - "sumMap", "minMap", "maxMap", "groupArrayArray", "groupUniqArrayArray"}; + "sumMap", "minMap", "maxMap", "groupArrayArray", "groupUniqArrayArray", + "sumMappedArrays", "minMappedArrays", "maxMappedArrays"}; // check function if (std::find(std::begin(supported_functions), std::end(supported_functions), function->getName()) == std::end(supported_functions)) diff --git a/src/DataTypes/DataTypeLowCardinality.h b/src/DataTypes/DataTypeLowCardinality.h index 1266174c6d6..7f4286046d9 100644 --- a/src/DataTypes/DataTypeLowCardinality.h +++ b/src/DataTypes/DataTypeLowCardinality.h @@ -51,6 +51,7 @@ public: bool isNullable() const override { return false; } bool onlyNull() const override { return false; } bool lowCardinality() const override { return true; } + bool isLowCardinalityNullable() const override { return dictionary_type->isNullable(); } static MutableColumnUniquePtr createColumnUnique(const IDataType & keys_type); static MutableColumnUniquePtr createColumnUnique(const IDataType & keys_type, MutableColumnPtr && keys); diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index a53fdac797f..fc42d678d57 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -269,6 +269,9 @@ public: virtual bool lowCardinality() const { return false; } + /// Checks if this type is LowCardinality(Nullable(...)) + virtual bool isLowCardinalityNullable() const { return false; } + /// Strings, Numbers, Date, DateTime, Nullable virtual bool canBeInsideLowCardinality() const { return false; } diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index 5c0274b0e35..92aeeaa7bac 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -202,6 +203,20 @@ bool ISerialization::isSpecialCompressionAllowed(const SubstreamPath & path) return true; } +void ISerialization::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + String field; + /// Read until \t or \n. + readString(field, istr); + ReadBufferFromString buf(field); + deserializeWholeText(column, buf, settings); +} + +void ISerialization::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + serializeText(column, row_num, ostr, settings); +} + size_t ISerialization::getArrayLevel(const SubstreamPath & path) { size_t level = 0; diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 7562cfcb9a0..aab58daab03 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -283,6 +283,14 @@ public: serializeText(column, row_num, ostr, settings); } + /** Text deserialization without escaping and quoting. Reads all data until first \n or \t + * into a temporary string and then call deserializeWholeText. It was implemented this way + * because this function is rarely used and because proper implementation requires a lot of + * additional code in data types serialization and ReadHelpers. 
+ */ + virtual void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; + virtual void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const; + static String getFileNameForStream(const NameAndTypePair & column, const SubstreamPath & path); static String getFileNameForStream(const String & name_in_storage, const SubstreamPath & path); static String getSubcolumnNameForStream(const SubstreamPath & path); diff --git a/src/DataTypes/Serializations/SerializationFixedString.cpp b/src/DataTypes/Serializations/SerializationFixedString.cpp index 5c63631e2a3..972313a564f 100644 --- a/src/DataTypes/Serializations/SerializationFixedString.cpp +++ b/src/DataTypes/Serializations/SerializationFixedString.cpp @@ -163,7 +163,7 @@ void SerializationFixedString::deserializeTextQuoted(IColumn & column, ReadBuffe void SerializationFixedString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { - read(*this, column, [&istr](ColumnFixedString::Chars & data) { readStringInto(data, istr); }); + read(*this, column, [&istr](ColumnFixedString::Chars & data) { readStringUntilEOFInto(data, istr); }); } diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp index 7a86d5413b2..d83a6c0ee83 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp +++ b/src/DataTypes/Serializations/SerializationLowCardinality.cpp @@ -818,6 +818,7 @@ void SerializationLowCardinality::serializeTextJSON(const IColumn & column, size { serializeImpl(column, row_num, &ISerialization::serializeTextJSON, ostr, settings); } + void SerializationLowCardinality::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { deserializeImpl(column, &ISerialization::deserializeTextJSON, istr, settings); @@ -828,6 +829,16 @@ void SerializationLowCardinality::serializeTextXML(const IColumn & column, size_ serializeImpl(column, row_num, &ISerialization::serializeTextXML, ostr, settings); } +void SerializationLowCardinality::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeImpl(column, &ISerialization::deserializeTextRaw, istr, settings); +} + +void SerializationLowCardinality::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + serializeImpl(column, row_num, &ISerialization::serializeTextRaw, ostr, settings); +} + template void SerializationLowCardinality::serializeImpl( const IColumn & column, size_t row_num, SerializationLowCardinality::SerializeFunctionPtr func, Args &&... 
args) const diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.h b/src/DataTypes/Serializations/SerializationLowCardinality.h index f82b35a52d5..af26405fcfa 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.h +++ b/src/DataTypes/Serializations/SerializationLowCardinality.h @@ -64,6 +64,8 @@ public: void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; private: template diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 560a4812123..5e2b31ebb9d 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -7,13 +7,12 @@ #include #include #include -#include #include #include #include -#include -#include +#include #include +#include namespace DB { @@ -256,60 +255,123 @@ void SerializationNullable::deserializeTextEscaped(IColumn & column, ReadBuffer deserializeTextEscapedImpl(column, istr, settings, nested); } +void SerializationNullable::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextRawImpl(column, istr, settings, nested); +} + +void SerializationNullable::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const ColumnNullable & col = assert_cast(column); + + if (col.isNullAt(row_num)) + writeString(settings.tsv.null_representation, ostr); + else + nested->serializeTextRaw(col.getNestedColumn(), row_num, ostr, settings); +} + +template +ReturnType SerializationNullable::deserializeTextRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested) +{ + return deserializeTextEscapedAndRawImpl(column, istr, settings, nested); +} + template ReturnType SerializationNullable::deserializeTextEscapedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, - const SerializationPtr & nested) + const SerializationPtr & nested) { - /// Little tricky, because we cannot discriminate null from first character. + return deserializeTextEscapedAndRawImpl(column, istr, settings, nested); +} - if (istr.eof() || *istr.position() != '\\') /// Some data types can deserialize absence of data (e.g. empty string), so eof is ok. +template +ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, + const SerializationPtr & nested_serialization) +{ + const String & null_representation = settings.tsv.null_representation; + + /// Some data types can deserialize absence of data (e.g. empty string), so eof is ok. + if (istr.eof() || (!null_representation.empty() && *istr.position() != null_representation[0])) { /// This is not null, surely. 
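// Illustrative sketch (not from this change): the condition above is a cheap fast path.
// If the buffer is at EOF (absence of data is a valid value for some types) or the next byte
// cannot even start the configured null representation, the value is certainly not NULL and
// the input can be handed to the nested serialization untouched. A standalone model of the
// same decision, with hypothetical names:
#include <string>
#include <string_view>

bool surelyNotNull(std::string_view remaining_input, const std::string & null_representation)
{
    return remaining_input.empty()
        || (!null_representation.empty() && remaining_input.front() != null_representation.front());
}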
- return safeDeserialize(column, *nested, + return safeDeserialize(column, *nested_serialization, [] { return false; }, - [&nested, &istr, &settings] (IColumn & nested_column) { nested->deserializeTextEscaped(nested_column, istr, settings); }); - } - else - { - /// Now we know, that data in buffer starts with backslash. - ++istr.position(); - - if (istr.eof()) - throw ParsingException("Unexpected end of stream, while parsing value of Nullable type, after backslash", ErrorCodes::CANNOT_READ_ALL_DATA); - - return safeDeserialize(column, *nested, - [&istr] + [&nested_serialization, &istr, &settings] (IColumn & nested_column) { - if (*istr.position() == 'N') - { - ++istr.position(); - return true; - } - return false; - }, - [&nested, &istr, &settings] (IColumn & nested_column) - { - if (istr.position() != istr.buffer().begin()) - { - /// We could step back to consume backslash again. - --istr.position(); - nested->deserializeTextEscaped(nested_column, istr, settings); - } + if constexpr (escaped) + nested_serialization->deserializeTextEscaped(nested_column, istr, settings); else - { - /// Otherwise, we need to place backslash back in front of istr. - ReadBufferFromMemory prefix("\\", 1); - ConcatReadBuffer prepended_istr(prefix, istr); - - nested->deserializeTextEscaped(nested_column, prepended_istr, settings); - - /// Synchronise cursor position in original buffer. - - if (prepended_istr.count() > 1) - istr.position() = prepended_istr.position(); - } + nested_serialization->deserializeTextRaw(nested_column, istr, settings); }); } + + /// Check if we have enough data in buffer to check if it's a null. + if (istr.available() > null_representation.size()) + { + auto check_for_null = [&istr, &null_representation]() + { + auto * pos = istr.position(); + if (checkString(null_representation, istr) && (*istr.position() == '\t' || *istr.position() == '\n')) + return true; + istr.position() = pos; + return false; + }; + auto deserialize_nested = [&nested_serialization, &settings, &istr] (IColumn & nested_column) + { + if constexpr (escaped) + nested_serialization->deserializeTextEscaped(nested_column, istr, settings); + else + nested_serialization->deserializeTextRaw(nested_column, istr, settings); + }; + return safeDeserialize(column, *nested_serialization, check_for_null, deserialize_nested); + } + + /// We don't have enough data in buffer to check if it's a null. + /// Use PeekableReadBuffer to make a checkpoint before checking null + /// representation and rollback if check was failed. + PeekableReadBuffer buf(istr, true); + auto check_for_null = [&buf, &null_representation]() + { + buf.setCheckpoint(); + SCOPE_EXIT(buf.dropCheckpoint()); + if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n')) + return true; + + buf.rollbackToCheckpoint(); + return false; + }; + + auto deserialize_nested = [&nested_serialization, &settings, &buf, &null_representation, &istr] (IColumn & nested_column) + { + auto * pos = buf.position(); + if constexpr (escaped) + nested_serialization->deserializeTextEscaped(nested_column, buf, settings); + else + nested_serialization->deserializeTextRaw(nested_column, buf, settings); + /// Check that we don't have any unread data in PeekableReadBuffer own memory. + if (likely(!buf.hasUnreadData())) + return; + + /// We have some unread data in PeekableReadBuffer own memory. + /// It can happen only if there is a string instead of a number + /// or if someone uses tab or LF in TSV null_representation. 
+ /// In the first case we cannot continue reading anyway. The second case seems to be unlikely. + if (null_representation.find('\t') != std::string::npos || null_representation.find('\n') != std::string::npos) + throw DB::ParsingException("TSV custom null representation containing '\\t' or '\\n' may not work correctly " + "for large input.", ErrorCodes::CANNOT_READ_ALL_DATA); + + WriteBufferFromOwnString parsed_value; + if constexpr (escaped) + nested_serialization->serializeTextEscaped(nested_column, nested_column.size() - 1, parsed_value, settings); + else + nested_serialization->serializeTextRaw(nested_column, nested_column.size() - 1, parsed_value, settings); + throw DB::ParsingException("Error while parsing \"" + std::string(pos, buf.buffer().end()) + std::string(istr.position(), std::min(size_t(10), istr.available())) + "\" as Nullable" + + " at position " + std::to_string(istr.count()) + ": got \"" + std::string(pos, buf.position() - pos) + + "\", which was deserialized as \"" + + parsed_value.str() + "\". It seems that input data is ill-formatted.", + ErrorCodes::CANNOT_READ_ALL_DATA); + }; + + return safeDeserialize(column, *nested_serialization, check_for_null, deserialize_nested); } void SerializationNullable::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const @@ -350,13 +412,30 @@ template ReturnType SerializationNullable::deserializeWholeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested) { - return safeDeserialize(column, *nested, - [&istr] - { - return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr) - || checkStringByFirstCharacterAndAssertTheRest("ᴺᵁᴸᴸ", istr); - }, - [&nested, &istr, &settings] (IColumn & nested_column) { nested->deserializeWholeText(nested_column, istr, settings); }); + PeekableReadBuffer buf(istr, true); + auto check_for_null = [&buf]() + { + buf.setCheckpoint(); + SCOPE_EXIT(buf.dropCheckpoint()); + + if (checkStringCaseInsensitive("NULL", buf) && buf.eof()) + return true; + + buf.rollbackToCheckpoint(); + if (checkStringCaseInsensitive("ᴺᵁᴸᴸ", buf) && buf.eof()) + return true; + + buf.rollbackToCheckpoint(); + return false; + }; + + auto deserialize_nested = [&nested, &settings, &buf] (IColumn & nested_column) + { + nested->deserializeWholeText(nested_column, buf, settings); + assert(!buf.hasUnreadData()); + }; + + return safeDeserialize(column, *nested, check_for_null, deserialize_nested); } @@ -377,74 +456,77 @@ void SerializationNullable::deserializeTextCSV(IColumn & column, ReadBuffer & is template ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, - const SerializationPtr & nested) + const SerializationPtr & nested_serialization) { - constexpr char const * null_literal = "NULL"; - constexpr size_t len = 4; - size_t null_prefix_len = 0; - - auto check_for_null = [&istr, &settings, &null_prefix_len] + const String & null_representation = settings.csv.null_representation; + if (istr.eof() || (!null_representation.empty() && *istr.position() != null_representation[0])) { - if (checkStringByFirstCharacterAndAssertTheRest("\\N", istr)) - return true; - if (!settings.csv.unquoted_null_literal_as_null) + /// This is not null, surely. 
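// Illustrative sketch (not from this change): both the TSV/raw path above and the CSV variant
// that follows detect NULL with the same "checkpoint, try to match, roll back on failure"
// technique that PeekableReadBuffer provides. A minimal standalone model over a string;
// MiniPeekableBuffer is a hypothetical stand-in for the real buffer:
#include <string>

struct MiniPeekableBuffer
{
    std::string data;
    size_t pos = 0;
    size_t checkpoint = 0;

    void setCheckpoint() { checkpoint = pos; }
    void rollbackToCheckpoint() { pos = checkpoint; }
    bool eof() const { return pos >= data.size(); }

    bool checkString(const std::string & s)
    {
        if (data.compare(pos, s.size(), s) != 0)
            return false;
        pos += s.size();    /// consume only on a successful match
        return true;
    }
};

bool checkForNull(MiniPeekableBuffer & buf, const std::string & null_representation, char delimiter)
{
    buf.setCheckpoint();
    if (buf.checkString(null_representation)
        && (buf.eof() || buf.data[buf.pos] == delimiter || buf.data[buf.pos] == '\r' || buf.data[buf.pos] == '\n'))
        return true;
    buf.rollbackToCheckpoint();    /// not a NULL: restore the position for the nested parser
    return false;
}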
+ return safeDeserialize(column, *nested_serialization, + [] { return false; }, + [&nested_serialization, &istr, &settings] (IColumn & nested_column) { nested_serialization->deserializeTextCSV(nested_column, istr, settings); }); + } + + /// Check if we have enough data in buffer to check if it's a null. + if (istr.available() > null_representation.size()) + { + auto check_for_null = [&istr, &null_representation, &settings]() + { + auto * pos = istr.position(); + if (checkString(null_representation, istr) && (*istr.position() == settings.csv.delimiter || *istr.position() == '\r' || *istr.position() == '\n')) + return true; + istr.position() = pos; return false; - - /// Check for unquoted NULL - while (!istr.eof() && null_prefix_len < len && null_literal[null_prefix_len] == *istr.position()) + }; + auto deserialize_nested = [&nested_serialization, &settings, &istr] (IColumn & nested_column) { - ++null_prefix_len; - ++istr.position(); - } - if (null_prefix_len == len) + nested_serialization->deserializeTextCSV(nested_column, istr, settings); + }; + return safeDeserialize(column, *nested_serialization, check_for_null, deserialize_nested); + } + + /// We don't have enough data in buffer to check if it's a null. + /// Use PeekableReadBuffer to make a checkpoint before checking null + /// representation and rollback if the check was failed. + PeekableReadBuffer buf(istr, true); + auto check_for_null = [&buf, &null_representation, &settings]() + { + buf.setCheckpoint(); + SCOPE_EXIT(buf.dropCheckpoint()); + if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\r' || *buf.position() == '\n')) return true; - /// Value and "NULL" have common prefix, but value is not "NULL". - /// Restore previous buffer position if possible. - if (null_prefix_len <= istr.offset()) - { - istr.position() -= null_prefix_len; - null_prefix_len = 0; - } + buf.rollbackToCheckpoint(); return false; }; - auto deserialize_nested = [&nested, &settings, &istr, &null_prefix_len] (IColumn & nested_column) + auto deserialize_nested = [&nested_serialization, &settings, &buf, &null_representation, &istr] (IColumn & nested_column) { - if (likely(!null_prefix_len)) - nested->deserializeTextCSV(nested_column, istr, settings); - else - { - /// Previous buffer position was not restored, - /// so we need to prepend extracted characters (rare case) - ReadBufferFromMemory prepend(null_literal, null_prefix_len); - ConcatReadBuffer buf(prepend, istr); - nested->deserializeTextCSV(nested_column, buf, settings); + auto * pos = buf.position(); + nested_serialization->deserializeTextCSV(nested_column, buf, settings); + /// Check that we don't have any unread data in PeekableReadBuffer own memory. + if (likely(!buf.hasUnreadData())) + return; - /// Check if all extracted characters were read by nested parser and update buffer position - if (null_prefix_len < buf.count()) - istr.position() = buf.position(); - else if (null_prefix_len > buf.count()) - { - /// It can happen only if there is an unquoted string instead of a number - /// or if someone uses 'U' or 'L' as delimiter in CSV. - /// In the first case we cannot continue reading anyway. The second case seems to be unlikely. 
- if (settings.csv.delimiter == 'U' || settings.csv.delimiter == 'L') - throw DB::ParsingException("Enabled setting input_format_csv_unquoted_null_literal_as_null may not work correctly " - "with format_csv_delimiter = 'U' or 'L' for large input.", ErrorCodes::CANNOT_READ_ALL_DATA); - WriteBufferFromOwnString parsed_value; - nested->serializeTextCSV(nested_column, nested_column.size() - 1, parsed_value, settings); - throw DB::ParsingException("Error while parsing \"" + std::string(null_literal, null_prefix_len) - + std::string(istr.position(), std::min(size_t{10}, istr.available())) + "\" as Nullable" - + " at position " + std::to_string(istr.count()) + ": got \"" + std::string(null_literal, buf.count()) - + "\", which was deserialized as \"" - + parsed_value.str() + "\". It seems that input data is ill-formatted.", - ErrorCodes::CANNOT_READ_ALL_DATA); - } - } + /// We have some unread data in PeekableReadBuffer own memory. + /// It can happen only if there is an unquoted string instead of a number + /// or if someone uses csv delimiter, LF or CR in CSV null representation. + /// In the first case we cannot continue reading anyway. The second case seems to be unlikely. + if (null_representation.find(settings.csv.delimiter) != std::string::npos || null_representation.find('\r') != std::string::npos + || null_representation.find('\n') != std::string::npos) + throw DB::ParsingException("CSV custom null representation containing format_csv_delimiter, '\\r' or '\\n' may not work correctly " + "for large input.", ErrorCodes::CANNOT_READ_ALL_DATA); + + WriteBufferFromOwnString parsed_value; + nested_serialization->serializeTextCSV(nested_column, nested_column.size() - 1, parsed_value, settings); + throw DB::ParsingException("Error while parsing \"" + std::string(pos, buf.buffer().end()) + std::string(istr.position(), std::min(size_t(10), istr.available())) + "\" as Nullable" + + " at position " + std::to_string(istr.count()) + ": got \"" + std::string(pos, buf.position() - pos) + + "\", which was deserialized as \"" + + parsed_value.str() + "\". 
It seems that input data is ill-formatted.", + ErrorCodes::CANNOT_READ_ALL_DATA); }; - return safeDeserialize(column, *nested, check_for_null, deserialize_nested); + return safeDeserialize(column, *nested_serialization, check_for_null, deserialize_nested); } void SerializationNullable::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const @@ -507,5 +589,6 @@ template bool SerializationNullable::deserializeTextEscapedImpl(IColumn & template bool SerializationNullable::deserializeTextQuotedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested); template bool SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested); template bool SerializationNullable::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested); +template bool SerializationNullable::deserializeTextRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested); } diff --git a/src/DataTypes/Serializations/SerializationNullable.h b/src/DataTypes/Serializations/SerializationNullable.h index c39c4dd6573..c514234127c 100644 --- a/src/DataTypes/Serializations/SerializationNullable.h +++ b/src/DataTypes/Serializations/SerializationNullable.h @@ -72,6 +72,9 @@ public: void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + /// If ReturnType is bool, check for NULL and deserialize value into non-nullable column (and return true) or insert default value of nested type (and return false) /// If ReturnType is void, deserialize Nullable(T) template @@ -84,6 +87,10 @@ public: static ReturnType deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested); template static ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested); + template + static ReturnType deserializeTextRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested); + template + static ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested); private: struct SubcolumnCreator : public ISubcolumnCreator diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp index c3c24ed6749..5614e970315 100644 --- a/src/DataTypes/Serializations/SerializationString.cpp +++ b/src/DataTypes/Serializations/SerializationString.cpp @@ -1,7 +1,6 @@ #include #include -#include #include #include @@ -9,8 +8,6 @@ #include #include -#include -#include #include #include @@ -245,7 +242,7 @@ static inline void read(IColumn & column, Reader && reader) void SerializationString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { - read(column, [&](ColumnString::Chars & data) { readStringInto(data, istr); }); + read(column, 
[&](ColumnString::Chars & data) { readStringUntilEOFInto(data, istr); }); } diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index a5a04d277da..640371947a9 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -65,14 +65,12 @@ ClickHouseDictionarySource::ClickHouseDictionarySource( const DictionaryStructure & dict_struct_, const Configuration & configuration_, const Block & sample_block_, - ContextMutablePtr context_, - std::shared_ptr local_session_) + ContextMutablePtr context_) : update_time{std::chrono::system_clock::from_time_t(0)} , dict_struct{dict_struct_} , configuration{configuration_} , query_builder{dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks} , sample_block{sample_block_} - , local_session(local_session_) , context(context_) , pool{createPool(configuration)} , load_all_query{query_builder.composeLoadAllQuery()} @@ -86,7 +84,6 @@ ClickHouseDictionarySource::ClickHouseDictionarySource(const ClickHouseDictionar , invalidate_query_response{other.invalidate_query_response} , query_builder{dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks} , sample_block{other.sample_block} - , local_session(other.local_session) , context(Context::createCopy(other.context)) , pool{createPool(configuration)} , load_all_query{other.load_all_query} @@ -252,17 +249,18 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) }; ContextMutablePtr context; - std::shared_ptr local_session; if (configuration.is_local) { - /// Start local session in case when the dictionary is loaded in-process (without TCP communication). - local_session = std::make_shared(global_context, ClientInfo::Interface::LOCAL); - local_session->authenticate(configuration.user, configuration.password, {}); - context = local_session->makeQueryContext(); - context->applySettingsChanges(readSettingsFromDictionaryConfig(config, config_prefix)); + /// We should set user info even for the case when the dictionary is loaded in-process (without TCP communication). 
+ Session session(global_context, ClientInfo::Interface::LOCAL); + session.authenticate(configuration.user, configuration.password, {}); + context = session.makeQueryContext(); } else - context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix); + { + context = Context::createCopy(global_context); + } + context->applySettingsChanges(readSettingsFromDictionaryConfig(config, config_prefix)); String dictionary_name = config.getString(".dictionary.name", ""); String dictionary_database = config.getString(".dictionary.database", ""); @@ -270,7 +268,7 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) if (dictionary_name == configuration.table && dictionary_database == configuration.db) throw Exception(ErrorCodes::BAD_ARGUMENTS, "ClickHouseDictionarySource table cannot be dictionary table"); - return std::make_unique(dict_struct, configuration, sample_block, context, local_session); + return std::make_unique(dict_struct, configuration, sample_block, context); }; factory.registerSource("clickhouse", create_table_source); diff --git a/src/Dictionaries/ClickHouseDictionarySource.h b/src/Dictionaries/ClickHouseDictionarySource.h index 58243e43b15..be09fa415fd 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.h +++ b/src/Dictionaries/ClickHouseDictionarySource.h @@ -39,8 +39,7 @@ public: const DictionaryStructure & dict_struct_, const Configuration & configuration_, const Block & sample_block_, - ContextMutablePtr context_, - std::shared_ptr local_session_); + ContextMutablePtr context_); /// copy-constructor is provided in order to support cloneability ClickHouseDictionarySource(const ClickHouseDictionarySource & other); @@ -82,7 +81,6 @@ private: mutable std::string invalidate_query_response; ExternalQueryBuilder query_builder; Block sample_block; - std::shared_ptr local_session; ContextMutablePtr context; ConnectionPoolWithFailoverPtr pool; const std::string load_all_query; diff --git a/src/Disks/DiskRestartProxy.cpp b/src/Disks/DiskRestartProxy.cpp index f2b27c2c876..3ef3999bde4 100644 --- a/src/Disks/DiskRestartProxy.cpp +++ b/src/Disks/DiskRestartProxy.cpp @@ -24,6 +24,8 @@ public: void setReadUntilPosition(size_t position) override { impl->setReadUntilPosition(position); } + void setReadUntilEnd() override { impl->setReadUntilEnd(); } + private: ReadLock lock; }; diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp index c283e0ea159..23fd353a5f0 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp @@ -59,16 +59,23 @@ String AsynchronousReadIndirectBufferFromRemoteFS::getFileName() const bool AsynchronousReadIndirectBufferFromRemoteFS::hasPendingDataToRead() { - /// Position is set only for MergeTree tables. + /** + * Note: read_until_position here can be std::nullopt only for non-MergeTree tables. + * For mergeTree tables it must be guaranteed that setReadUntilPosition() or + * setReadUntilEnd() is called before any read or prefetch. + * setReadUntilEnd() always sets read_until_position to file size. + * setReadUntilPosition(pos) always has pos > 0, because if + * right_offset_in_compressed_file is 0, then setReadUntilEnd() is used. + */ if (read_until_position) { /// Everything is already read. 
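// Illustrative sketch (not from this change): read_until_position is now an std::optional,
// so "no limit was set" is distinguishable from "limit == 0". The check performed here
// reduces to the following standalone model:
#include <cstddef>
#include <optional>
#include <stdexcept>

bool hasPendingData(std::optional<size_t> read_until_position, size_t file_offset_of_buffer_end)
{
    if (read_until_position)
    {
        if (file_offset_of_buffer_end == *read_until_position)
            return false;    /// everything up to the limit is already read
        if (file_offset_of_buffer_end > *read_until_position)
            throw std::logic_error("Read beyond last offset");    /// LOGICAL_ERROR in the real code
    }
    return true;
}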
- if (file_offset_of_buffer_end == read_until_position) + if (file_offset_of_buffer_end == *read_until_position) return false; - if (file_offset_of_buffer_end > read_until_position) + if (file_offset_of_buffer_end > *read_until_position) throw Exception(ErrorCodes::LOGICAL_ERROR, "Read beyond last offset ({} > {})", - file_offset_of_buffer_end, read_until_position); + file_offset_of_buffer_end, *read_until_position); } else if (must_read_until_position) throw Exception(ErrorCodes::LOGICAL_ERROR, @@ -117,7 +124,7 @@ void AsynchronousReadIndirectBufferFromRemoteFS::setReadUntilPosition(size_t pos throw Exception(ErrorCodes::LOGICAL_ERROR, "Prefetch is valid in readUntilPosition"); read_until_position = position; - impl->setReadUntilPosition(read_until_position); + impl->setReadUntilPosition(*read_until_position); } @@ -127,7 +134,7 @@ void AsynchronousReadIndirectBufferFromRemoteFS::setReadUntilEnd() throw Exception(ErrorCodes::LOGICAL_ERROR, "Prefetch is valid in readUntilEnd"); read_until_position = impl->getFileSize(); - impl->setReadUntilPosition(read_until_position); + impl->setReadUntilPosition(*read_until_position); } @@ -225,7 +232,7 @@ off_t AsynchronousReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence pos = working_buffer.end(); /// Note: we read in range [file_offset_of_buffer_end, read_until_position). - if (file_offset_of_buffer_end < read_until_position + if (read_until_position && file_offset_of_buffer_end < *read_until_position && static_cast(file_offset_of_buffer_end) >= getPosition() && static_cast(file_offset_of_buffer_end) < getPosition() + static_cast(min_bytes_for_seek)) { diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h index d8fad08bc8a..2e37f448fe1 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h @@ -76,7 +76,7 @@ private: size_t bytes_to_ignore = 0; - size_t read_until_position = 0; + std::optional read_until_position = 0; bool must_read_until_position; }; diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index ef8bb8e0feb..3e99ca1a886 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -128,6 +128,31 @@ void throwIfError(const Aws::Utils::Outcome & response) throw Exception(err.GetMessage(), static_cast(err.GetErrorType())); } } +template +void logIfError(Aws::Utils::Outcome & response, Fn auto && msg) +{ + try + { + throwIfError(response); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__, msg()); + } +} + +template +void logIfError(const Aws::Utils::Outcome & response, Fn auto && msg) +{ + try + { + throwIfError(response); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__, msg()); + } +} DiskS3::DiskS3( String name_, @@ -159,15 +184,16 @@ void DiskS3::removeFromRemoteFS(RemoteFSPathKeeperPtr fs_paths_keeper) if (s3_paths_keeper) s3_paths_keeper->removePaths([&](S3PathKeeper::Chunk && chunk) { - LOG_TRACE(log, "Remove AWS keys {}", S3PathKeeper::getChunkKeys(chunk)); + String keys = S3PathKeeper::getChunkKeys(chunk); + LOG_TRACE(log, "Remove AWS keys {}", keys); Aws::S3::Model::Delete delkeys; delkeys.SetObjects(chunk); - /// TODO: Make operation idempotent. Do not throw exception if key is already deleted. 
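// Illustrative sketch (not from this change): the new logIfError() helpers above wrap the
// existing throwIfError() in a try/catch and log instead of rethrowing, so one failed
// DeleteObjects batch no longer aborts removal of the remaining chunks. The generic shape of
// that pattern, independent of the AWS types:
#include <exception>
#include <functional>
#include <iostream>
#include <string>

template <typename CheckFn>
void logInsteadOfThrow(CheckFn && check, const std::function<std::string()> & message)
{
    try
    {
        check();    /// e.g. throwIfError(outcome)
    }
    catch (const std::exception & e)
    {
        /// tryLogCurrentException() analogue: record the failure and carry on.
        std::cerr << message() << ": " << e.what() << '\n';
    }
}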
Aws::S3::Model::DeleteObjectsRequest request; request.SetBucket(bucket); request.SetDelete(delkeys); auto outcome = settings->client->DeleteObjects(request); - throwIfError(outcome); + // Do not throw here, continue deleting other chunks + logIfError(outcome, [&](){return "Can't remove AWS keys: " + keys;}); }); } @@ -500,9 +526,11 @@ bool DiskS3::checkUniqueId(const String & id) const Aws::S3::Model::ListObjectsV2Request request; request.SetBucket(bucket); request.SetPrefix(id); - auto resp = settings->client->ListObjectsV2(request); - throwIfError(resp); - Aws::Vector object_list = resp.GetResult().GetContents(); + + auto outcome = settings->client->ListObjectsV2(request); + throwIfError(outcome); + + Aws::Vector object_list = outcome.GetResult().GetContents(); for (const auto & object : object_list) if (object.GetKey() == id) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index d2dc18a03fd..89027fad9c9 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -52,14 +52,14 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.avro.output_sync_interval = settings.output_format_avro_sync_interval; format_settings.avro.schema_registry_url = settings.format_avro_schema_registry_url.toString(); format_settings.avro.string_column_pattern = settings.output_format_avro_string_column_pattern.toString(); + format_settings.avro.output_rows_in_file = settings.output_format_avro_rows_in_file; format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes; format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes; format_settings.csv.crlf_end_of_line = settings.output_format_csv_crlf_end_of_line; format_settings.csv.delimiter = settings.format_csv_delimiter; - format_settings.csv.empty_as_default = settings.input_format_defaults_for_omitted_fields; + format_settings.csv.empty_as_default = settings.input_format_csv_empty_as_default; format_settings.csv.input_format_enum_as_number = settings.input_format_csv_enum_as_number; - format_settings.csv.null_representation = settings.output_format_csv_null_representation; - format_settings.csv.unquoted_null_literal_as_null = settings.input_format_csv_unquoted_null_literal_as_null; + format_settings.csv.null_representation = settings.format_csv_null_representation; format_settings.csv.input_format_arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv; format_settings.custom.escaping_rule = settings.format_custom_escaping_rule; format_settings.custom.field_delimiter = settings.format_custom_field_delimiter; @@ -102,15 +102,17 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.tsv.crlf_end_of_line = settings.output_format_tsv_crlf_end_of_line; format_settings.tsv.empty_as_default = settings.input_format_tsv_empty_as_default; format_settings.tsv.input_format_enum_as_number = settings.input_format_tsv_enum_as_number; - format_settings.tsv.null_representation = settings.output_format_tsv_null_representation; + format_settings.tsv.null_representation = settings.format_tsv_null_representation; format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals; format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions; format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions; format_settings.with_names_use_header = 
settings.input_format_with_names_use_header; + format_settings.with_types_use_header = settings.input_format_with_types_use_header; format_settings.write_statistics = settings.output_format_write_statistics; format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary; format_settings.arrow.import_nested = settings.input_format_arrow_import_nested; format_settings.orc.import_nested = settings.input_format_orc_import_nested; + format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index ee3824081bb..4e10aa4141a 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -68,7 +68,6 @@ public: size_t row)>; private: - using InputCreatorFunc = InputFormatPtr( ReadBuffer & buf, const Block & header, diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 403ccbc6763..b4f1550f0bd 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -25,10 +25,12 @@ struct FormatSettings bool skip_unknown_fields = false; bool with_names_use_header = false; + bool with_types_use_header = false; bool write_statistics = true; bool import_nested_json = false; bool null_as_default = true; bool decimal_trailing_zeros = false; + bool defaults_for_omitted_fields = true; enum class DateTimeInputFormat { @@ -64,6 +66,7 @@ struct FormatSettings UInt64 output_sync_interval = 16 * 1024; bool allow_missing_fields = false; String string_column_pattern; + UInt64 output_rows_in_file = 1; } avro; struct CSV @@ -71,7 +74,6 @@ struct FormatSettings char delimiter = ','; bool allow_single_quotes = true; bool allow_double_quotes = true; - bool unquoted_null_literal_as_null = false; bool empty_as_default = false; bool crlf_end_of_line = false; bool input_format_enum_as_number = false; diff --git a/src/Formats/JSONEachRowUtils.cpp b/src/Formats/JSONEachRowUtils.cpp index 8ef05fa584e..b55e9f59cc7 100644 --- a/src/Formats/JSONEachRowUtils.cpp +++ b/src/Formats/JSONEachRowUtils.cpp @@ -1,5 +1,8 @@ #include #include +#include +#include + #include namespace DB @@ -10,7 +13,8 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) +template +static std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows) { skipWhitespaceIfAny(in); @@ -19,7 +23,7 @@ std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D bool quotes = false; size_t number_of_rows = 0; - while (loadAtPosition(in, memory, pos) && (balance || memory.size() + static_cast(pos - in.position()) < min_chunk_size)) + while (loadAtPosition(in, memory, pos) && (balance || memory.size() + static_cast(pos - in.position()) < min_chunk_size || number_of_rows < min_rows)) { const auto current_object_size = memory.size() + static_cast(pos - in.position()); if (current_object_size > 10 * min_chunk_size) @@ -50,19 +54,19 @@ std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D } else { - pos = find_first_symbols<'{', '}', '\\', '"'>(pos, in.buffer().end()); + pos = find_first_symbols(pos, in.buffer().end()); if (pos > in.buffer().end()) throw Exception("Position in 
buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); else if (pos == in.buffer().end()) continue; - else if (*pos == '{') + else if (*pos == opening_bracket) { ++balance; ++pos; } - else if (*pos == '}') + else if (*pos == closing_bracket) { --balance; ++pos; @@ -88,6 +92,16 @@ std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D return {loadAtPosition(in, memory, pos), number_of_rows}; } +std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) +{ + return fileSegmentationEngineJSONEachRowImpl<'{', '}'>(in, memory, min_chunk_size, 1); +} + +std::pair fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows) +{ + return fileSegmentationEngineJSONEachRowImpl<'[', ']'>(in, memory, min_chunk_size, min_rows); +} + bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf) { /// For JSONEachRow we can safely skip whitespace characters @@ -95,4 +109,37 @@ bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf) return buf.eof() || *buf.position() == '['; } +bool readFieldImpl(ReadBuffer & in, IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name, const FormatSettings & format_settings, bool yield_strings) +{ + try + { + bool as_nullable = format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable(); + + if (yield_strings) + { + String str; + readJSONString(str, in); + + ReadBufferFromString buf(str); + + if (as_nullable) + return SerializationNullable::deserializeWholeTextImpl(column, buf, format_settings, serialization); + + serialization->deserializeWholeText(column, buf, format_settings); + return true; + } + + if (as_nullable) + return SerializationNullable::deserializeTextJSONImpl(column, in, format_settings, serialization); + + serialization->deserializeTextJSON(column, in, format_settings); + return true; + } + catch (Exception & e) + { + e.addMessage("(while reading the value of key " + column_name + ")"); + throw; + } +} + } diff --git a/src/Formats/JSONEachRowUtils.h b/src/Formats/JSONEachRowUtils.h index 2d2d4ad5531..4a049aa1abd 100644 --- a/src/Formats/JSONEachRowUtils.h +++ b/src/Formats/JSONEachRowUtils.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -7,8 +8,11 @@ namespace DB { -std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size); +std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size); +std::pair fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows); bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf); +bool readFieldImpl(ReadBuffer & in, IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, const String & column_name, const FormatSettings & format_settings, bool yield_strings); + } diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index a95ccb8b064..1aedff5fceb 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -13,6 +13,7 @@ void registerFileSegmentationEngineCSV(FormatFactory & factory); void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory); void registerFileSegmentationEngineRegexp(FormatFactory & factory); void registerFileSegmentationEngineJSONAsString(FormatFactory & factory); +void 
registerFileSegmentationEngineJSONCompactEachRow(FormatFactory & factory); /// Formats for both input/output. @@ -87,6 +88,7 @@ void registerFormats() registerFileSegmentationEngineJSONEachRow(factory); registerFileSegmentationEngineRegexp(factory); registerFileSegmentationEngineJSONAsString(factory); + registerFileSegmentationEngineJSONCompactEachRow(factory); registerInputFormatNative(factory); registerOutputFormatNative(factory); diff --git a/src/Formats/registerWithNamesAndTypes.cpp b/src/Formats/registerWithNamesAndTypes.cpp new file mode 100644 index 00000000000..cba578b08c7 --- /dev/null +++ b/src/Formats/registerWithNamesAndTypes.cpp @@ -0,0 +1,13 @@ +#include + +namespace DB +{ + +void registerWithNamesAndTypes(const std::string & base_format_name, RegisterWithNamesAndTypesFunc register_func) +{ + register_func(base_format_name, false, false); + register_func(base_format_name + "WithNames", true, false); + register_func(base_format_name + "WithNamesAndTypes", true, true); +} + +} diff --git a/src/Formats/registerWithNamesAndTypes.h b/src/Formats/registerWithNamesAndTypes.h new file mode 100644 index 00000000000..d8e74e3421e --- /dev/null +++ b/src/Formats/registerWithNamesAndTypes.h @@ -0,0 +1,12 @@ +#pragma once + +#include +#include + +namespace DB +{ + +using RegisterWithNamesAndTypesFunc = std::function; +void registerWithNamesAndTypes(const std::string & base_format_name, RegisterWithNamesAndTypesFunc register_func); + +} diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 7697d86dc34..7dc47b54bea 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include diff --git a/src/Functions/FunctionsStringArray.cpp b/src/Functions/FunctionsStringArray.cpp index 0c76cde701a..0e73d6a33f5 100644 --- a/src/Functions/FunctionsStringArray.cpp +++ b/src/Functions/FunctionsStringArray.cpp @@ -1,21 +1,6 @@ #include #include -namespace -{ -bool isNullableStringOrNullableNothing(DB::DataTypePtr type) -{ - if (type->isNullable()) - { - const auto & nested_type = assert_cast(*type).getNestedType(); - if (isString(nested_type) || isNothing(nested_type)) - return true; - } - return false; -} - -} - namespace DB { namespace ErrorCodes @@ -33,11 +18,8 @@ DataTypePtr FunctionArrayStringConcat::getReturnTypeImpl(const DataTypes & argum ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); const DataTypeArray * array_type = checkAndGetDataType(arguments[0].get()); - // An array consisting of only Null-s has type Array(Nullable(Nothing)) - if (!array_type || !(isString(array_type->getNestedType()) || isNullableStringOrNullableNothing(array_type->getNestedType()))) - throw Exception( - "First argument for function " + getName() + " must be an array of String-s or Nullable(String)-s.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + if (!array_type) + throw Exception("First argument for function " + getName() + " must be an array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); if (arguments.size() == 2 && !isString(arguments[1])) throw Exception("Second argument for function " + getName() + " must be constant string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/src/Functions/FunctionsStringArray.h b/src/Functions/FunctionsStringArray.h index c15a8db8186..27907626971 100644 --- a/src/Functions/FunctionsStringArray.h +++ b/src/Functions/FunctionsStringArray.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include 
#include @@ -17,7 +18,6 @@ #include #include - namespace DB { @@ -648,7 +648,7 @@ public: }; -/// Joins an array of strings into one string via a separator. +/// Joins an array of type serializable to string into one string via a separator. class FunctionArrayStringConcat : public IFunction { private: @@ -734,6 +734,25 @@ private: null_map); } + static ColumnPtr serializeNestedColumn(const ColumnArray & col_arr, const DataTypePtr & nested_type) + { + if (isString(nested_type)) + { + return col_arr.getDataPtr(); + } + else if (const ColumnNullable * col_nullable = checkAndGetColumn(col_arr.getData()); + col_nullable && isString(col_nullable->getNestedColumn().getDataType())) + { + return col_nullable->getNestedColumnPtr(); + } + else + { + ColumnsWithTypeAndName cols; + cols.emplace_back(col_arr.getDataPtr(), nested_type, "tmp"); + return ConvertImplGenericToString::execute(cols, std::make_shared()); + } + } + public: static constexpr auto name = "arrayStringConcat"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } @@ -761,7 +780,9 @@ public: delimiter = col_delim->getValue(); } - if (const ColumnConst * col_const_arr = checkAndGetColumnConst(arguments[0].column.get())) + const auto & nested_type = assert_cast(*arguments[0].type).getNestedType(); + if (const ColumnConst * col_const_arr = checkAndGetColumnConst(arguments[0].column.get()); + col_const_arr && isString(nested_type)) { Array src_arr = col_const_arr->getValue(); String dst_str; @@ -778,25 +799,19 @@ public: return result_type->createColumnConst(col_const_arr->size(), dst_str); } + + ColumnPtr src_column = arguments[0].column->convertToFullColumnIfConst(); + const ColumnArray & col_arr = assert_cast(*src_column.get()); + + ColumnPtr str_subcolumn = serializeNestedColumn(col_arr, nested_type); + const ColumnString & col_string = assert_cast(*str_subcolumn.get()); + + auto col_res = ColumnString::create(); + if (const ColumnNullable * col_nullable = checkAndGetColumn(col_arr.getData())) + executeInternal(col_string, col_arr, delimiter, *col_res, col_nullable->getNullMapData().data()); else - { - const ColumnArray & col_arr = assert_cast(*arguments[0].column); - auto col_res = ColumnString::create(); - if (WhichDataType(col_arr.getData().getDataType()).isString()) - { - const ColumnString & col_string = assert_cast(col_arr.getData()); - executeInternal(col_string, col_arr, delimiter, *col_res); - } - else - { - const ColumnNullable & col_nullable = assert_cast(col_arr.getData()); - if (const ColumnString * col_string = typeid_cast(col_nullable.getNestedColumnPtr().get())) - executeInternal(*col_string, col_arr, delimiter, *col_res, col_nullable.getNullMapData().data()); - else - col_res->insertManyDefaults(col_arr.size()); - } - return col_res; - } + executeInternal(col_string, col_arr, delimiter, *col_res); + return col_res; } }; diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp index c7f047eb8fb..0ea3bfbd013 100644 --- a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp @@ -17,22 +17,17 @@ namespace DB bool ParserJSONPathMemberAccess::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (pos->type != TokenType::Dot) - { return false; - } + ++pos; - if (pos->type != TokenType::BareWord) - { + if (pos->type != TokenType::BareWord && pos->type !=TokenType::QuotedIdentifier) return false; - } ParserIdentifier name_p; ASTPtr member_name; if 
(!name_p.parse(pos, member_name, expected)) - { return false; - } auto member_access = std::make_shared(); node = member_access; diff --git a/src/Functions/addressToLine.cpp b/src/Functions/addressToLine.cpp index d513a8767dc..6ef1800d913 100644 --- a/src/Functions/addressToLine.cpp +++ b/src/Functions/addressToLine.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Functions/addressToSymbol.cpp b/src/Functions/addressToSymbol.cpp index 1561e0ee506..0fd25503ec7 100644 --- a/src/Functions/addressToSymbol.cpp +++ b/src/Functions/addressToSymbol.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Functions/currentProfiles.cpp b/src/Functions/currentProfiles.cpp index c578268160e..849cd026de8 100644 --- a/src/Functions/currentProfiles.cpp +++ b/src/Functions/currentProfiles.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -35,7 +35,7 @@ namespace explicit FunctionCurrentProfiles(const ContextPtr & context) { - const auto & manager = context->getAccessControlManager(); + const auto & manager = context->getAccessControl(); std::vector profile_ids; if constexpr (kind == Kind::CURRENT_PROFILES) diff --git a/src/Functions/currentRoles.cpp b/src/Functions/currentRoles.cpp index c2545edd002..f176f51f6c9 100644 --- a/src/Functions/currentRoles.cpp +++ b/src/Functions/currentRoles.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -47,7 +47,7 @@ namespace else { static_assert(kind == Kind::DEFAULT_ROLES); - const auto & manager = context->getAccessControlManager(); + const auto & manager = context->getAccessControl(); if (auto user = context->getUser()) role_names = manager.tryReadNames(user->granted_roles.findGranted(user->default_roles)); } diff --git a/src/Functions/demange.cpp b/src/Functions/demange.cpp index 0f50eb5e141..ecf6661d20d 100644 --- a/src/Functions/demange.cpp +++ b/src/Functions/demange.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include diff --git a/src/Functions/formatRow.cpp b/src/Functions/formatRow.cpp index ee9696cf34f..3f9d3e782d7 100644 --- a/src/Functions/formatRow.cpp +++ b/src/Functions/formatRow.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -18,6 +19,7 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int UNKNOWN_FORMAT; + extern const int BAD_ARGUMENTS; } namespace @@ -70,6 +72,11 @@ public: writeChar('\0', buffer); offsets[row] = buffer.count(); }); + + /// This function make sense only for row output formats. + if (!dynamic_cast(out.get())) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot turn rows into a {} format strings. 
{} function supports only row output formats", format_name, getName()); + out->write(arg_columns); return col_str; } diff --git a/src/IO/PeekableReadBuffer.cpp b/src/IO/PeekableReadBuffer.cpp index c7cef777afc..40929acd848 100644 --- a/src/IO/PeekableReadBuffer.cpp +++ b/src/IO/PeekableReadBuffer.cpp @@ -9,7 +9,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -PeekableReadBuffer::PeekableReadBuffer(ReadBuffer & sub_buf_, size_t start_size_ /*= DBMS_DEFAULT_BUFFER_SIZE*/) +PeekableReadBuffer::PeekableReadBuffer(ReadBuffer & sub_buf_, size_t start_size_ /*= 0*/) : BufferWithOwnMemory(start_size_), sub_buf(sub_buf_) { padded &= sub_buf.isPadded(); @@ -27,6 +27,7 @@ void PeekableReadBuffer::reset() peeked_size = 0; checkpoint = std::nullopt; checkpoint_in_own_memory = false; + use_stack_memory = true; if (!currentlyReadFromOwnMemory()) sub_buf.position() = pos; @@ -72,21 +73,23 @@ bool PeekableReadBuffer::peekNext() sub_buf.position() = copy_from; } + char * memory_data = getMemoryData(); + /// Save unread data from sub-buffer to own memory - memcpy(memory.data() + peeked_size, sub_buf.position(), bytes_to_copy); + memcpy(memory_data + peeked_size, sub_buf.position(), bytes_to_copy); /// If useSubbufferOnly() is false, then checkpoint is in own memory and it was updated in resizeOwnMemoryIfNecessary /// Otherwise, checkpoint now at the beginning of own memory if (checkpoint && useSubbufferOnly()) { - checkpoint.emplace(memory.data()); + checkpoint.emplace(memory_data); checkpoint_in_own_memory = true; } if (currentlyReadFromOwnMemory()) { /// Update buffer size - BufferBase::set(memory.data(), peeked_size + bytes_to_copy, offset()); + BufferBase::set(memory_data, peeked_size + bytes_to_copy, offset()); } else { @@ -99,7 +102,7 @@ bool PeekableReadBuffer::peekNext() else pos_offset = 0; } - BufferBase::set(memory.data(), peeked_size + bytes_to_copy, pos_offset); + BufferBase::set(memory_data, peeked_size + bytes_to_copy, pos_offset); } peeked_size += bytes_to_copy; @@ -125,8 +128,9 @@ void PeekableReadBuffer::rollbackToCheckpoint(bool drop) /// Checkpoint is in own memory and position is not. assert(checkpointInOwnMemory()); + char * memory_data = getMemoryData(); /// Switch to reading from own memory. - BufferBase::set(memory.data(), peeked_size, *checkpoint - memory.data()); + BufferBase::set(memory_data, peeked_size, *checkpoint - memory_data); } if (drop) @@ -224,12 +228,31 @@ void PeekableReadBuffer::resizeOwnMemoryIfNecessary(size_t bytes_to_append) bool need_update_pos = currentlyReadFromOwnMemory(); size_t offset = 0; if (need_update_checkpoint) - offset = *checkpoint - memory.data(); + { + char * memory_data = getMemoryData(); + offset = *checkpoint - memory_data; + } else if (need_update_pos) offset = this->offset(); size_t new_size = peeked_size + bytes_to_append; - if (memory.size() < new_size) + + if (use_stack_memory) + { + /// If stack memory is still enough, do nothing. + if (sizeof(stack_memory) >= new_size) + return; + + /// Stack memory is not enough, allocate larger buffer. 
+ use_stack_memory = false; + memory.resize(std::max(size_t(DBMS_DEFAULT_BUFFER_SIZE), new_size)); + memcpy(memory.data(), stack_memory, sizeof(stack_memory)); + if (need_update_checkpoint) + checkpoint.emplace(memory.data() + offset); + if (need_update_pos) + BufferBase::set(memory.data(), peeked_size, pos - stack_memory); + } + else if (memory.size() < new_size) { if (bytes_to_append < offset && 2 * (peeked_size - offset) <= memory.size()) { @@ -273,10 +296,11 @@ void PeekableReadBuffer::makeContinuousMemoryFromCheckpointToPos() size_t bytes_to_append = pos - sub_buf.position(); resizeOwnMemoryIfNecessary(bytes_to_append); - memcpy(memory.data() + peeked_size, sub_buf.position(), bytes_to_append); + char * memory_data = getMemoryData(); + memcpy(memory_data + peeked_size, sub_buf.position(), bytes_to_append); sub_buf.position() = pos; peeked_size += bytes_to_append; - BufferBase::set(memory.data(), peeked_size, peeked_size); + BufferBase::set(memory_data, peeked_size, peeked_size); } PeekableReadBuffer::~PeekableReadBuffer() @@ -287,7 +311,7 @@ PeekableReadBuffer::~PeekableReadBuffer() bool PeekableReadBuffer::hasUnreadData() const { - return peeked_size && pos != memory.data() + peeked_size; + return peeked_size && pos != getMemoryData() + peeked_size; } } diff --git a/src/IO/PeekableReadBuffer.h b/src/IO/PeekableReadBuffer.h index 4515c6f8ce5..f22987d9daa 100644 --- a/src/IO/PeekableReadBuffer.h +++ b/src/IO/PeekableReadBuffer.h @@ -20,7 +20,7 @@ class PeekableReadBuffer : public BufferWithOwnMemory { friend class PeekableReadBufferCheckpoint; public: - explicit PeekableReadBuffer(ReadBuffer & sub_buf_, size_t start_size_ = DBMS_DEFAULT_BUFFER_SIZE); + explicit PeekableReadBuffer(ReadBuffer & sub_buf_, size_t start_size_ = 0); ~PeekableReadBuffer() override; @@ -84,11 +84,21 @@ private: /// Updates all invalidated pointers and sizes. void resizeOwnMemoryIfNecessary(size_t bytes_to_append); + char * getMemoryData() { return use_stack_memory ? stack_memory : memory.data(); } + const char * getMemoryData() const { return use_stack_memory ? stack_memory : memory.data(); } + ReadBuffer & sub_buf; size_t peeked_size = 0; std::optional checkpoint = std::nullopt; bool checkpoint_in_own_memory = false; + + /// To prevent expensive and in some cases unnecessary memory allocations on PeekableReadBuffer + /// creation (for example if PeekableReadBuffer is often created or if we need to remember small amount of + /// data after checkpoint), at the beginning we will use small amount of memory on stack and allocate + /// larger buffer only if reserved memory is not enough. + char stack_memory[16]; + bool use_stack_memory = true; }; diff --git a/src/Interpreters/InterpreterCreateQuotaQuery.cpp b/src/Interpreters/Access/InterpreterCreateQuotaQuery.cpp similarity index 93% rename from src/Interpreters/InterpreterCreateQuotaQuery.cpp rename to src/Interpreters/Access/InterpreterCreateQuotaQuery.cpp index b4f61e43186..703615972c4 100644 --- a/src/Interpreters/InterpreterCreateQuotaQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateQuotaQuery.cpp @@ -1,10 +1,10 @@ -#include -#include -#include +#include +#include +#include +#include +#include #include #include -#include -#include #include #include #include @@ -73,7 +73,7 @@ namespace BlockIO InterpreterCreateQuotaQuery::execute() { auto & query = query_ptr->as(); - auto & access_control = getContext()->getAccessControlManager(); + auto & access_control = getContext()->getAccessControl(); getContext()->checkAccess(query.alter ? 
AccessType::ALTER_QUOTA : AccessType::CREATE_QUOTA); if (!query.cluster.empty()) diff --git a/src/Interpreters/InterpreterCreateQuotaQuery.h b/src/Interpreters/Access/InterpreterCreateQuotaQuery.h similarity index 100% rename from src/Interpreters/InterpreterCreateQuotaQuery.h rename to src/Interpreters/Access/InterpreterCreateQuotaQuery.h diff --git a/src/Interpreters/InterpreterCreateRoleQuery.cpp b/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp similarity index 92% rename from src/Interpreters/InterpreterCreateRoleQuery.cpp rename to src/Interpreters/Access/InterpreterCreateRoleQuery.cpp index b9debc259be..d623d510ffd 100644 --- a/src/Interpreters/InterpreterCreateRoleQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp @@ -1,9 +1,9 @@ -#include -#include +#include +#include +#include +#include #include #include -#include -#include namespace DB @@ -34,7 +34,7 @@ namespace BlockIO InterpreterCreateRoleQuery::execute() { const auto & query = query_ptr->as(); - auto & access_control = getContext()->getAccessControlManager(); + auto & access_control = getContext()->getAccessControl(); if (query.alter) getContext()->checkAccess(AccessType::ALTER_ROLE); else diff --git a/src/Interpreters/InterpreterCreateRoleQuery.h b/src/Interpreters/Access/InterpreterCreateRoleQuery.h similarity index 100% rename from src/Interpreters/InterpreterCreateRoleQuery.h rename to src/Interpreters/Access/InterpreterCreateRoleQuery.h diff --git a/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp b/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.cpp similarity index 90% rename from src/Interpreters/InterpreterCreateRowPolicyQuery.cpp rename to src/Interpreters/Access/InterpreterCreateRowPolicyQuery.cpp index 5e4b9b30e66..37347b37619 100644 --- a/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.cpp @@ -1,12 +1,12 @@ -#include -#include -#include -#include +#include +#include +#include +#include #include +#include +#include #include #include -#include -#include #include @@ -44,7 +44,7 @@ namespace BlockIO InterpreterCreateRowPolicyQuery::execute() { auto & query = query_ptr->as(); - auto & access_control = getContext()->getAccessControlManager(); + auto & access_control = getContext()->getAccessControl(); getContext()->checkAccess(query.alter ? 
AccessType::ALTER_ROW_POLICY : AccessType::CREATE_ROW_POLICY); if (!query.cluster.empty()) diff --git a/src/Interpreters/InterpreterCreateRowPolicyQuery.h b/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.h similarity index 100% rename from src/Interpreters/InterpreterCreateRowPolicyQuery.h rename to src/Interpreters/Access/InterpreterCreateRowPolicyQuery.h diff --git a/src/Interpreters/InterpreterCreateSettingsProfileQuery.cpp b/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp similarity index 91% rename from src/Interpreters/InterpreterCreateSettingsProfileQuery.cpp rename to src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp index fb5fb258b10..95c2a58388a 100644 --- a/src/Interpreters/InterpreterCreateSettingsProfileQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp @@ -1,11 +1,11 @@ -#include -#include -#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include namespace DB @@ -42,7 +42,7 @@ namespace BlockIO InterpreterCreateSettingsProfileQuery::execute() { auto & query = query_ptr->as(); - auto & access_control = getContext()->getAccessControlManager(); + auto & access_control = getContext()->getAccessControl(); if (query.alter) getContext()->checkAccess(AccessType::ALTER_SETTINGS_PROFILE); else diff --git a/src/Interpreters/InterpreterCreateSettingsProfileQuery.h b/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.h similarity index 100% rename from src/Interpreters/InterpreterCreateSettingsProfileQuery.h rename to src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.h diff --git a/src/Interpreters/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp similarity index 92% rename from src/Interpreters/InterpreterCreateUserQuery.cpp rename to src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 6f963a3b338..33d85afb7c3 100644 --- a/src/Interpreters/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -1,14 +1,14 @@ -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include -#include -#include +#include #include +#include +#include +#include +#include #include @@ -31,8 +31,8 @@ namespace else if (query.names->size() == 1) user.setName(query.names->front()->toString()); - if (query.authentication) - user.authentication = *query.authentication; + if (query.auth_data) + user.auth_data = *query.auth_data; if (override_name && !override_name->host_pattern.empty()) { @@ -79,7 +79,7 @@ namespace BlockIO InterpreterCreateUserQuery::execute() { const auto & query = query_ptr->as(); - auto & access_control = getContext()->getAccessControlManager(); + auto & access_control = getContext()->getAccessControl(); auto access = getContext()->getAccess(); access->checkAccess(query.alter ? 
AccessType::ALTER_USER : AccessType::CREATE_USER); diff --git a/src/Interpreters/InterpreterCreateUserQuery.h b/src/Interpreters/Access/InterpreterCreateUserQuery.h similarity index 100% rename from src/Interpreters/InterpreterCreateUserQuery.h rename to src/Interpreters/Access/InterpreterCreateUserQuery.h diff --git a/src/Interpreters/InterpreterDropAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp similarity index 86% rename from src/Interpreters/InterpreterDropAccessEntityQuery.cpp rename to src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp index a9b8db6d74e..c00bbe4f379 100644 --- a/src/Interpreters/InterpreterDropAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp @@ -1,15 +1,15 @@ -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include #include #include #include +#include +#include namespace DB @@ -25,7 +25,7 @@ using EntityType = IAccessEntity::Type; BlockIO InterpreterDropAccessEntityQuery::execute() { auto & query = query_ptr->as(); - auto & access_control = getContext()->getAccessControlManager(); + auto & access_control = getContext()->getAccessControl(); getContext()->checkAccess(getRequiredAccess()); if (!query.cluster.empty()) diff --git a/src/Interpreters/InterpreterDropAccessEntityQuery.h b/src/Interpreters/Access/InterpreterDropAccessEntityQuery.h similarity index 100% rename from src/Interpreters/InterpreterDropAccessEntityQuery.h rename to src/Interpreters/Access/InterpreterDropAccessEntityQuery.h diff --git a/src/Interpreters/InterpreterGrantQuery.cpp b/src/Interpreters/Access/InterpreterGrantQuery.cpp similarity index 97% rename from src/Interpreters/InterpreterGrantQuery.cpp rename to src/Interpreters/Access/InterpreterGrantQuery.cpp index 506ab8a3387..e17af6877be 100644 --- a/src/Interpreters/InterpreterGrantQuery.cpp +++ b/src/Interpreters/Access/InterpreterGrantQuery.cpp @@ -1,16 +1,17 @@ -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include +#include #include #include -#include +#include +#include +#include #include #include +#include namespace DB { @@ -51,7 +52,7 @@ namespace /// Extracts roles which are going to be granted or revoked from a query. void collectRolesToGrantOrRevoke( - const AccessControlManager & access_control, + const AccessControl & access_control, const ASTGrantQuery & query, std::vector & roles_to_grant, RolesOrUsersSet & roles_to_revoke) @@ -121,7 +122,7 @@ namespace } /// Checks if grantees are allowed for the current user, throws an exception if not. - void checkGranteesAreAllowed(const AccessControlManager & access_control, const ContextAccess & current_user_access, const std::vector & grantee_ids) + void checkGranteesAreAllowed(const AccessControl & access_control, const ContextAccess & current_user_access, const std::vector & grantee_ids) { auto current_user = current_user_access.getUser(); if (!current_user || (current_user->grantees == RolesOrUsersSet::AllTag{})) @@ -139,7 +140,7 @@ namespace /// Checks if the current user has enough access rights granted with grant option to grant or revoke specified access rights. 
void checkGrantOption( - const AccessControlManager & access_control, + const AccessControl & access_control, const ContextAccess & current_user_access, const std::vector & grantees_from_query, bool & need_check_grantees_are_allowed, @@ -205,7 +206,7 @@ namespace /// Checks if the current user has enough roles granted with admin option to grant or revoke specified roles. void checkAdminOption( - const AccessControlManager & access_control, + const AccessControl & access_control, const ContextAccess & current_user_access, const std::vector & grantees_from_query, bool & need_check_grantees_are_allowed, @@ -382,7 +383,7 @@ BlockIO InterpreterGrantQuery::execute() if (!query.access_rights_elements.empty() && query.access_rights_elements[0].is_partial_revoke && !query.is_revoke) throw Exception("A partial revoke should be revoked, not granted", ErrorCodes::LOGICAL_ERROR); - auto & access_control = getContext()->getAccessControlManager(); + auto & access_control = getContext()->getAccessControl(); auto current_user_access = getContext()->getAccess(); std::vector grantees = RolesOrUsersSet{*query.grantees, access_control, getContext()->getUserID()}.getMatchingIDs(access_control); diff --git a/src/Interpreters/InterpreterGrantQuery.h b/src/Interpreters/Access/InterpreterGrantQuery.h similarity index 100% rename from src/Interpreters/InterpreterGrantQuery.h rename to src/Interpreters/Access/InterpreterGrantQuery.h diff --git a/src/Interpreters/InterpreterSetRoleQuery.cpp b/src/Interpreters/Access/InterpreterSetRoleQuery.cpp similarity index 89% rename from src/Interpreters/InterpreterSetRoleQuery.cpp rename to src/Interpreters/Access/InterpreterSetRoleQuery.cpp index 057ccd447ef..6acb9ab5e19 100644 --- a/src/Interpreters/InterpreterSetRoleQuery.cpp +++ b/src/Interpreters/Access/InterpreterSetRoleQuery.cpp @@ -1,10 +1,10 @@ -#include -#include -#include -#include +#include +#include +#include #include -#include +#include #include +#include namespace DB @@ -28,7 +28,7 @@ BlockIO InterpreterSetRoleQuery::execute() void InterpreterSetRoleQuery::setRole(const ASTSetRoleQuery & query) { - auto & access_control = getContext()->getAccessControlManager(); + auto & access_control = getContext()->getAccessControl(); auto session_context = getContext()->getSessionContext(); auto user = session_context->getUser(); @@ -62,7 +62,7 @@ void InterpreterSetRoleQuery::setDefaultRole(const ASTSetRoleQuery & query) { getContext()->checkAccess(AccessType::ALTER_USER); - auto & access_control = getContext()->getAccessControlManager(); + auto & access_control = getContext()->getAccessControl(); std::vector to_users = RolesOrUsersSet{*query.to_users, access_control, getContext()->getUserID()}.getMatchingIDs(access_control); RolesOrUsersSet roles_from_query{*query.roles, access_control}; diff --git a/src/Interpreters/InterpreterSetRoleQuery.h b/src/Interpreters/Access/InterpreterSetRoleQuery.h similarity index 100% rename from src/Interpreters/InterpreterSetRoleQuery.h rename to src/Interpreters/Access/InterpreterSetRoleQuery.h diff --git a/src/Interpreters/InterpreterShowAccessEntitiesQuery.cpp b/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp similarity index 96% rename from src/Interpreters/InterpreterShowAccessEntitiesQuery.cpp rename to src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp index 41b986e43a2..b0fe28e1abd 100644 --- a/src/Interpreters/InterpreterShowAccessEntitiesQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp @@ -1,10 +1,10 @@ -#include -#include 
+#include +#include #include -#include #include #include #include +#include namespace DB diff --git a/src/Interpreters/InterpreterShowAccessEntitiesQuery.h b/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.h similarity index 100% rename from src/Interpreters/InterpreterShowAccessEntitiesQuery.h rename to src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.h diff --git a/src/Interpreters/InterpreterShowAccessQuery.cpp b/src/Interpreters/Access/InterpreterShowAccessQuery.cpp similarity index 89% rename from src/Interpreters/InterpreterShowAccessQuery.cpp rename to src/Interpreters/Access/InterpreterShowAccessQuery.cpp index 86ab409d82b..a385f6c8d7a 100644 --- a/src/Interpreters/InterpreterShowAccessQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowAccessQuery.cpp @@ -1,14 +1,14 @@ -#include +#include #include #include -#include -#include +#include +#include #include #include #include -#include -#include +#include +#include #include #include #include @@ -49,7 +49,7 @@ QueryPipeline InterpreterShowAccessQuery::executeImpl() const std::vector InterpreterShowAccessQuery::getEntities() const { - const auto & access_control = getContext()->getAccessControlManager(); + const auto & access_control = getContext()->getAccessControl(); getContext()->checkAccess(AccessType::SHOW_ACCESS); std::vector entities; @@ -71,7 +71,7 @@ std::vector InterpreterShowAccessQuery::getEntities() const ASTs InterpreterShowAccessQuery::getCreateAndGrantQueries() const { auto entities = getEntities(); - const auto & access_control = getContext()->getAccessControlManager(); + const auto & access_control = getContext()->getAccessControl(); ASTs create_queries, grant_queries; for (const auto & entity : entities) diff --git a/src/Interpreters/InterpreterShowAccessQuery.h b/src/Interpreters/Access/InterpreterShowAccessQuery.h similarity index 100% rename from src/Interpreters/InterpreterShowAccessQuery.h rename to src/Interpreters/Access/InterpreterShowAccessQuery.h diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp similarity index 86% rename from src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp rename to src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp index 7be7032f48a..ca6003e2cc0 100644 --- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp @@ -1,29 +1,29 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include -#include +#include #include #include #include #include #include #include -#include -#include #include #include +#include +#include +#include #include #include @@ -40,7 +40,7 @@ namespace { ASTPtr getCreateQueryImpl( const User & user, - const AccessControlManager * manager /* not used if attach_mode == true */, + const AccessControl * access_control /* not used if attach_mode == true */, bool attach_mode) { auto query = std::make_shared(); @@ -56,12 +56,12 @@ namespace if (attach_mode) query->default_roles = user.default_roles.toAST(); else - query->default_roles = user.default_roles.toASTWithNames(*manager); + query->default_roles = user.default_roles.toASTWithNames(*access_control); } - if (user.authentication.getType() != Authentication::NO_PASSWORD) + if (user.auth_data.getType() != 
AuthenticationType::NO_PASSWORD) { - query->authentication = user.authentication; + query->auth_data = user.auth_data; query->show_password = attach_mode; /// We don't show password unless it's an ATTACH statement. } @@ -70,7 +70,7 @@ namespace if (attach_mode) query->settings = user.settings.toAST(); else - query->settings = user.settings.toASTWithNames(*manager); + query->settings = user.settings.toASTWithNames(*access_control); } if (user.grantees != RolesOrUsersSet::AllTag{}) @@ -78,7 +78,7 @@ namespace if (attach_mode) query->grantees = user.grantees.toAST(); else - query->grantees = user.grantees.toASTWithNames(*manager); + query->grantees = user.grantees.toASTWithNames(*access_control); query->grantees->use_keyword_any = true; } @@ -93,7 +93,7 @@ namespace } - ASTPtr getCreateQueryImpl(const Role & role, const AccessControlManager * manager, bool attach_mode) + ASTPtr getCreateQueryImpl(const Role & role, const AccessControl * access_control, bool attach_mode) { auto query = std::make_shared(); query->names.emplace_back(role.getName()); @@ -104,14 +104,14 @@ namespace if (attach_mode) query->settings = role.settings.toAST(); else - query->settings = role.settings.toASTWithNames(*manager); + query->settings = role.settings.toASTWithNames(*access_control); } return query; } - ASTPtr getCreateQueryImpl(const SettingsProfile & profile, const AccessControlManager * manager, bool attach_mode) + ASTPtr getCreateQueryImpl(const SettingsProfile & profile, const AccessControl * access_control, bool attach_mode) { auto query = std::make_shared(); query->names.emplace_back(profile.getName()); @@ -122,7 +122,7 @@ namespace if (attach_mode) query->settings = profile.elements.toAST(); else - query->settings = profile.elements.toASTWithNames(*manager); + query->settings = profile.elements.toASTWithNames(*access_control); if (query->settings) query->settings->setUseInheritKeyword(true); } @@ -132,7 +132,7 @@ namespace if (attach_mode) query->to_roles = profile.to_roles.toAST(); else - query->to_roles = profile.to_roles.toASTWithNames(*manager); + query->to_roles = profile.to_roles.toASTWithNames(*access_control); } return query; @@ -141,7 +141,7 @@ namespace ASTPtr getCreateQueryImpl( const Quota & quota, - const AccessControlManager * manager /* not used if attach_mode == true */, + const AccessControl * access_control /* not used if attach_mode == true */, bool attach_mode) { auto query = std::make_shared(); @@ -168,7 +168,7 @@ namespace if (attach_mode) query->roles = quota.to_roles.toAST(); else - query->roles = quota.to_roles.toASTWithNames(*manager); + query->roles = quota.to_roles.toASTWithNames(*access_control); } return query; @@ -177,7 +177,7 @@ namespace ASTPtr getCreateQueryImpl( const RowPolicy & policy, - const AccessControlManager * manager /* not used if attach_mode == true */, + const AccessControl * access_control /* not used if attach_mode == true */, bool attach_mode) { auto query = std::make_shared(); @@ -204,7 +204,7 @@ namespace if (attach_mode) query->roles = policy.to_roles.toAST(); else - query->roles = policy.to_roles.toASTWithNames(*manager); + query->roles = policy.to_roles.toASTWithNames(*access_control); } return query; @@ -212,19 +212,19 @@ namespace ASTPtr getCreateQueryImpl( const IAccessEntity & entity, - const AccessControlManager * manager /* not used if attach_mode == true */, + const AccessControl * access_control /* not used if attach_mode == true */, bool attach_mode) { if (const User * user = typeid_cast(&entity)) - return getCreateQueryImpl(*user, manager, 
attach_mode); + return getCreateQueryImpl(*user, access_control, attach_mode); if (const Role * role = typeid_cast(&entity)) - return getCreateQueryImpl(*role, manager, attach_mode); + return getCreateQueryImpl(*role, access_control, attach_mode); if (const RowPolicy * policy = typeid_cast(&entity)) - return getCreateQueryImpl(*policy, manager, attach_mode); + return getCreateQueryImpl(*policy, access_control, attach_mode); if (const Quota * quota = typeid_cast(&entity)) - return getCreateQueryImpl(*quota, manager, attach_mode); + return getCreateQueryImpl(*quota, access_control, attach_mode); if (const SettingsProfile * profile = typeid_cast(&entity)) - return getCreateQueryImpl(*profile, manager, attach_mode); + return getCreateQueryImpl(*profile, access_control, attach_mode); throw Exception(entity.outputTypeAndName() + ": type is not supported by SHOW CREATE query", ErrorCodes::NOT_IMPLEMENTED); } @@ -277,7 +277,7 @@ QueryPipeline InterpreterShowCreateAccessEntityQuery::executeImpl() std::vector InterpreterShowCreateAccessEntityQuery::getEntities() const { auto & show_query = query_ptr->as(); - const auto & access_control = getContext()->getAccessControlManager(); + const auto & access_control = getContext()->getAccessControl(); getContext()->checkAccess(getRequiredAccess()); show_query.replaceEmptyDatabase(getContext()->getCurrentDatabase()); std::vector entities; @@ -348,7 +348,7 @@ ASTs InterpreterShowCreateAccessEntityQuery::getCreateQueries() const auto entities = getEntities(); ASTs list; - const auto & access_control = getContext()->getAccessControlManager(); + const auto & access_control = getContext()->getAccessControl(); for (const auto & entity : entities) list.push_back(getCreateQuery(*entity, access_control)); @@ -356,7 +356,7 @@ ASTs InterpreterShowCreateAccessEntityQuery::getCreateQueries() const } -ASTPtr InterpreterShowCreateAccessEntityQuery::getCreateQuery(const IAccessEntity & entity, const AccessControlManager & access_control) +ASTPtr InterpreterShowCreateAccessEntityQuery::getCreateQuery(const IAccessEntity & entity, const AccessControl & access_control) { return getCreateQueryImpl(entity, &access_control, false); } diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.h similarity index 93% rename from src/Interpreters/InterpreterShowCreateAccessEntityQuery.h rename to src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.h index 0aedeb18be4..9d84e68568b 100644 --- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h +++ b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.h @@ -7,7 +7,7 @@ namespace DB { -class AccessControlManager; +class AccessControl; class Context; class AccessRightsElements; struct IAccessEntity; @@ -26,7 +26,7 @@ public: bool ignoreQuota() const override { return true; } bool ignoreLimits() const override { return true; } - static ASTPtr getCreateQuery(const IAccessEntity & entity, const AccessControlManager & access_control); + static ASTPtr getCreateQuery(const IAccessEntity & entity, const AccessControl & access_control); static ASTPtr getAttachQuery(const IAccessEntity & entity); private: diff --git a/src/Interpreters/InterpreterShowGrantsQuery.cpp b/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp similarity index 89% rename from src/Interpreters/InterpreterShowGrantsQuery.cpp rename to src/Interpreters/Access/InterpreterShowGrantsQuery.cpp index 7302e893cdd..788856dbfe0 100644 --- 
a/src/Interpreters/InterpreterShowGrantsQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp @@ -1,16 +1,16 @@ -#include -#include -#include -#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include -#include +#include #include #include +#include +#include +#include +#include +#include #include #include @@ -27,7 +27,7 @@ namespace template ASTs getGrantQueriesImpl( const T & grantee, - const AccessControlManager * manager /* not used if attach_mode == true */, + const AccessControl * access_control /* not used if attach_mode == true */, bool attach_mode = false) { ASTs res; @@ -75,7 +75,7 @@ namespace if (attach_mode) grant_query->roles = RolesOrUsersSet{element.ids}.toAST(); else - grant_query->roles = RolesOrUsersSet{element.ids}.toASTWithNames(*manager); + grant_query->roles = RolesOrUsersSet{element.ids}.toASTWithNames(*access_control); res.push_back(std::move(grant_query)); } @@ -84,13 +84,13 @@ namespace ASTs getGrantQueriesImpl( const IAccessEntity & entity, - const AccessControlManager * manager /* not used if attach_mode == true */, + const AccessControl * access_control /* not used if attach_mode == true */, bool attach_mode = false) { if (const User * user = typeid_cast(&entity)) - return getGrantQueriesImpl(*user, manager, attach_mode); + return getGrantQueriesImpl(*user, access_control, attach_mode); if (const Role * role = typeid_cast(&entity)) - return getGrantQueriesImpl(*role, manager, attach_mode); + return getGrantQueriesImpl(*role, access_control, attach_mode); throw Exception(entity.outputTypeAndName() + " is expected to be user or role", ErrorCodes::LOGICAL_ERROR); } @@ -136,7 +136,7 @@ QueryPipeline InterpreterShowGrantsQuery::executeImpl() std::vector InterpreterShowGrantsQuery::getEntities() const { const auto & show_query = query_ptr->as(); - const auto & access_control = getContext()->getAccessControlManager(); + const auto & access_control = getContext()->getAccessControl(); auto ids = RolesOrUsersSet{*show_query.for_roles, access_control, getContext()->getUserID()}.getMatchingIDs(access_control); std::vector entities; @@ -155,7 +155,7 @@ std::vector InterpreterShowGrantsQuery::getEntities() const ASTs InterpreterShowGrantsQuery::getGrantQueries() const { auto entities = getEntities(); - const auto & access_control = getContext()->getAccessControlManager(); + const auto & access_control = getContext()->getAccessControl(); ASTs grant_queries; for (const auto & entity : entities) @@ -165,7 +165,7 @@ ASTs InterpreterShowGrantsQuery::getGrantQueries() const } -ASTs InterpreterShowGrantsQuery::getGrantQueries(const IAccessEntity & user_or_role, const AccessControlManager & access_control) +ASTs InterpreterShowGrantsQuery::getGrantQueries(const IAccessEntity & user_or_role, const AccessControl & access_control) { return getGrantQueriesImpl(user_or_role, &access_control, false); } diff --git a/src/Interpreters/InterpreterShowGrantsQuery.h b/src/Interpreters/Access/InterpreterShowGrantsQuery.h similarity index 91% rename from src/Interpreters/InterpreterShowGrantsQuery.h rename to src/Interpreters/Access/InterpreterShowGrantsQuery.h index 06bdcf169b1..bab147c279e 100644 --- a/src/Interpreters/InterpreterShowGrantsQuery.h +++ b/src/Interpreters/Access/InterpreterShowGrantsQuery.h @@ -8,7 +8,7 @@ namespace DB { -class AccessControlManager; +class AccessControl; class ASTShowGrantsQuery; struct IAccessEntity; using AccessEntityPtr = std::shared_ptr; @@ -20,7 +20,7 @@ public: BlockIO execute() override; - 
static ASTs getGrantQueries(const IAccessEntity & user_or_role, const AccessControlManager & access_control); + static ASTs getGrantQueries(const IAccessEntity & user_or_role, const AccessControl & access_control); static ASTs getAttachGrantQueries(const IAccessEntity & user_or_role); bool ignoreQuota() const override { return true; } diff --git a/src/Interpreters/InterpreterShowPrivilegesQuery.cpp b/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp similarity index 84% rename from src/Interpreters/InterpreterShowPrivilegesQuery.cpp rename to src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp index 201c1cfece8..05aa74d7dc4 100644 --- a/src/Interpreters/InterpreterShowPrivilegesQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/src/Interpreters/InterpreterShowPrivilegesQuery.h b/src/Interpreters/Access/InterpreterShowPrivilegesQuery.h similarity index 100% rename from src/Interpreters/InterpreterShowPrivilegesQuery.h rename to src/Interpreters/Access/InterpreterShowPrivilegesQuery.h diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 2f86c8bef28..d5d2b1a722d 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 7f31df9159c..f2d449c04d7 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include @@ -206,7 +206,7 @@ struct ContextSharedPart String default_profile_name; /// Default profile name used for default values. String system_profile_name; /// Profile used by system processes String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying - std::unique_ptr access_control_manager; + std::unique_ptr access_control; mutable UncompressedCachePtr uncompressed_cache; /// The cache of decompressed blocks. mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files. mutable UncompressedCachePtr index_uncompressed_cache; /// The cache of decompressed blocks for MergeTree indices. @@ -279,7 +279,7 @@ struct ContextSharedPart Context::ConfigReloadCallback config_reload_callback; ContextSharedPart() - : access_control_manager(std::make_unique()), macros(std::make_unique()) + : access_control(std::make_unique()), macros(std::make_unique()) { /// TODO: make it singleton (?) 
static std::atomic num_calls{0}; @@ -371,7 +371,7 @@ struct ContextSharedPart distributed_schedule_pool.reset(); message_broker_schedule_pool.reset(); ddl_worker.reset(); - access_control_manager.reset(); + access_control.reset(); /// Stop trace collector if any trace_collector.reset(); @@ -635,7 +635,7 @@ void Context::setConfig(const ConfigurationPtr & config) { auto lock = getLock(); shared->config = config; - shared->access_control_manager->setExternalAuthenticatorsConfig(*shared->config); + shared->access_control->setExternalAuthenticatorsConfig(*shared->config); } const Poco::Util::AbstractConfiguration & Context::getConfigRef() const @@ -645,33 +645,33 @@ const Poco::Util::AbstractConfiguration & Context::getConfigRef() const } -AccessControlManager & Context::getAccessControlManager() +AccessControl & Context::getAccessControl() { - return *shared->access_control_manager; + return *shared->access_control; } -const AccessControlManager & Context::getAccessControlManager() const +const AccessControl & Context::getAccessControl() const { - return *shared->access_control_manager; + return *shared->access_control; } void Context::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config) { auto lock = getLock(); - shared->access_control_manager->setExternalAuthenticatorsConfig(config); + shared->access_control->setExternalAuthenticatorsConfig(config); } std::unique_ptr Context::makeGSSAcceptorContext() const { auto lock = getLock(); - return std::make_unique(shared->access_control_manager->getExternalAuthenticators().getKerberosParams()); + return std::make_unique(shared->access_control->getExternalAuthenticators().getKerberosParams()); } void Context::setUsersConfig(const ConfigurationPtr & config) { auto lock = getLock(); shared->users_config = config; - shared->access_control_manager->setUsersConfig(*shared->users_config); + shared->access_control->setUsersConfig(*shared->users_config); } ConfigurationPtr Context::getUsersConfig() @@ -686,7 +686,7 @@ void Context::setUser(const UUID & user_id_) user_id = user_id_; - access = getAccessControlManager().getContextAccess( + access = getAccessControl().getContextAccess( user_id_, /* current_roles = */ {}, /* use_default_roles = */ true, settings, current_database, client_info); auto user = access->getUser(); @@ -759,7 +759,7 @@ void Context::calculateAccessRights() { auto lock = getLock(); if (user_id) - access = getAccessControlManager().getContextAccess( + access = getAccessControl().getContextAccess( *user_id, current_roles ? 
*current_roles : std::vector{}, /* use_default_roles = */ false, @@ -808,10 +808,10 @@ void Context::setInitialRowPolicy() initial_row_policy = nullptr; if (client_info.initial_user == client_info.current_user) return; - auto initial_user_id = getAccessControlManager().find(client_info.initial_user); + auto initial_user_id = getAccessControl().find(client_info.initial_user); if (!initial_user_id) return; - initial_row_policy = getAccessControlManager().getEnabledRowPolicies(*initial_user_id, {}); + initial_row_policy = getAccessControl().getEnabledRowPolicies(*initial_user_id, {}); } @@ -832,7 +832,7 @@ void Context::setCurrentProfile(const String & profile_name) auto lock = getLock(); try { - UUID profile_id = getAccessControlManager().getID(profile_name); + UUID profile_id = getAccessControl().getID(profile_name); setCurrentProfile(profile_id); } catch (Exception & e) @@ -845,7 +845,7 @@ void Context::setCurrentProfile(const String & profile_name) void Context::setCurrentProfile(const UUID & profile_id) { auto lock = getLock(); - auto profile_info = getAccessControlManager().getSettingsProfileInfo(profile_id); + auto profile_info = getAccessControl().getSettingsProfileInfo(profile_id); checkSettingsConstraints(profile_info->settings); applySettingsChanges(profile_info->settings); settings_constraints_and_current_profiles = profile_info->getConstraintsAndProfileIDs(settings_constraints_and_current_profiles); @@ -1153,7 +1153,7 @@ std::shared_ptr Context::getSettingsCons auto lock = getLock(); if (settings_constraints_and_current_profiles) return settings_constraints_and_current_profiles; - static auto no_constraints_or_profiles = std::make_shared(getAccessControlManager()); + static auto no_constraints_or_profiles = std::make_shared(getAccessControl()); return no_constraints_or_profiles; } @@ -2641,7 +2641,7 @@ void Context::setApplicationType(ApplicationType type) void Context::setDefaultProfiles(const Poco::Util::AbstractConfiguration & config) { shared->default_profile_name = config.getString("default_profile", "default"); - getAccessControlManager().setDefaultProfileName(shared->default_profile_name); + getAccessControl().setDefaultProfileName(shared->default_profile_name); shared->system_profile_name = config.getString("system_profile", shared->default_profile_name); setCurrentProfile(shared->system_profile_name); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index cc6df875f7d..7d31a8375d8 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -85,7 +85,7 @@ class ActionLocksManager; using ActionLocksManagerPtr = std::shared_ptr; class ShellCommand; class ICompressionCodec; -class AccessControlManager; +class AccessControl; class Credentials; class GSSAcceptorContext; struct SettingsConstraintsAndProfileIDs; @@ -363,8 +363,8 @@ public: void setConfig(const ConfigurationPtr & config); const Poco::Util::AbstractConfiguration & getConfigRef() const; - AccessControlManager & getAccessControlManager(); - const AccessControlManager & getAccessControlManager() const; + AccessControl & getAccessControl(); + const AccessControl & getAccessControl() const; /// Sets external authenticators config (LDAP, Kerberos). void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config); @@ -381,7 +381,6 @@ public: /// Sets the current user assuming that he/she is already authenticated. /// WARNING: This function doesn't check password! - /// Normally you shouldn't call this function. Use the Session class to do authentication instead. 
void setUser(const UUID & user_id_); UserPtr getUser() const; diff --git a/src/Interpreters/FillingRow.cpp b/src/Interpreters/FillingRow.cpp index 4bbb8974fe9..df99c0d11ed 100644 --- a/src/Interpreters/FillingRow.cpp +++ b/src/Interpreters/FillingRow.cpp @@ -64,7 +64,7 @@ bool FillingRow::next(const FillingRow & to_row) continue; auto next_value = row[i]; - applyVisitor(FieldVisitorSum(getFillDescription(i).fill_step), next_value); + getFillDescription(i).step_func(next_value); if (less(next_value, getFillDescription(i).fill_to, getDirection(i))) { row[i] = next_value; @@ -74,7 +74,7 @@ bool FillingRow::next(const FillingRow & to_row) } auto next_value = row[pos]; - applyVisitor(FieldVisitorSum(getFillDescription(pos).fill_step), next_value); + getFillDescription(pos).step_func(next_value); if (less(to_row[pos], next_value, getDirection(pos))) return false; diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 1d112a7c548..6ba9e7505f2 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -27,6 +27,7 @@ #include #include #include + namespace DB { @@ -289,13 +290,11 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s if (table_join->getDictionaryReader()) { assert(disjuncts_num == 1); - LOG_DEBUG(log, "Performing join over dict"); data->type = Type::DICT; data->maps.resize(disjuncts_num); std::get(data->maps[0]).create(Type::DICT); - key_sizes.resize(1); - chooseMethod(key_columns, key_sizes[0]); /// init key_sizes + chooseMethod(kind, key_columns, key_sizes.emplace_back()); /// init key_sizes } else if (strictness == ASTTableJoin::Strictness::Asof) { @@ -321,13 +320,13 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s /// Therefore, add it back in such that it can be extracted appropriately from the full stored /// key_columns and key_sizes auto & asof_key_sizes = key_sizes.emplace_back(); - data->type = chooseMethod(key_columns, asof_key_sizes); + data->type = chooseMethod(kind, key_columns, asof_key_sizes); asof_key_sizes.push_back(asof_size); } else { /// Choose data structure to use for JOIN. 
- auto current_join_method = chooseMethod(key_columns, key_sizes.emplace_back()); + auto current_join_method = chooseMethod(kind, key_columns, key_sizes.emplace_back()); if (data->type == Type::EMPTY) data->type = current_join_method; else if (data->type != current_join_method) @@ -337,14 +336,20 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s for (auto & maps : data->maps) dataMapInit(maps); + + LOG_DEBUG(log, "Join type: {}, kind: {}, strictness: {}", data->type, kind, strictness); } -HashJoin::Type HashJoin::chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes) +HashJoin::Type HashJoin::chooseMethod(ASTTableJoin::Kind kind, const ColumnRawPtrs & key_columns, Sizes & key_sizes) { size_t keys_size = key_columns.size(); if (keys_size == 0) - return Type::CROSS; + { + if (isCrossOrComma(kind)) + return Type::CROSS; + return Type::EMPTY; + } bool all_fixed = true; size_t keys_bytes = 0; @@ -446,6 +451,23 @@ private: std::vector positions; }; +/// Dummy key getter, always find nothing, used for JOIN ON NULL +template +class KeyGetterEmpty +{ +public: + struct MappedType + { + using mapped_type = Mapped; + }; + + using FindResult = ColumnsHashing::columns_hashing_impl::FindResultImpl; + + KeyGetterEmpty() = default; + + FindResult findKey(MappedType, size_t, const Arena &) { return FindResult(); } +}; + template struct KeyGetterForTypeImpl; @@ -723,8 +745,6 @@ Block HashJoin::structureRightBlock(const Block & block) const bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits) { - if (empty()) - throw Exception("Logical error: HashJoin was not initialized", ErrorCodes::LOGICAL_ERROR); if (overDictionary()) throw Exception("Logical error: insert into hash-map in HashJoin over dictionary", ErrorCodes::LOGICAL_ERROR); @@ -777,15 +797,15 @@ bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits) auto join_mask_col = JoinCommon::getColumnAsMask(block, onexprs[onexpr_idx].condColumnNames().second); /// Save blocks that do not hold conditions in ON section ColumnUInt8::MutablePtr not_joined_map = nullptr; - if (!multiple_disjuncts && isRightOrFull(kind) && join_mask_col) + if (!multiple_disjuncts && isRightOrFull(kind) && !join_mask_col.isConstant()) { - const auto & join_mask = assert_cast(*join_mask_col).getData(); + const auto & join_mask = join_mask_col.getData(); /// Save rows that do not hold conditions not_joined_map = ColumnUInt8::create(block.rows(), 0); - for (size_t i = 0, sz = join_mask.size(); i < sz; ++i) + for (size_t i = 0, sz = join_mask->size(); i < sz; ++i) { /// Condition hold, do not save row - if (join_mask[i]) + if ((*join_mask)[i]) continue; /// NULL key will be saved anyway because, do not save twice @@ -802,7 +822,8 @@ bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits) { size_t size = insertFromBlockImpl( *this, data->type, map, rows, key_columns, key_sizes[onexpr_idx], stored_block, null_map, - join_mask_col ? &assert_cast(*join_mask_col).getData() : nullptr, + /// If mask is false constant, rows are added to hashmap anyway. 
It's not a happy-flow, so this case is not optimized + join_mask_col.getData(), data->pool); if (multiple_disjuncts) @@ -846,7 +867,7 @@ struct JoinOnKeyColumns ColumnPtr null_map_holder; /// Only rows where mask == true can be joined - ColumnPtr join_mask_column; + JoinCommon::JoinMask join_mask_column; Sizes key_sizes; @@ -859,17 +880,10 @@ struct JoinOnKeyColumns , null_map_holder(extractNestedColumnsAndNullMap(key_columns, null_map)) , join_mask_column(JoinCommon::getColumnAsMask(block, cond_column_name)) , key_sizes(key_sizes_) - {} - - bool isRowFiltered(size_t i) const { - if (join_mask_column) - { - UInt8ColumnDataPtr mask = &assert_cast(*(join_mask_column)).getData(); - return !(*mask)[i]; - } - return false; } + + bool isRowFiltered(size_t i) const { return join_mask_column.isRowFiltered(i); } }; class AddedColumns @@ -985,6 +999,7 @@ public: const IColumn & leftAsofKey() const { return *left_asof_key; } std::vector join_on_keys; + size_t rows_to_add; std::unique_ptr offsets_to_replicate; bool need_filter = false; @@ -998,6 +1013,7 @@ private: std::optional asof_type; ASOF::Inequality asof_inequality; const IColumn * left_asof_key = nullptr; + bool is_join_get; void addColumn(const ColumnWithTypeAndName & src_column, const std::string & qualified_name) @@ -1373,12 +1389,28 @@ IColumn::Filter switchJoinRightColumns( constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof; switch (type) { + case HashJoin::Type::EMPTY: + { + if constexpr (!is_asof_join) + { + using KeyGetter = KeyGetterEmpty; + std::vector key_getter_vector; + key_getter_vector.emplace_back(); + + using MapTypeVal = typename KeyGetter::MappedType; + std::vector a_map_type_vector; + a_map_type_vector.emplace_back(); + return joinRightColumnsSwitchNullability( + std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); + } + throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys. Type: {}", type); + } #define M(TYPE) \ case HashJoin::Type::TYPE: \ { \ using MapTypeVal = const typename std::remove_reference_t::element_type; \ using KeyGetter = typename KeyGetterForType::Type; \ - std::vector a_map_type_vector(mapv.size()); \ + std::vector a_map_type_vector(mapv.size()); \ std::vector key_getter_vector; \ for (size_t d = 0; d < added_columns.join_on_keys.size(); ++d) \ { \ @@ -1393,7 +1425,7 @@ IColumn::Filter switchJoinRightColumns( #undef M default: - throw Exception("Unsupported JOIN keys. 
Type: " + toString(static_cast(type)), ErrorCodes::UNSUPPORTED_JOIN_KEYS); + throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", type); } } @@ -1828,7 +1860,7 @@ class NotJoinedHash final : public NotJoinedBlocks::RightColumnsFiller { public: NotJoinedHash(const HashJoin & parent_, UInt64 max_block_size_) - : parent(parent_), max_block_size(max_block_size_) + : parent(parent_), max_block_size(max_block_size_), current_block_start(0) {} Block getEmptyBlock() override { return parent.savedBlockSample().cloneEmpty(); } @@ -1836,13 +1868,20 @@ public: size_t fillColumns(MutableColumns & columns_right) override { size_t rows_added = 0; - auto fill_callback = [&](auto, auto strictness, auto & map) + if (unlikely(parent.data->type == HashJoin::Type::EMPTY)) { - rows_added = fillColumnsFromMap(map, columns_right); - }; + rows_added = fillColumnsFromData(parent.data->blocks, columns_right); + } + else + { + auto fill_callback = [&](auto, auto strictness, auto & map) + { + rows_added = fillColumnsFromMap(map, columns_right); + }; - if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps.front(), fill_callback)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown JOIN strictness '{}' (must be on of: ANY, ALL, ASOF)", parent.strictness); + if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps.front(), fill_callback)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown JOIN strictness '{}' (must be on of: ANY, ALL, ASOF)", parent.strictness); + } if constexpr (!multiple_disjuncts) { @@ -1856,10 +1895,48 @@ private: const HashJoin & parent; UInt64 max_block_size; + size_t current_block_start; + std::any position; std::optional nulls_position; std::optional used_position; + size_t fillColumnsFromData(const BlocksList & blocks, MutableColumns & columns_right) + { + if (!position.has_value()) + position = std::make_any(blocks.begin()); + + auto & block_it = std::any_cast(position); + auto end = blocks.end(); + + size_t rows_added = 0; + for (; block_it != end; ++block_it) + { + size_t rows_from_block = std::min(max_block_size - rows_added, block_it->rows() - current_block_start); + for (size_t j = 0; j < columns_right.size(); ++j) + { + const auto & col = block_it->getByPosition(j).column; + columns_right[j]->insertRangeFrom(*col, current_block_start, rows_from_block); + } + rows_added += rows_from_block; + + if (rows_added >= max_block_size) + { + /// How many rows have been read + current_block_start += rows_from_block; + if (block_it->rows() <= current_block_start) + { + /// current block was fully read + ++block_it; + current_block_start = 0; + } + break; + } + current_block_start = 0; + } + return rows_added; + } + template size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right) { @@ -1871,8 +1948,7 @@ private: APPLY_FOR_JOIN_VARIANTS(M) #undef M default: - throw Exception("Unsupported JOIN keys. 
Type: " + toString(static_cast(parent.data->type)), - ErrorCodes::UNSUPPORTED_JOIN_KEYS); + throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", parent.data->type) ; } __builtin_unreachable(); @@ -1949,12 +2025,14 @@ private: for (auto & it = *nulls_position; it != end && rows_added < max_block_size; ++it) { - const Block * block = it->first; - const NullMap & nullmap = assert_cast(*it->second).getData(); + const auto * block = it->first; + ConstNullMapPtr nullmap = nullptr; + if (it->second) + nullmap = &assert_cast(*it->second).getData(); - for (size_t row = 0; row < nullmap.size(); ++row) + for (size_t row = 0; row < block->rows(); ++row) { - if (nullmap[row]) + if (nullmap && (*nullmap)[row]) { for (size_t col = 0; col < columns_keys_and_right.size(); ++col) columns_keys_and_right[col]->insertFrom(*block->getByPosition(col).column, row); diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index f41f63a6a55..50c8b2c55ee 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -231,6 +231,7 @@ public: template struct MapsTemplate { + using MappedType = Mapped; std::unique_ptr> key8; std::unique_ptr> key16; std::unique_ptr>> key32; @@ -411,7 +412,7 @@ private: void joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const; - static Type chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes); + static Type chooseMethod(ASTTableJoin::Kind kind, const ColumnRawPtrs & key_columns, Sizes & key_sizes); bool empty() const; bool overDictionary() const; diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 729a495987f..b620ddf6a1e 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index 9fd318ee4cf..b52b91f47eb 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 6d38c55bd62..f1db1f771f3 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -41,7 +41,7 @@ #include #include -#include +#include #include #include diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 89d27a30555..03c4b4ae1b6 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 5370aee1096..638c671c3a3 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 4fbad7e5471..13a376dff8d 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterExistsQuery.cpp 
b/src/Interpreters/InterpreterExistsQuery.cpp index 24c30a8be30..5af51c61b29 100644 --- a/src/Interpreters/InterpreterExistsQuery.cpp +++ b/src/Interpreters/InterpreterExistsQuery.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index fcf5f19aef6..e9ee2b0910a 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -2,17 +2,10 @@ #include #include #include -#include -#include -#include -#include -#include #include -#include #include #include #include -#include #include #include #include @@ -21,12 +14,6 @@ #include #include #include -#include -#include -#include -#include -#include -#include #include #include #include @@ -34,26 +21,33 @@ #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include #include #include #include #include #include -#include -#include -#include -#include -#include #include -#include #include #include #include #include #include #include -#include #include #include #include @@ -62,13 +56,7 @@ #include #include #include -#include -#include -#include -#include #include -#include -#include #include #include #include @@ -76,6 +64,20 @@ #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include #include diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 6a1a8652b23..231eb15b02f 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterOptimizeQuery.cpp b/src/Interpreters/InterpreterOptimizeQuery.cpp index 64de5ee0479..a44a49ec020 100644 --- a/src/Interpreters/InterpreterOptimizeQuery.cpp +++ b/src/Interpreters/InterpreterOptimizeQuery.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index e3d52487a52..72d7e9b1cba 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 0050df1bf52..fc6e193fc6e 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -10,7 +11,7 @@ #include #include -#include +#include #include #include @@ -715,7 +716,7 @@ Block InterpreterSelectQuery::getSampleBlockImpl() static Field getWithFillFieldValue(const ASTPtr & node, ContextPtr context) { - const auto & [field, type] = evaluateConstantExpression(node, context); + auto [field, type] = evaluateConstantExpression(node, context); if (!isColumnedAsNumber(type)) throw Exception("Illegal type " + type->getName() + " of WITH FILL expression, must be numeric type", ErrorCodes::INVALID_WITH_FILL_EXPRESSION); @@ -723,6 +724,19 @@ static Field getWithFillFieldValue(const ASTPtr & node, ContextPtr context) return field; } +static std::pair> getWithFillStep(const ASTPtr & node, ContextPtr context) +{ + auto [field, type] = 
evaluateConstantExpression(node, context); + + if (const auto * type_interval = typeid_cast(type.get())) + return std::make_pair(std::move(field), type_interval->getKind()); + + if (isColumnedAsNumber(type)) + return std::make_pair(std::move(field), std::nullopt); + + throw Exception("Illegal type " + type->getName() + " of WITH FILL expression, must be numeric type", ErrorCodes::INVALID_WITH_FILL_EXPRESSION); +} + static FillColumnDescription getWithFillDescription(const ASTOrderByElement & order_by_elem, ContextPtr context) { FillColumnDescription descr; @@ -730,8 +744,9 @@ static FillColumnDescription getWithFillDescription(const ASTOrderByElement & or descr.fill_from = getWithFillFieldValue(order_by_elem.fill_from, context); if (order_by_elem.fill_to) descr.fill_to = getWithFillFieldValue(order_by_elem.fill_to, context); + if (order_by_elem.fill_step) - descr.fill_step = getWithFillFieldValue(order_by_elem.fill_step, context); + std::tie(descr.fill_step, descr.step_kind) = getWithFillStep(order_by_elem.fill_step, context); else descr.fill_step = order_by_elem.direction; diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index 30a417f6fa7..84dbae0fac5 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index e34d974fa80..69bf036ae97 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterUseQuery.cpp b/src/Interpreters/InterpreterUseQuery.cpp index 626d2f499c7..d8a5ae57470 100644 --- a/src/Interpreters/InterpreterUseQuery.cpp +++ b/src/Interpreters/InterpreterUseQuery.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include diff --git a/src/Interpreters/InterpreterWatchQuery.cpp b/src/Interpreters/InterpreterWatchQuery.cpp index e5e447562c6..4e30c3d21a4 100644 --- a/src/Interpreters/InterpreterWatchQuery.cpp +++ b/src/Interpreters/InterpreterWatchQuery.cpp @@ -14,7 +14,7 @@ limitations under the License. 
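As a rough illustration of what getWithFillStep now returns, a WITH FILL step is either a plain numeric delta or an interval amount paired with a time unit, so the filling transform can advance a date/time column correctly. The sketch below uses a reduced, hypothetical `IntervalKind` and plain integers rather than ClickHouse's Field/IntervalKind types:

```cpp
#include <cstdint>
#include <iostream>
#include <optional>

enum class IntervalKind { Second, Minute, Hour, Day };  // reduced set for the sketch

struct FillStep
{
    int64_t amount = 1;
    std::optional<IntervalKind> kind;  // empty => plain numeric step
};

int64_t advance(int64_t value, const FillStep & step)
{
    if (!step.kind)
        return value + step.amount;            // numeric column: just add the step
    switch (*step.kind)                        // time column: scale by the unit
    {
        case IntervalKind::Second: return value + step.amount;
        case IntervalKind::Minute: return value + step.amount * 60;
        case IntervalKind::Hour:   return value + step.amount * 3600;
        case IntervalKind::Day:    return value + step.amount * 86400;
    }
    return value;
}

int main()
{
    FillStep numeric{5, std::nullopt};
    FillStep hourly{1, IntervalKind::Hour};
    std::cout << advance(10, numeric) << ' ' << advance(0, hourly) << '\n';  // prints "15 3600"
}
```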
*/ #include #include #include -#include +#include #include diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index 7fb9f1a3ceb..48d5f5254ff 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -50,12 +50,12 @@ ColumnWithTypeAndName condtitionColumnToJoinable(const Block & block, const Stri if (!src_column_name.empty()) { - auto mask_col = JoinCommon::getColumnAsMask(block, src_column_name); - assert(mask_col); - const auto & mask_data = assert_cast(*mask_col).getData(); - - for (size_t i = 0; i < res_size; ++i) - null_map->getData()[i] = !mask_data[i]; + auto join_mask = JoinCommon::getColumnAsMask(block, src_column_name); + if (!join_mask.isConstant()) + { + for (size_t i = 0; i < res_size; ++i) + null_map->getData()[i] = join_mask.isRowFiltered(i); + } } ColumnPtr res_col = ColumnNullable::create(std::move(data_col), std::move(null_map)); @@ -477,6 +477,7 @@ MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & right , max_joined_block_rows(table_join->maxJoinedBlockRows()) , max_rows_in_right_block(table_join->maxRowsInRightBlock()) , max_files_to_merge(table_join->maxFilesToMerge()) + , log(&Poco::Logger::get("MergeJoin")) { switch (table_join->strictness()) { @@ -549,6 +550,8 @@ MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & right makeSortAndMerge(key_names_left, left_sort_description, left_merge_description); makeSortAndMerge(key_names_right, right_sort_description, right_merge_description); + LOG_DEBUG(log, "Joining keys: left [{}], right [{}]", fmt::join(key_names_left, ", "), fmt::join(key_names_right, ", ")); + /// Temporary disable 'partial_merge_join_left_table_buffer_bytes' without 'partial_merge_join_optimizations' if (table_join->enablePartialMergeJoinOptimizations()) if (size_t max_bytes = table_join->maxBytesInLeftBuffer()) diff --git a/src/Interpreters/MergeJoin.h b/src/Interpreters/MergeJoin.h index 0e2e771255d..2cf287fd2fd 100644 --- a/src/Interpreters/MergeJoin.h +++ b/src/Interpreters/MergeJoin.h @@ -118,6 +118,8 @@ private: Names lowcard_right_keys; + Poco::Logger * log; + void changeLeftColumns(Block & block, MutableColumns && columns) const; void addRightColumns(Block & block, MutableColumns && columns); diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 020d297a6b9..39d2abc9b43 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include #include @@ -246,6 +246,7 @@ void Session::shutdownNamedSessions() Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_) : auth_id(UUIDHelpers::generateV4()), global_context(global_context_), + interface(interface_), log(&Poco::Logger::get(String{magic_enum::enum_name(interface_)} + "-Session")) { prepared_client_info.emplace(); @@ -271,12 +272,12 @@ Session::~Session() } } -Authentication::Type Session::getAuthenticationType(const String & user_name) const +AuthenticationType Session::getAuthenticationType(const String & user_name) const { - return global_context->getAccessControlManager().read(user_name)->authentication.getType(); + return global_context->getAccessControl().read(user_name)->auth_data.getType(); } -Authentication::Type Session::getAuthenticationTypeOrLogInFailure(const String & user_name) const +AuthenticationType Session::getAuthenticationTypeOrLogInFailure(const String & user_name) const { try { @@ -310,7 +311,7 @@ void Session::authenticate(const Credentials & credentials_, const 
Poco::Net::So try { - user_id = global_context->getAccessControlManager().login(credentials_, address.host()); + user_id = global_context->getAccessControl().login(credentials_, address.host()); LOG_DEBUG(log, "{} Authenticated with global context as user {}", toString(auth_id), user_id ? toString(*user_id) : ""); } @@ -418,6 +419,11 @@ ContextMutablePtr Session::makeQueryContext(ClientInfo && query_client_info) con std::shared_ptr Session::getSessionLog() const { + /// For the LOCAL interface we don't send events to the session log + /// because the LOCAL interface is internal, it does nothing with networking. + if (interface == ClientInfo::Interface::LOCAL) + return nullptr; + // take it from global context, since it outlives the Session and always available. // please note that server may have session_log disabled, hence this may return nullptr. return global_context->getSessionLog(); diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index f3cae33d752..71964130412 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include @@ -14,7 +14,7 @@ namespace Poco::Net { class SocketAddress; } namespace DB { class Credentials; -class Authentication; +class AuthenticationData; struct NamedSessionData; class NamedSessionsStorage; struct User; @@ -41,10 +41,10 @@ public: Session& operator=(const Session &) = delete; /// Provides information about the authentication type of a specified user. - Authentication::Type getAuthenticationType(const String & user_name) const; + AuthenticationType getAuthenticationType(const String & user_name) const; /// Same as getAuthenticationType, but adds LoginFailure event in case of error. - Authentication::Type getAuthenticationTypeOrLogInFailure(const String & user_name) const; + AuthenticationType getAuthenticationTypeOrLogInFailure(const String & user_name) const; /// Sets the current user, checks the credentials and that the specified address is allowed to connect from. /// The function throws an exception if there is no such user or password is wrong. @@ -79,6 +79,7 @@ private: mutable bool notified_session_log_about_login = false; const UUID auth_id; const ContextPtr global_context; + const ClientInfo::Interface interface; /// ClientInfo that will be copied to a session context when it's created. 
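The getSessionLog change above adds a small guard: sessions created through the internal LOCAL interface never emit audit events, everything else still falls back to whatever the server has configured. A minimal sketch of that guard with hypothetical stand-in types (not the real Session/SessionLog classes):

```cpp
#include <iostream>
#include <memory>

enum class Interface { TCP, HTTP, LOCAL };

struct AuditLog { void add(const char * what) { std::cout << "audit: " << what << '\n'; } };

std::shared_ptr<AuditLog> sessionLogFor(Interface iface, std::shared_ptr<AuditLog> global_log)
{
    if (iface == Interface::LOCAL)
        return nullptr;           // internal interface, no networking involved: skip auditing
    return global_log;            // may still be nullptr if the server has session_log disabled
}

int main()
{
    auto log = std::make_shared<AuditLog>();
    if (auto l = sessionLogFor(Interface::HTTP, log))  l->add("login via HTTP");   // logged
    if (auto l = sessionLogFor(Interface::LOCAL, log)) l->add("login via LOCAL");  // skipped
}
```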
std::optional prepared_client_info; diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index a4847d4c492..f9419088df8 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -45,7 +45,7 @@ auto eventTime() return std::make_pair(time_in_seconds(finish_time), time_in_microseconds(finish_time)); } -using AuthType = Authentication::Type; +using AuthType = AuthenticationType; using Interface = ClientInfo::Interface; void fillColumnArray(const Strings & data, IColumn & column) @@ -84,7 +84,7 @@ NamesAndTypesList SessionLogElement::getNamesAndTypes() {"Logout", static_cast(SESSION_LOGOUT)} }); -#define AUTH_TYPE_NAME_AND_VALUE(v) std::make_pair(Authentication::TypeInfo::get(v).raw_name, static_cast(v)) +#define AUTH_TYPE_NAME_AND_VALUE(v) std::make_pair(AuthenticationTypeInfo::get(v).raw_name, static_cast(v)) const auto identified_with_column = std::make_shared( DataTypeEnum8::Values { @@ -152,7 +152,7 @@ NamesAndTypesList SessionLogElement::getNamesAndTypes() void SessionLogElement::appendToBlock(MutableColumns & columns) const { assert(type >= SESSION_LOGIN_FAILURE && type <= SESSION_LOGOUT); - assert(user_identified_with >= Authentication::Type::NO_PASSWORD && user_identified_with <= Authentication::Type::MAX_TYPE); + assert(user_identified_with >= AuthenticationType::NO_PASSWORD && user_identified_with <= AuthenticationType::MAX); size_t i = 0; @@ -214,8 +214,8 @@ void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional ses { const auto user = access->getUser(); log_entry.user = user->getName(); - log_entry.user_identified_with = user->authentication.getType(); - log_entry.external_auth_server = user->authentication.getLDAPServerName(); + log_entry.user_identified_with = user->auth_data.getType(); + log_entry.external_auth_server = user->auth_data.getLDAPServerName(); } if (session_id) @@ -244,7 +244,7 @@ void SessionLog::addLoginFailure( log_entry.user = user; log_entry.auth_failure_reason = reason.message(); log_entry.client_info = info; - log_entry.user_identified_with = Authentication::Type::NO_PASSWORD; + log_entry.user_identified_with = AuthenticationType::NO_PASSWORD; add(log_entry); } diff --git a/src/Interpreters/SessionLog.h b/src/Interpreters/SessionLog.h index 6d302c74d5f..93766d685e0 100644 --- a/src/Interpreters/SessionLog.h +++ b/src/Interpreters/SessionLog.h @@ -2,7 +2,7 @@ #include #include -#include +#include namespace DB { @@ -42,7 +42,7 @@ struct SessionLogElement Decimal64 event_time_microseconds{}; String user; - Authentication::Type user_identified_with = Authentication::Type::NO_PASSWORD; + AuthenticationType user_identified_with = AuthenticationType::NO_PASSWORD; String external_auth_server; Strings roles; Strings profiles; diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index ad16cc5af7f..e657bf38e49 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -108,6 +108,16 @@ TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_) { } +void TableJoin::resetKeys() +{ + clauses.clear(); + + key_asts_left.clear(); + key_asts_right.clear(); + left_type_map.clear(); + right_type_map.clear(); +} + void TableJoin::resetCollected() { clauses.clear(); @@ -224,6 +234,13 @@ Names TableJoin::requiredJoinedNames() const for (const auto & joined_column : columns_added_by_join) required_columns_set.insert(joined_column.name); + /* + * In case of `SELECT count() FROM ... JOIN .. ON NULL` required columns set for right table is empty. 
+ * But we have to get at least one column from right table to know the number of rows. + */ + if (required_columns_set.empty() && !columns_from_joined_table.empty()) + return {columns_from_joined_table.begin()->name}; + return Names(required_columns_set.begin(), required_columns_set.end()); } @@ -352,9 +369,7 @@ bool TableJoin::sameStrictnessAndKind(ASTTableJoin::Strictness strictness_, ASTT bool TableJoin::oneDisjunct() const { - if (!isCrossOrComma(kind())) - assert(!clauses.empty()); - return clauses.size() <= 1; + return clauses.size() == 1; } bool TableJoin::allowMergeJoin() const @@ -650,4 +665,10 @@ void TableJoin::assertHasOneOnExpr() const } } +void TableJoin::resetToCross() +{ + this->resetKeys(); + this->table_join.kind = ASTTableJoin::Kind::Cross; +} + } diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 7cd53442ffd..956fed99fb8 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -48,7 +48,6 @@ enum class JoinTableSide class TableJoin { - public: using NameToTypeMap = std::unordered_map; @@ -285,6 +284,10 @@ public: Block getRequiredRightKeys(const Block & right_table_keys, std::vector & keys_sources) const; String renamedRightColumnName(const String & name) const; + + void resetKeys(); + void resetToCross(); + std::unordered_map leftToRightKeyRemap() const; void setStorageJoin(std::shared_ptr storage); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index e0968b7fce4..ef8446eb4b9 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -22,6 +23,7 @@ #include #include #include +#include #include #include @@ -33,6 +35,8 @@ #include #include +#include +#include #include #include @@ -564,9 +568,68 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul out_table_join = table_join; } +/// Evaluate expression and return boolean value if it can be interpreted as bool. +/// Only UInt8 or NULL are allowed. +/// Returns `false` for 0 or NULL values, `true` for any non-negative value. 
+std::optional tryEvaluateConstCondition(ASTPtr expr, ContextPtr context) +{ + if (!expr) + return {}; + + Field eval_res; + DataTypePtr eval_res_type; + try + { + std::tie(eval_res, eval_res_type) = evaluateConstantExpression(expr, context); + } + catch (DB::Exception &) + { + /// not a constant expression + return {}; + } + /// UInt8, maybe Nullable, maybe LowCardinality, and NULL are allowed + eval_res_type = removeNullable(removeLowCardinality(eval_res_type)); + if (auto which = WhichDataType(eval_res_type); !which.isUInt8() && !which.isNothing()) + return {}; + + if (eval_res.isNull()) + return false; + + UInt8 res = eval_res.template safeGet(); + return res > 0; +} + +bool tryJoinOnConst(TableJoin & analyzed_join, ASTPtr & on_expression, ContextPtr context) +{ + bool join_on_value; + if (auto eval_const_res = tryEvaluateConstCondition(on_expression, context)) + join_on_value = *eval_const_res; + else + return false; + + if (!analyzed_join.forceHashJoin()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "JOIN ON constant ({}) supported only with join algorithm 'hash'", + queryToString(on_expression)); + + on_expression = nullptr; + if (join_on_value) + { + LOG_DEBUG(&Poco::Logger::get("TreeRewriter"), "Join on constant executed as cross join"); + analyzed_join.resetToCross(); + } + else + { + LOG_DEBUG(&Poco::Logger::get("TreeRewriter"), "Join on constant executed as empty join"); + analyzed_join.resetKeys(); + } + + return true; +} + /// Find the columns that are obtained by JOIN. -void collectJoinedColumns(TableJoin & analyzed_join, const ASTTableJoin & table_join, - const TablesWithColumns & tables, const Aliases & aliases) +void collectJoinedColumns(TableJoin & analyzed_join, ASTTableJoin & table_join, + const TablesWithColumns & tables, const Aliases & aliases, ContextPtr context) { assert(tables.size() >= 2); @@ -599,29 +662,41 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTTableJoin & table_ assert(analyzed_join.oneDisjunct()); } - if (analyzed_join.getClauses().empty()) + auto check_keys_empty = [] (auto e) { return e.key_names_left.empty(); }; + + /// All clauses should to have keys or be empty simultaneously + bool all_keys_empty = std::all_of(analyzed_join.getClauses().begin(), analyzed_join.getClauses().end(), check_keys_empty); + if (all_keys_empty) + { + /// Try join on constant (cross or empty join) or fail + if (is_asof) + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "Cannot get JOIN keys from JOIN ON section: {}", queryToString(table_join.on_expression)); + + bool join_on_const_ok = tryJoinOnConst(analyzed_join, table_join.on_expression, context); + if (!join_on_const_ok) + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "Cannot get JOIN keys from JOIN ON section: {}", queryToString(table_join.on_expression)); + } + else + { + bool any_keys_empty = std::any_of(analyzed_join.getClauses().begin(), analyzed_join.getClauses().end(), check_keys_empty); + + if (any_keys_empty) throw DB::Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Cannot get JOIN keys from JOIN ON section: '{}'", queryToString(table_join.on_expression)); - for (const auto & onexpr : analyzed_join.getClauses()) - { - if (onexpr.key_names_left.empty()) - throw DB::Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "Cannot get JOIN keys from JOIN ON section: '{}'", - queryToString(table_join.on_expression)); + if (is_asof) + { + if (!analyzed_join.oneDisjunct()) + throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "ASOF join doesn't support multiple ORs for keys 
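The tryEvaluateConstCondition / tryJoinOnConst pair above rewrites a JOIN whose ON expression folds to a constant: a constant true behaves like a CROSS join, while a constant false or NULL produces a join with no matches at all; otherwise the ordinary key extraction applies (and the real code additionally restricts the rewrite to the hash join algorithm). A simplified, self-contained sketch of that decision flow with hypothetical names in place of TableJoin/AST types:

```cpp
#include <iostream>
#include <optional>
#include <stdexcept>

enum class JoinPlan { KeyedHash, Cross, Empty };

// `constant_condition` is the result of constant-folding the ON expression:
// an empty optional means "not a constant"; false also covers NULL.
JoinPlan planJoin(std::optional<bool> constant_condition, bool has_keys)
{
    if (constant_condition)
        return *constant_condition ? JoinPlan::Cross   // ON 1: behaves like CROSS JOIN
                                   : JoinPlan::Empty;  // ON 0 / ON NULL: nothing ever matches
    if (!has_keys)
        throw std::runtime_error("Cannot get JOIN keys from JOIN ON section");
    return JoinPlan::KeyedHash;                        // ordinary equi-join on the extracted keys
}

int main()
{
    std::cout << int(planJoin(true, false))         << '\n';  // 1 -> Cross
    std::cout << int(planJoin(false, false))        << '\n';  // 2 -> Empty
    std::cout << int(planJoin(std::nullopt, true))  << '\n';  // 0 -> KeyedHash
}
```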
in JOIN ON section"); + data.asofToJoinKeys(); + } + + if (!analyzed_join.oneDisjunct() && !analyzed_join.forceHashJoin()) + throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "Only `hash` join supports multiple ORs for keys in JOIN ON section"); } - - if (is_asof) - { - if (!analyzed_join.oneDisjunct()) - throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "ASOF join doesn't support multiple ORs for keys in JOIN ON section"); - data.asofToJoinKeys(); - } - - if (!analyzed_join.oneDisjunct() && !analyzed_join.forceHashJoin()) - throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "Only `hash` join supports multiple ORs for keys in JOIN ON section"); - } } @@ -1052,7 +1127,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( auto * table_join_ast = select_query->join() ? select_query->join()->table_join->as() : nullptr; if (table_join_ast && tables_with_columns.size() >= 2) - collectJoinedColumns(*result.analyzed_join, *table_join_ast, tables_with_columns, result.aliases); + collectJoinedColumns(*result.analyzed_join, *table_join_ast, tables_with_columns, result.aliases, getContext()); result.aggregates = getAggregates(query, *select_query); result.window_function_asts = getWindowFunctions(query, *select_query); @@ -1175,7 +1250,7 @@ void TreeRewriter::normalize( // if we have at least two different functions. E.g. we will replace sum(x) // and count(x) with sumCount(x).1 and sumCount(x).2, and sumCount() will // be calculated only once because of CSE. - if (settings.optimize_fuse_sum_count_avg || settings.optimize_syntax_fuse_functions) + if (settings.optimize_fuse_sum_count_avg && settings.optimize_syntax_fuse_functions) { FuseSumCountAggregatesVisitor::Data data; FuseSumCountAggregatesVisitor(data).visit(query); diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index c3b8cc5c677..46ec6f776ee 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/join_common.cpp b/src/Interpreters/join_common.cpp index bf5d30437ec..bf20bef6992 100644 --- a/src/Interpreters/join_common.cpp +++ b/src/Interpreters/join_common.cpp @@ -1,18 +1,18 @@ #include +#include #include #include -#include #include #include #include -#include - #include #include +#include + namespace DB { @@ -492,23 +492,27 @@ bool typesEqualUpToNullability(DataTypePtr left_type, DataTypePtr right_type) return left_type_strict->equals(*right_type_strict); } -ColumnPtr getColumnAsMask(const Block & block, const String & column_name) +JoinMask getColumnAsMask(const Block & block, const String & column_name) { if (column_name.empty()) - return nullptr; + return JoinMask(true); const auto & src_col = block.getByName(column_name); DataTypePtr col_type = recursiveRemoveLowCardinality(src_col.type); if (isNothing(col_type)) - return ColumnUInt8::create(block.rows(), 0); + return JoinMask(false); - const auto & join_condition_col = recursiveRemoveLowCardinality(src_col.column->convertToFullColumnIfConst()); + if (const auto * const_cond = checkAndGetColumn(*src_col.column)) + { + return JoinMask(const_cond->getBool(0)); + } + ColumnPtr join_condition_col = recursiveRemoveLowCardinality(src_col.column->convertToFullColumnIfConst()); if (const auto * nullable_col = typeid_cast(join_condition_col.get())) { if (isNothing(assert_cast(*col_type).getNestedType())) - return ColumnUInt8::create(block.rows(), 0); + return 
JoinMask(false); /// Return nested column with NULL set to false const auto & nest_col = assert_cast(nullable_col->getNestedColumn()); @@ -517,10 +521,10 @@ ColumnPtr getColumnAsMask(const Block & block, const String & column_name) auto res = ColumnUInt8::create(nullable_col->size(), 0); for (size_t i = 0, sz = nullable_col->size(); i < sz; ++i) res->getData()[i] = !null_map.getData()[i] && nest_col.getData()[i]; - return res; + return JoinMask(std::move(res)); } else - return join_condition_col; + return JoinMask(std::move(join_condition_col)); } @@ -580,11 +584,10 @@ NotJoinedBlocks::NotJoinedBlocks(std::unique_ptr filler_, } if (column_indices_left.size() + column_indices_right.size() + same_result_keys.size() != result_sample_block.columns()) - throw Exception("Error in columns mapping in RIGHT|FULL JOIN. Left: " + toString(column_indices_left.size()) + - ", right: " + toString(column_indices_right.size()) + - ", same: " + toString(same_result_keys.size()) + - ", result: " + toString(result_sample_block.columns()), - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Error in columns mapping in RIGHT|FULL JOIN. Left: {}, right: {}, same: {}, result: {}", + column_indices_left.size(), column_indices_right.size(), + same_result_keys.size(), result_sample_block.columns()); } void NotJoinedBlocks::setRightIndex(size_t right_pos, size_t result_position) diff --git a/src/Interpreters/join_common.h b/src/Interpreters/join_common.h index 32264d57d33..d3d2a442e41 100644 --- a/src/Interpreters/join_common.h +++ b/src/Interpreters/join_common.h @@ -19,6 +19,46 @@ using UInt8ColumnDataPtr = const ColumnUInt8::Container *; namespace JoinCommon { + +/// Store boolean column handling constant value without materializing +/// Behaves similar to std::variant, but provides more convenient specialized interface +class JoinMask +{ +public: + explicit JoinMask(bool value) + : column(nullptr) + , const_value(value) + {} + + explicit JoinMask(ColumnPtr col) + : column(col) + , const_value(false) + {} + + bool isConstant() { return !column; } + + /// Return data if mask is not constant + UInt8ColumnDataPtr getData() + { + if (column) + return &assert_cast(*column).getData(); + return nullptr; + } + + inline bool isRowFiltered(size_t row) const + { + if (column) + return !assert_cast(*column).getData()[row]; + return !const_value; + } + +private: + ColumnPtr column; + /// Used if column is null + bool const_value; +}; + + bool canBecomeNullable(const DataTypePtr & type); DataTypePtr convertTypeToNullable(const DataTypePtr & type); void convertColumnToNullable(ColumnWithTypeAndName & column); @@ -58,7 +98,7 @@ void addDefaultValues(IColumn & column, const DataTypePtr & type, size_t count); bool typesEqualUpToNullability(DataTypePtr left_type, DataTypePtr right_type); /// Return mask array of type ColumnUInt8 for specified column. Source should have type UInt8 or Nullable(UInt8). 
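The JoinMask helper introduced above lets a constant join condition avoid materializing a mask column at all, while a per-row mask is consulted only when present; MergeJoin's null-map construction is the typical consumer. A simplified stand-in (std::vector instead of ColumnUInt8, class name `Mask` chosen here, not the real one) showing both paths:

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

class Mask
{
public:
    explicit Mask(bool value) : const_value(value) {}
    explicit Mask(std::vector<uint8_t> data_) : data(std::move(data_)) {}

    bool isConstant() const { return !data.has_value(); }

    bool isRowFiltered(size_t row) const
    {
        if (data)
            return !(*data)[row];   // 0 in the mask => the row fails the join condition
        return !const_value;        // a constant false filters every row
    }

private:
    std::optional<std::vector<uint8_t>> data;
    bool const_value = false;       // used only when `data` is empty
};

int main()
{
    Mask mask(std::vector<uint8_t>{1, 0, 1});   // row 1 fails the join condition

    std::vector<uint8_t> null_map(3, 0);
    if (!mask.isConstant())                     // a constant-true mask leaves the null map untouched
        for (size_t i = 0; i < null_map.size(); ++i)
            null_map[i] = mask.isRowFiltered(i);

    for (auto v : null_map)
        std::cout << int(v) << ' ';             // prints: 0 1 0
    std::cout << '\n';
}
```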
-ColumnPtr getColumnAsMask(const Block & block, const String & column_name); +JoinMask getColumnAsMask(const Block & block, const String & column_name); /// Split key and other columns by keys name list void splitAdditionalColumns(const Names & key_names, const Block & sample_block, Block & block_keys, Block & block_others); diff --git a/src/Parsers/ASTCreateQuotaQuery.cpp b/src/Parsers/Access/ASTCreateQuotaQuery.cpp similarity index 98% rename from src/Parsers/ASTCreateQuotaQuery.cpp rename to src/Parsers/Access/ASTCreateQuotaQuery.cpp index 135b25c5bdb..4e4c84f9e93 100644 --- a/src/Parsers/ASTCreateQuotaQuery.cpp +++ b/src/Parsers/Access/ASTCreateQuotaQuery.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include #include diff --git a/src/Parsers/ASTCreateQuotaQuery.h b/src/Parsers/Access/ASTCreateQuotaQuery.h similarity index 100% rename from src/Parsers/ASTCreateQuotaQuery.h rename to src/Parsers/Access/ASTCreateQuotaQuery.h diff --git a/src/Parsers/ASTCreateRoleQuery.cpp b/src/Parsers/Access/ASTCreateRoleQuery.cpp similarity index 95% rename from src/Parsers/ASTCreateRoleQuery.cpp rename to src/Parsers/Access/ASTCreateRoleQuery.cpp index 73b523a5bfe..29e78d710cf 100644 --- a/src/Parsers/ASTCreateRoleQuery.cpp +++ b/src/Parsers/Access/ASTCreateRoleQuery.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include diff --git a/src/Parsers/ASTCreateRoleQuery.h b/src/Parsers/Access/ASTCreateRoleQuery.h similarity index 100% rename from src/Parsers/ASTCreateRoleQuery.h rename to src/Parsers/Access/ASTCreateRoleQuery.h diff --git a/src/Parsers/ASTCreateRowPolicyQuery.cpp b/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp similarity index 97% rename from src/Parsers/ASTCreateRowPolicyQuery.cpp rename to src/Parsers/Access/ASTCreateRowPolicyQuery.cpp index 6aac008e0be..0267379d6e5 100644 --- a/src/Parsers/ASTCreateRowPolicyQuery.cpp +++ b/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp @@ -1,11 +1,11 @@ -#include -#include -#include +#include +#include +#include #include #include +#include #include #include -#include namespace DB diff --git a/src/Parsers/ASTCreateRowPolicyQuery.h b/src/Parsers/Access/ASTCreateRowPolicyQuery.h similarity index 100% rename from src/Parsers/ASTCreateRowPolicyQuery.h rename to src/Parsers/Access/ASTCreateRowPolicyQuery.h diff --git a/src/Parsers/ASTCreateSettingsProfileQuery.cpp b/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp similarity index 94% rename from src/Parsers/ASTCreateSettingsProfileQuery.cpp rename to src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp index e99c40ca681..d9385e6be7b 100644 --- a/src/Parsers/ASTCreateSettingsProfileQuery.cpp +++ b/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp @@ -1,6 +1,6 @@ -#include -#include -#include +#include +#include +#include #include #include diff --git a/src/Parsers/ASTCreateSettingsProfileQuery.h b/src/Parsers/Access/ASTCreateSettingsProfileQuery.h similarity index 100% rename from src/Parsers/ASTCreateSettingsProfileQuery.h rename to src/Parsers/Access/ASTCreateSettingsProfileQuery.h diff --git a/src/Parsers/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp similarity index 83% rename from src/Parsers/ASTCreateUserQuery.cpp rename to src/Parsers/Access/ASTCreateUserQuery.cpp index 594d21f2a4b..18030a5ed80 100644 --- a/src/Parsers/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -1,7 +1,7 @@ -#include -#include -#include -#include +#include +#include +#include +#include #include #include @@ -23,67 +23,67 @@ namespace 
} - void formatAuthentication(const Authentication & authentication, bool show_password, const IAST::FormatSettings & settings) + void formatAuthenticationData(const AuthenticationData & auth_data, bool show_password, const IAST::FormatSettings & settings) { - auto authentication_type = authentication.getType(); - if (authentication_type == Authentication::NO_PASSWORD) + auto auth_type = auth_data.getType(); + if (auth_type == AuthenticationType::NO_PASSWORD) { settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " NOT IDENTIFIED" << (settings.hilite ? IAST::hilite_none : ""); return; } - String authentication_type_name = Authentication::TypeInfo::get(authentication_type).name; + String auth_type_name = AuthenticationTypeInfo::get(auth_type).name; String by_keyword = "BY"; std::optional by_value; if ( show_password || - authentication_type == Authentication::LDAP || - authentication_type == Authentication::KERBEROS + auth_type == AuthenticationType::LDAP || + auth_type == AuthenticationType::KERBEROS ) { - switch (authentication_type) + switch (auth_type) { - case Authentication::PLAINTEXT_PASSWORD: + case AuthenticationType::PLAINTEXT_PASSWORD: { - by_value = authentication.getPassword(); + by_value = auth_data.getPassword(); break; } - case Authentication::SHA256_PASSWORD: + case AuthenticationType::SHA256_PASSWORD: { - authentication_type_name = "sha256_hash"; - by_value = authentication.getPasswordHashHex(); + auth_type_name = "sha256_hash"; + by_value = auth_data.getPasswordHashHex(); break; } - case Authentication::DOUBLE_SHA1_PASSWORD: + case AuthenticationType::DOUBLE_SHA1_PASSWORD: { - authentication_type_name = "double_sha1_hash"; - by_value = authentication.getPasswordHashHex(); + auth_type_name = "double_sha1_hash"; + by_value = auth_data.getPasswordHashHex(); break; } - case Authentication::LDAP: + case AuthenticationType::LDAP: { by_keyword = "SERVER"; - by_value = authentication.getLDAPServerName(); + by_value = auth_data.getLDAPServerName(); break; } - case Authentication::KERBEROS: + case AuthenticationType::KERBEROS: { by_keyword = "REALM"; - const auto & realm = authentication.getKerberosRealm(); + const auto & realm = auth_data.getKerberosRealm(); if (!realm.empty()) by_value = realm; break; } - case Authentication::NO_PASSWORD: [[fallthrough]]; - case Authentication::MAX_TYPE: - throw Exception("AST: Unexpected authentication type " + toString(authentication_type), ErrorCodes::LOGICAL_ERROR); + case AuthenticationType::NO_PASSWORD: [[fallthrough]]; + case AuthenticationType::MAX: + throw Exception("AST: Unexpected authentication type " + toString(auth_type), ErrorCodes::LOGICAL_ERROR); } } - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " IDENTIFIED WITH " << authentication_type_name + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " IDENTIFIED WITH " << auth_type_name << (settings.hilite ? 
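In the renamed formatAuthenticationData above, the authentication type determines both the keyword that introduces the value (BY / SERVER / REALM) and whether the value is printed at all. A hedged, reduced sketch of that shape with a hypothetical enum and plain std::string output instead of the AST FormatSettings machinery:

```cpp
#include <iostream>
#include <optional>
#include <string>

enum class AuthType { NoPassword, Sha256Password, Ldap, Kerberos };

std::string formatIdentification(AuthType type, const std::string & value, bool show_password)
{
    if (type == AuthType::NoPassword)
        return " NOT IDENTIFIED";

    std::string type_name;
    std::string keyword = "BY";
    std::optional<std::string> printed;

    switch (type)
    {
        case AuthType::Sha256Password:
            type_name = show_password ? "sha256_hash" : "sha256_password";
            if (show_password)
                printed = value;                 // the hash is only shown on request
            break;
        case AuthType::Ldap:
            type_name = "ldap";
            keyword = "SERVER";
            printed = value;                     // the server name is not a secret
            break;
        case AuthType::Kerberos:
            type_name = "kerberos";
            keyword = "REALM";
            if (!value.empty())
                printed = value;                 // the realm is optional
            break;
        case AuthType::NoPassword:
            break;                               // handled above
    }

    std::string result = " IDENTIFIED WITH " + type_name;
    if (printed)
        result += " " + keyword + " '" + *printed + "'";
    return result;
}

int main()
{
    std::cout << formatIdentification(AuthType::Ldap, "my_ldap", false) << '\n';
    std::cout << formatIdentification(AuthType::Sha256Password, "abc123", false) << '\n';
}
```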
IAST::hilite_none : ""); if (by_value) @@ -258,8 +258,8 @@ void ASTCreateUserQuery::formatImpl(const FormatSettings & format, FormatState & if (!new_name.empty()) formatRenameTo(new_name, format); - if (authentication) - formatAuthentication(*authentication, show_password, format); + if (auth_data) + formatAuthenticationData(*auth_data, show_password, format); if (hosts) formatHosts(nullptr, *hosts, format); diff --git a/src/Parsers/ASTCreateUserQuery.h b/src/Parsers/Access/ASTCreateUserQuery.h similarity index 95% rename from src/Parsers/ASTCreateUserQuery.h rename to src/Parsers/Access/ASTCreateUserQuery.h index 9e80abcb6dd..92db71e8581 100644 --- a/src/Parsers/ASTCreateUserQuery.h +++ b/src/Parsers/Access/ASTCreateUserQuery.h @@ -3,8 +3,8 @@ #include #include #include -#include -#include +#include +#include namespace DB @@ -44,7 +44,7 @@ public: std::shared_ptr names; String new_name; - std::optional authentication; + std::optional auth_data; bool show_password = true; /// formatImpl() will show the password or hash. std::optional hosts; diff --git a/src/Parsers/ASTDropAccessEntityQuery.cpp b/src/Parsers/Access/ASTDropAccessEntityQuery.cpp similarity index 93% rename from src/Parsers/ASTDropAccessEntityQuery.cpp rename to src/Parsers/Access/ASTDropAccessEntityQuery.cpp index 6c19c9f8af3..19064ad9109 100644 --- a/src/Parsers/ASTDropAccessEntityQuery.cpp +++ b/src/Parsers/Access/ASTDropAccessEntityQuery.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include diff --git a/src/Parsers/ASTDropAccessEntityQuery.h b/src/Parsers/Access/ASTDropAccessEntityQuery.h similarity index 100% rename from src/Parsers/ASTDropAccessEntityQuery.h rename to src/Parsers/Access/ASTDropAccessEntityQuery.h diff --git a/src/Parsers/ASTGrantQuery.cpp b/src/Parsers/Access/ASTGrantQuery.cpp similarity index 98% rename from src/Parsers/ASTGrantQuery.cpp rename to src/Parsers/Access/ASTGrantQuery.cpp index e2ac7658c0f..99dc119087c 100644 --- a/src/Parsers/ASTGrantQuery.cpp +++ b/src/Parsers/Access/ASTGrantQuery.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include diff --git a/src/Parsers/ASTGrantQuery.h b/src/Parsers/Access/ASTGrantQuery.h similarity index 97% rename from src/Parsers/ASTGrantQuery.h rename to src/Parsers/Access/ASTGrantQuery.h index b0fb64cb33e..f8ea9b478fe 100644 --- a/src/Parsers/ASTGrantQuery.h +++ b/src/Parsers/Access/ASTGrantQuery.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include diff --git a/src/Parsers/ASTRolesOrUsersSet.cpp b/src/Parsers/Access/ASTRolesOrUsersSet.cpp similarity index 98% rename from src/Parsers/ASTRolesOrUsersSet.cpp rename to src/Parsers/Access/ASTRolesOrUsersSet.cpp index fc5385e4a58..dc7626b90d6 100644 --- a/src/Parsers/ASTRolesOrUsersSet.cpp +++ b/src/Parsers/Access/ASTRolesOrUsersSet.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/Parsers/ASTRolesOrUsersSet.h b/src/Parsers/Access/ASTRolesOrUsersSet.h similarity index 100% rename from src/Parsers/ASTRolesOrUsersSet.h rename to src/Parsers/Access/ASTRolesOrUsersSet.h diff --git a/src/Parsers/ASTRowPolicyName.cpp b/src/Parsers/Access/ASTRowPolicyName.cpp similarity index 98% rename from src/Parsers/ASTRowPolicyName.cpp rename to src/Parsers/Access/ASTRowPolicyName.cpp index 0b69c1a46b3..c8b8107af20 100644 --- a/src/Parsers/ASTRowPolicyName.cpp +++ b/src/Parsers/Access/ASTRowPolicyName.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/src/Parsers/ASTRowPolicyName.h b/src/Parsers/Access/ASTRowPolicyName.h similarity index 
100% rename from src/Parsers/ASTRowPolicyName.h rename to src/Parsers/Access/ASTRowPolicyName.h diff --git a/src/Parsers/ASTSetRoleQuery.cpp b/src/Parsers/Access/ASTSetRoleQuery.cpp similarity index 91% rename from src/Parsers/ASTSetRoleQuery.cpp rename to src/Parsers/Access/ASTSetRoleQuery.cpp index e59e103b774..c886da1c8b5 100644 --- a/src/Parsers/ASTSetRoleQuery.cpp +++ b/src/Parsers/Access/ASTSetRoleQuery.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include diff --git a/src/Parsers/ASTSetRoleQuery.h b/src/Parsers/Access/ASTSetRoleQuery.h similarity index 100% rename from src/Parsers/ASTSetRoleQuery.h rename to src/Parsers/Access/ASTSetRoleQuery.h diff --git a/src/Parsers/ASTSettingsProfileElement.cpp b/src/Parsers/Access/ASTSettingsProfileElement.cpp similarity index 98% rename from src/Parsers/ASTSettingsProfileElement.cpp rename to src/Parsers/Access/ASTSettingsProfileElement.cpp index 8f35c154a79..23dba8a926f 100644 --- a/src/Parsers/ASTSettingsProfileElement.cpp +++ b/src/Parsers/Access/ASTSettingsProfileElement.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/src/Parsers/ASTSettingsProfileElement.h b/src/Parsers/Access/ASTSettingsProfileElement.h similarity index 100% rename from src/Parsers/ASTSettingsProfileElement.h rename to src/Parsers/Access/ASTSettingsProfileElement.h diff --git a/src/Parsers/ASTShowAccessEntitiesQuery.cpp b/src/Parsers/Access/ASTShowAccessEntitiesQuery.cpp similarity index 96% rename from src/Parsers/ASTShowAccessEntitiesQuery.cpp rename to src/Parsers/Access/ASTShowAccessEntitiesQuery.cpp index 6dd53fd5cde..e2dfe031f53 100644 --- a/src/Parsers/ASTShowAccessEntitiesQuery.cpp +++ b/src/Parsers/Access/ASTShowAccessEntitiesQuery.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/Parsers/ASTShowAccessEntitiesQuery.h b/src/Parsers/Access/ASTShowAccessEntitiesQuery.h similarity index 100% rename from src/Parsers/ASTShowAccessEntitiesQuery.h rename to src/Parsers/Access/ASTShowAccessEntitiesQuery.h diff --git a/src/Parsers/ASTShowAccessQuery.h b/src/Parsers/Access/ASTShowAccessQuery.h similarity index 100% rename from src/Parsers/ASTShowAccessQuery.h rename to src/Parsers/Access/ASTShowAccessQuery.h diff --git a/src/Parsers/ASTShowCreateAccessEntityQuery.cpp b/src/Parsers/Access/ASTShowCreateAccessEntityQuery.cpp similarity index 96% rename from src/Parsers/ASTShowCreateAccessEntityQuery.cpp rename to src/Parsers/Access/ASTShowCreateAccessEntityQuery.cpp index 5ff51a47002..db252db968d 100644 --- a/src/Parsers/ASTShowCreateAccessEntityQuery.cpp +++ b/src/Parsers/Access/ASTShowCreateAccessEntityQuery.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include diff --git a/src/Parsers/ASTShowCreateAccessEntityQuery.h b/src/Parsers/Access/ASTShowCreateAccessEntityQuery.h similarity index 100% rename from src/Parsers/ASTShowCreateAccessEntityQuery.h rename to src/Parsers/Access/ASTShowCreateAccessEntityQuery.h diff --git a/src/Parsers/ASTShowGrantsQuery.cpp b/src/Parsers/Access/ASTShowGrantsQuery.cpp similarity index 90% rename from src/Parsers/ASTShowGrantsQuery.cpp rename to src/Parsers/Access/ASTShowGrantsQuery.cpp index 4011cfc522c..5d54cf45dc1 100644 --- a/src/Parsers/ASTShowGrantsQuery.cpp +++ b/src/Parsers/Access/ASTShowGrantsQuery.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include diff --git a/src/Parsers/ASTShowGrantsQuery.h b/src/Parsers/Access/ASTShowGrantsQuery.h similarity index 100% rename from src/Parsers/ASTShowGrantsQuery.h 
rename to src/Parsers/Access/ASTShowGrantsQuery.h diff --git a/src/Parsers/ASTShowPrivilegesQuery.h b/src/Parsers/Access/ASTShowPrivilegesQuery.h similarity index 100% rename from src/Parsers/ASTShowPrivilegesQuery.h rename to src/Parsers/Access/ASTShowPrivilegesQuery.h diff --git a/src/Parsers/ASTUserNameWithHost.cpp b/src/Parsers/Access/ASTUserNameWithHost.cpp similarity index 97% rename from src/Parsers/ASTUserNameWithHost.cpp rename to src/Parsers/Access/ASTUserNameWithHost.cpp index b99ea5ab8d4..af84399ae45 100644 --- a/src/Parsers/ASTUserNameWithHost.cpp +++ b/src/Parsers/Access/ASTUserNameWithHost.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/Parsers/ASTUserNameWithHost.h b/src/Parsers/Access/ASTUserNameWithHost.h similarity index 100% rename from src/Parsers/ASTUserNameWithHost.h rename to src/Parsers/Access/ASTUserNameWithHost.h diff --git a/src/Parsers/ParserCreateQuotaQuery.cpp b/src/Parsers/Access/ParserCreateQuotaQuery.cpp similarity index 98% rename from src/Parsers/ParserCreateQuotaQuery.cpp rename to src/Parsers/Access/ParserCreateQuotaQuery.cpp index 682b345b937..0c6e1224cce 100644 --- a/src/Parsers/ParserCreateQuotaQuery.cpp +++ b/src/Parsers/Access/ParserCreateQuotaQuery.cpp @@ -1,14 +1,14 @@ -#include -#include +#include +#include +#include +#include +#include +#include #include +#include +#include #include #include -#include -#include -#include -#include -#include -#include #include #include #include diff --git a/src/Parsers/ParserCreateQuotaQuery.h b/src/Parsers/Access/ParserCreateQuotaQuery.h similarity index 100% rename from src/Parsers/ParserCreateQuotaQuery.h rename to src/Parsers/Access/ParserCreateQuotaQuery.h diff --git a/src/Parsers/ParserCreateRoleQuery.cpp b/src/Parsers/Access/ParserCreateRoleQuery.cpp similarity index 93% rename from src/Parsers/ParserCreateRoleQuery.cpp rename to src/Parsers/Access/ParserCreateRoleQuery.cpp index 5863136750f..314075cb7c0 100644 --- a/src/Parsers/ParserCreateRoleQuery.cpp +++ b/src/Parsers/Access/ParserCreateRoleQuery.cpp @@ -1,11 +1,11 @@ -#include -#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include #include diff --git a/src/Parsers/ParserCreateRoleQuery.h b/src/Parsers/Access/ParserCreateRoleQuery.h similarity index 100% rename from src/Parsers/ParserCreateRoleQuery.h rename to src/Parsers/Access/ParserCreateRoleQuery.h diff --git a/src/Parsers/ParserCreateRowPolicyQuery.cpp b/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp similarity index 96% rename from src/Parsers/ParserCreateRowPolicyQuery.cpp rename to src/Parsers/Access/ParserCreateRowPolicyQuery.cpp index d4d3db3f846..f6a33ec84a3 100644 --- a/src/Parsers/ParserCreateRowPolicyQuery.cpp +++ b/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp @@ -1,15 +1,15 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include +#include +#include +#include #include #include diff --git a/src/Parsers/ParserCreateRowPolicyQuery.h b/src/Parsers/Access/ParserCreateRowPolicyQuery.h similarity index 100% rename from src/Parsers/ParserCreateRowPolicyQuery.h rename to src/Parsers/Access/ParserCreateRowPolicyQuery.h diff --git a/src/Parsers/ParserCreateSettingsProfileQuery.cpp b/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp similarity index 93% rename from src/Parsers/ParserCreateSettingsProfileQuery.cpp rename to 
src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp index 2d1e6824b50..8b5f2df2dd2 100644 --- a/src/Parsers/ParserCreateSettingsProfileQuery.cpp +++ b/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp @@ -1,12 +1,12 @@ -#include -#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include #include #include diff --git a/src/Parsers/ParserCreateSettingsProfileQuery.h b/src/Parsers/Access/ParserCreateSettingsProfileQuery.h similarity index 100% rename from src/Parsers/ParserCreateSettingsProfileQuery.h rename to src/Parsers/Access/ParserCreateSettingsProfileQuery.h diff --git a/src/Parsers/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp similarity index 88% rename from src/Parsers/ParserCreateUserQuery.cpp rename to src/Parsers/Access/ParserCreateUserQuery.cpp index 7cada4b8ee8..c5b8c9e37b3 100644 --- a/src/Parsers/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -1,18 +1,18 @@ -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include #include #include -#include -#include -#include -#include -#include -#include -#include #include +#include #include #include #include @@ -34,20 +34,20 @@ namespace } - bool parseAuthentication(IParserBase::Pos & pos, Expected & expected, Authentication & authentication) + bool parseAuthenticationData(IParserBase::Pos & pos, Expected & expected, AuthenticationData & auth_data) { return IParserBase::wrapParseImpl(pos, [&] { if (ParserKeyword{"NOT IDENTIFIED"}.ignore(pos, expected)) { - authentication = Authentication{Authentication::NO_PASSWORD}; + auth_data = AuthenticationData{AuthenticationType::NO_PASSWORD}; return true; } if (!ParserKeyword{"IDENTIFIED"}.ignore(pos, expected)) return false; - std::optional type; + std::optional type; bool expect_password = false; bool expect_hash = false; bool expect_ldap_server_name = false; @@ -55,17 +55,17 @@ namespace if (ParserKeyword{"WITH"}.ignore(pos, expected)) { - for (auto check_type : collections::range(Authentication::MAX_TYPE)) + for (auto check_type : collections::range(AuthenticationType::MAX)) { - if (ParserKeyword{Authentication::TypeInfo::get(check_type).raw_name}.ignore(pos, expected)) + if (ParserKeyword{AuthenticationTypeInfo::get(check_type).raw_name}.ignore(pos, expected)) { type = check_type; - if (check_type == Authentication::LDAP) + if (check_type == AuthenticationType::LDAP) expect_ldap_server_name = true; - else if (check_type == Authentication::KERBEROS) + else if (check_type == AuthenticationType::KERBEROS) expect_kerberos_realm = true; - else if (check_type != Authentication::NO_PASSWORD) + else if (check_type != AuthenticationType::NO_PASSWORD) expect_password = true; break; @@ -76,12 +76,12 @@ namespace { if (ParserKeyword{"SHA256_HASH"}.ignore(pos, expected)) { - type = Authentication::SHA256_PASSWORD; + type = AuthenticationType::SHA256_PASSWORD; expect_hash = true; } else if (ParserKeyword{"DOUBLE_SHA1_HASH"}.ignore(pos, expected)) { - type = Authentication::DOUBLE_SHA1_PASSWORD; + type = AuthenticationType::DOUBLE_SHA1_PASSWORD; expect_hash = true; } else @@ -91,7 +91,7 @@ namespace if (!type) { - type = Authentication::SHA256_PASSWORD; + type = AuthenticationType::SHA256_PASSWORD; expect_password = true; } @@ -124,15 +124,15 @@ namespace } } - authentication = Authentication{*type}; + auth_data = AuthenticationData{*type}; if (expect_password) - 
authentication.setPassword(value); + auth_data.setPassword(value); else if (expect_hash) - authentication.setPasswordHashHex(value); + auth_data.setPasswordHashHex(value); else if (expect_ldap_server_name) - authentication.setLDAPServerName(value); + auth_data.setLDAPServerName(value); else if (expect_kerberos_realm) - authentication.setKerberosRealm(value); + auth_data.setKerberosRealm(value); return true; }); @@ -360,7 +360,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec auto names_ref = names->names; String new_name; - std::optional authentication; + std::optional auth_data; std::optional hosts; std::optional add_hosts; std::optional remove_hosts; @@ -372,12 +372,12 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec while (true) { - if (!authentication) + if (!auth_data) { - Authentication new_authentication; - if (parseAuthentication(pos, expected, new_authentication)) + AuthenticationData new_auth_data; + if (parseAuthenticationData(pos, expected, new_auth_data)) { - authentication = std::move(new_authentication); + auth_data = std::move(new_auth_data); continue; } } @@ -460,7 +460,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->cluster = std::move(cluster); query->names = std::move(names); query->new_name = std::move(new_name); - query->authentication = std::move(authentication); + query->auth_data = std::move(auth_data); query->hosts = std::move(hosts); query->add_hosts = std::move(add_hosts); query->remove_hosts = std::move(remove_hosts); diff --git a/src/Parsers/ParserCreateUserQuery.h b/src/Parsers/Access/ParserCreateUserQuery.h similarity index 100% rename from src/Parsers/ParserCreateUserQuery.h rename to src/Parsers/Access/ParserCreateUserQuery.h diff --git a/src/Parsers/ParserDropAccessEntityQuery.cpp b/src/Parsers/Access/ParserDropAccessEntityQuery.cpp similarity index 91% rename from src/Parsers/ParserDropAccessEntityQuery.cpp rename to src/Parsers/Access/ParserDropAccessEntityQuery.cpp index 14ef35e232c..d91cd8280a7 100644 --- a/src/Parsers/ParserDropAccessEntityQuery.cpp +++ b/src/Parsers/Access/ParserDropAccessEntityQuery.cpp @@ -1,10 +1,10 @@ -#include -#include +#include +#include +#include +#include +#include #include -#include -#include #include -#include #include diff --git a/src/Parsers/ParserDropAccessEntityQuery.h b/src/Parsers/Access/ParserDropAccessEntityQuery.h similarity index 100% rename from src/Parsers/ParserDropAccessEntityQuery.h rename to src/Parsers/Access/ParserDropAccessEntityQuery.h diff --git a/src/Parsers/ParserGrantQuery.cpp b/src/Parsers/Access/ParserGrantQuery.cpp similarity index 98% rename from src/Parsers/ParserGrantQuery.cpp rename to src/Parsers/Access/ParserGrantQuery.cpp index 85a6c9c71d4..8dd3e171237 100644 --- a/src/Parsers/ParserGrantQuery.cpp +++ b/src/Parsers/Access/ParserGrantQuery.cpp @@ -1,11 +1,11 @@ -#include -#include -#include +#include +#include +#include +#include #include #include #include #include -#include #include #include #include diff --git a/src/Parsers/ParserGrantQuery.h b/src/Parsers/Access/ParserGrantQuery.h similarity index 100% rename from src/Parsers/ParserGrantQuery.h rename to src/Parsers/Access/ParserGrantQuery.h diff --git a/src/Parsers/ParserRolesOrUsersSet.cpp b/src/Parsers/Access/ParserRolesOrUsersSet.cpp similarity index 96% rename from src/Parsers/ParserRolesOrUsersSet.cpp rename to src/Parsers/Access/ParserRolesOrUsersSet.cpp index 41e9ee6501d..6f426d89bb3 100644 --- 
a/src/Parsers/ParserRolesOrUsersSet.cpp +++ b/src/Parsers/Access/ParserRolesOrUsersSet.cpp @@ -1,9 +1,9 @@ -#include +#include +#include +#include +#include #include #include -#include -#include -#include #include #include diff --git a/src/Parsers/ParserRolesOrUsersSet.h b/src/Parsers/Access/ParserRolesOrUsersSet.h similarity index 100% rename from src/Parsers/ParserRolesOrUsersSet.h rename to src/Parsers/Access/ParserRolesOrUsersSet.h diff --git a/src/Parsers/ParserRowPolicyName.cpp b/src/Parsers/Access/ParserRowPolicyName.cpp similarity index 98% rename from src/Parsers/ParserRowPolicyName.cpp rename to src/Parsers/Access/ParserRowPolicyName.cpp index a3e12009c9a..aa159532754 100644 --- a/src/Parsers/ParserRowPolicyName.cpp +++ b/src/Parsers/Access/ParserRowPolicyName.cpp @@ -1,10 +1,10 @@ -#include -#include -#include -#include +#include +#include #include #include #include +#include +#include #include diff --git a/src/Parsers/ParserRowPolicyName.h b/src/Parsers/Access/ParserRowPolicyName.h similarity index 100% rename from src/Parsers/ParserRowPolicyName.h rename to src/Parsers/Access/ParserRowPolicyName.h diff --git a/src/Parsers/ParserSetRoleQuery.cpp b/src/Parsers/Access/ParserSetRoleQuery.cpp similarity index 92% rename from src/Parsers/ParserSetRoleQuery.cpp rename to src/Parsers/Access/ParserSetRoleQuery.cpp index 678474af040..50ccc67a372 100644 --- a/src/Parsers/ParserSetRoleQuery.cpp +++ b/src/Parsers/Access/ParserSetRoleQuery.cpp @@ -1,8 +1,8 @@ -#include -#include +#include +#include +#include +#include #include -#include -#include namespace DB diff --git a/src/Parsers/ParserSetRoleQuery.h b/src/Parsers/Access/ParserSetRoleQuery.h similarity index 100% rename from src/Parsers/ParserSetRoleQuery.h rename to src/Parsers/Access/ParserSetRoleQuery.h diff --git a/src/Parsers/ParserSettingsProfileElement.cpp b/src/Parsers/Access/ParserSettingsProfileElement.cpp similarity index 98% rename from src/Parsers/ParserSettingsProfileElement.cpp rename to src/Parsers/Access/ParserSettingsProfileElement.cpp index d7d982efe23..2c58bd0e623 100644 --- a/src/Parsers/ParserSettingsProfileElement.cpp +++ b/src/Parsers/Access/ParserSettingsProfileElement.cpp @@ -1,10 +1,10 @@ -#include +#include +#include +#include +#include #include #include #include -#include -#include -#include #include #include diff --git a/src/Parsers/ParserSettingsProfileElement.h b/src/Parsers/Access/ParserSettingsProfileElement.h similarity index 100% rename from src/Parsers/ParserSettingsProfileElement.h rename to src/Parsers/Access/ParserSettingsProfileElement.h diff --git a/src/Parsers/ParserShowAccessEntitiesQuery.cpp b/src/Parsers/Access/ParserShowAccessEntitiesQuery.cpp similarity index 96% rename from src/Parsers/ParserShowAccessEntitiesQuery.cpp rename to src/Parsers/Access/ParserShowAccessEntitiesQuery.cpp index 96a275902fb..b1329735b64 100644 --- a/src/Parsers/ParserShowAccessEntitiesQuery.cpp +++ b/src/Parsers/Access/ParserShowAccessEntitiesQuery.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include #include diff --git a/src/Parsers/ParserShowAccessEntitiesQuery.h b/src/Parsers/Access/ParserShowAccessEntitiesQuery.h similarity index 100% rename from src/Parsers/ParserShowAccessEntitiesQuery.h rename to src/Parsers/Access/ParserShowAccessEntitiesQuery.h diff --git a/src/Parsers/ParserShowAccessQuery.h b/src/Parsers/Access/ParserShowAccessQuery.h similarity index 92% rename from src/Parsers/ParserShowAccessQuery.h rename to src/Parsers/Access/ParserShowAccessQuery.h index 
b6483aa3d43..da0d6ff449f 100644 --- a/src/Parsers/ParserShowAccessQuery.h +++ b/src/Parsers/Access/ParserShowAccessQuery.h @@ -1,9 +1,9 @@ #pragma once #include +#include #include #include -#include namespace DB diff --git a/src/Parsers/ParserShowCreateAccessEntityQuery.cpp b/src/Parsers/Access/ParserShowCreateAccessEntityQuery.cpp similarity index 95% rename from src/Parsers/ParserShowCreateAccessEntityQuery.cpp rename to src/Parsers/Access/ParserShowCreateAccessEntityQuery.cpp index 86ee64ab778..2df04513361 100644 --- a/src/Parsers/ParserShowCreateAccessEntityQuery.cpp +++ b/src/Parsers/Access/ParserShowCreateAccessEntityQuery.cpp @@ -1,10 +1,10 @@ -#include -#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include #include #include #include diff --git a/src/Parsers/ParserShowCreateAccessEntityQuery.h b/src/Parsers/Access/ParserShowCreateAccessEntityQuery.h similarity index 100% rename from src/Parsers/ParserShowCreateAccessEntityQuery.h rename to src/Parsers/Access/ParserShowCreateAccessEntityQuery.h diff --git a/src/Parsers/ParserShowGrantsQuery.cpp b/src/Parsers/Access/ParserShowGrantsQuery.cpp similarity index 79% rename from src/Parsers/ParserShowGrantsQuery.cpp rename to src/Parsers/Access/ParserShowGrantsQuery.cpp index bd9e4012771..02d85d2f90b 100644 --- a/src/Parsers/ParserShowGrantsQuery.cpp +++ b/src/Parsers/Access/ParserShowGrantsQuery.cpp @@ -1,9 +1,9 @@ -#include -#include -#include -#include +#include +#include +#include +#include +#include #include -#include namespace DB diff --git a/src/Parsers/ParserShowGrantsQuery.h b/src/Parsers/Access/ParserShowGrantsQuery.h similarity index 100% rename from src/Parsers/ParserShowGrantsQuery.h rename to src/Parsers/Access/ParserShowGrantsQuery.h diff --git a/src/Parsers/ParserShowPrivilegesQuery.cpp b/src/Parsers/Access/ParserShowPrivilegesQuery.cpp similarity index 76% rename from src/Parsers/ParserShowPrivilegesQuery.cpp rename to src/Parsers/Access/ParserShowPrivilegesQuery.cpp index 56b4327dccf..a120d4ed7c2 100644 --- a/src/Parsers/ParserShowPrivilegesQuery.cpp +++ b/src/Parsers/Access/ParserShowPrivilegesQuery.cpp @@ -1,6 +1,6 @@ -#include +#include +#include #include -#include namespace DB diff --git a/src/Parsers/ParserShowPrivilegesQuery.h b/src/Parsers/Access/ParserShowPrivilegesQuery.h similarity index 100% rename from src/Parsers/ParserShowPrivilegesQuery.h rename to src/Parsers/Access/ParserShowPrivilegesQuery.h diff --git a/src/Parsers/ParserUserNameWithHost.cpp b/src/Parsers/Access/ParserUserNameWithHost.cpp similarity index 95% rename from src/Parsers/ParserUserNameWithHost.cpp rename to src/Parsers/Access/ParserUserNameWithHost.cpp index 9cb4bb6fc97..c9c655fecc4 100644 --- a/src/Parsers/ParserUserNameWithHost.cpp +++ b/src/Parsers/Access/ParserUserNameWithHost.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include #include diff --git a/src/Parsers/ParserUserNameWithHost.h b/src/Parsers/Access/ParserUserNameWithHost.h similarity index 100% rename from src/Parsers/ParserUserNameWithHost.h rename to src/Parsers/Access/ParserUserNameWithHost.h diff --git a/src/Parsers/parseUserName.cpp b/src/Parsers/Access/parseUserName.cpp similarity index 88% rename from src/Parsers/parseUserName.cpp rename to src/Parsers/Access/parseUserName.cpp index 1f25f51ef22..fb20d4d1e6c 100644 --- a/src/Parsers/parseUserName.cpp +++ b/src/Parsers/Access/parseUserName.cpp @@ -1,6 +1,6 @@ -#include -#include -#include +#include +#include +#include #include diff --git 
a/src/Parsers/parseUserName.h b/src/Parsers/Access/parseUserName.h similarity index 100% rename from src/Parsers/parseUserName.h rename to src/Parsers/Access/parseUserName.h diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index a20dd3567a9..d945e63589a 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -1,8 +1,10 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") + add_headers_and_sources(clickhouse_parsers .) +add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) -target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io) +target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access) if (USE_DEBUG_HELPERS) set (INCLUDE_DEBUG_HELPERS "-I\"${ClickHouse_SOURCE_DIR}/base\" -include \"${ClickHouse_SOURCE_DIR}/src/Parsers/iostream_debug_helpers.h\"") diff --git a/src/Parsers/ParserQuery.cpp b/src/Parsers/ParserQuery.cpp index 345013b6475..7677efd9415 100644 --- a/src/Parsers/ParserQuery.cpp +++ b/src/Parsers/ParserQuery.cpp @@ -2,26 +2,27 @@ #include #include #include -#include -#include -#include -#include -#include -#include #include #include -#include #include #include #include #include #include #include -#include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include + namespace DB { diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 4309063a736..f1e007948f9 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -13,16 +13,16 @@ #include #include #include -#include -#include -#include -#include -#include #include #include #include #include #include +#include +#include +#include +#include +#include #include "Common/Exception.h" diff --git a/src/Processors/Formats/IInputFormat.h b/src/Processors/Formats/IInputFormat.h index 8f5992efb1b..99d25d87b73 100644 --- a/src/Processors/Formats/IInputFormat.h +++ b/src/Processors/Formats/IInputFormat.h @@ -16,16 +16,11 @@ struct ColumnMapping using OptionalIndexes = std::vector>; OptionalIndexes column_indexes_for_input_fields; - /// Tracks which columns we have read in a single read() call. - /// For columns that are never read, it is initialized to false when we - /// read the file header, and never changed afterwards. - /// For other columns, it is updated on each read() call. - std::vector read_columns; + /// The list of column indexes that are not presented in input data. + std::vector not_presented_columns; - - /// Whether we have any columns that are not read from file at all, - /// and must be always initialized with defaults. - bool have_always_default_columns{false}; + /// The list of column names in input data. Needed for better exception messages. 
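The ColumnMapping change above swaps the per-column read flags and the have_always_default_columns bit for two explicit lists: the table columns that never appear in the input (to be filled with defaults) and the input column names kept for clearer error messages. A minimal standalone sketch of how such a mapping can be built and used; ColumnMappingSketch and buildMapping are invented names for illustration, not ClickHouse types:

    #include <optional>
    #include <stdexcept>
    #include <string>
    #include <unordered_map>
    #include <vector>

    // Illustrative stand-ins only; not the real ClickHouse structures.
    struct ColumnMappingSketch
    {
        std::vector<std::optional<size_t>> column_indexes_for_input_fields; // input field -> table column
        std::vector<size_t> not_presented_columns;                          // table columns absent from input
        std::vector<std::string> names_of_columns;                          // input column names, for messages
    };

    ColumnMappingSketch buildMapping(
        const std::vector<std::string> & header_names,
        const std::unordered_map<std::string, size_t> & table_columns,
        bool skip_unknown_fields)
    {
        ColumnMappingSketch mapping;
        mapping.names_of_columns = header_names;
        std::vector<bool> seen(table_columns.size(), false);

        for (const auto & name : header_names)
        {
            auto it = table_columns.find(name);
            if (it == table_columns.end())
            {
                if (!skip_unknown_fields)
                    throw std::runtime_error("Unknown field found in header: '" + name + "'");
                mapping.column_indexes_for_input_fields.emplace_back(std::nullopt);
                continue;
            }
            seen[it->second] = true;
            mapping.column_indexes_for_input_fields.emplace_back(it->second);
        }

        // Every table column the header never mentioned must later be filled with defaults.
        for (size_t i = 0; i < seen.size(); ++i)
            if (!seen[i])
                mapping.not_presented_columns.push_back(i);

        return mapping;
    }
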
+ std::vector names_of_columns; }; using ColumnMappingPtr = std::shared_ptr; diff --git a/src/Processors/Formats/IRowOutputFormat.h b/src/Processors/Formats/IRowOutputFormat.h index c35d93b6133..18575419cd0 100644 --- a/src/Processors/Formats/IRowOutputFormat.h +++ b/src/Processors/Formats/IRowOutputFormat.h @@ -23,9 +23,18 @@ class WriteBuffer; */ class IRowOutputFormat : public IOutputFormat { +public: + using Params = RowOutputFormatParams; + +private: + bool prefix_written = false; + bool suffix_written = false; + protected: DataTypes types; Serializations serializations; + Params params; + bool first_row = true; void consume(Chunk chunk) override; @@ -33,9 +42,23 @@ protected: void consumeExtremes(Chunk chunk) override; void finalize() override; -public: - using Params = RowOutputFormatParams; + void writePrefixIfNot() + { + if (!prefix_written) + writePrefix(); + prefix_written = true; + } + + void writeSuffixIfNot() + { + if (!suffix_written) + writeSuffix(); + + suffix_written = true; + } + +public: IRowOutputFormat(const Block & header, WriteBuffer & out_, const Params & params_); /** Write a row. @@ -63,28 +86,6 @@ public: virtual void writeAfterExtremes() {} virtual void writeLastSuffix() {} /// Write something after resultset, totals end extremes. -private: - bool prefix_written = false; - bool suffix_written = false; - - Params params; - - void writePrefixIfNot() - { - if (!prefix_written) - writePrefix(); - - prefix_written = true; - } - - void writeSuffixIfNot() - { - if (!suffix_written) - writeSuffix(); - - suffix_written = true; - } - }; } diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index 74eeee74475..467738f49e8 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -93,7 +93,7 @@ public: virtual void backup(size_t len) override { out.position() -= len; } virtual uint64_t byteCount() const override { return out.count(); } - virtual void flush() override { out.next(); } + virtual void flush() override { } private: WriteBuffer & out; @@ -386,11 +386,6 @@ AvroRowOutputFormat::AvroRowOutputFormat( : IRowOutputFormat(header_, out_, params_) , settings(settings_) , serializer(header_.getColumnsWithTypeAndName(), std::make_unique(settings)) - , file_writer( - std::make_unique(out_), - serializer.getSchema(), - settings.avro.output_sync_interval, - getCodec(settings.avro.output_codec)) { } @@ -398,19 +393,73 @@ AvroRowOutputFormat::~AvroRowOutputFormat() = default; void AvroRowOutputFormat::writePrefix() { - file_writer.syncIfNeeded(); + // we have to recreate avro::DataFileWriterBase object due to its interface limitations + file_writer_ptr = std::make_unique( + std::make_unique(out), + serializer.getSchema(), + settings.avro.output_sync_interval, + getCodec(settings.avro.output_codec)); + + file_writer_ptr->syncIfNeeded(); } void AvroRowOutputFormat::write(const Columns & columns, size_t row_num) { - file_writer.syncIfNeeded(); - serializer.serializeRow(columns, row_num, file_writer.encoder()); - file_writer.incr(); + file_writer_ptr->syncIfNeeded(); + serializer.serializeRow(columns, row_num, file_writer_ptr->encoder()); + file_writer_ptr->incr(); } void AvroRowOutputFormat::writeSuffix() { - file_writer.close(); + file_writer_ptr.reset(); +} + +void AvroRowOutputFormat::consume(DB::Chunk chunk) +{ + if (params.callback) + consumeImplWithCallback(std::move(chunk)); + else + consumeImpl(std::move(chunk)); +} + +void 
AvroRowOutputFormat::consumeImpl(DB::Chunk chunk) +{ + auto num_rows = chunk.getNumRows(); + const auto & columns = chunk.getColumns(); + + writePrefixIfNot(); + for (size_t row = 0; row < num_rows; ++row) + { + write(columns, row); + } + +} + +void AvroRowOutputFormat::consumeImplWithCallback(DB::Chunk chunk) +{ + auto num_rows = chunk.getNumRows(); + const auto & columns = chunk.getColumns(); + + for (size_t row = 0; row < num_rows;) + { + size_t current_row = row; + /// used by WriteBufferToKafkaProducer to obtain auxiliary data + /// from the starting row of a file + + writePrefix(); + for (size_t row_in_file = 0; + row_in_file < settings.avro.output_rows_in_file && row < num_rows; + ++row, ++row_in_file) + { + write(columns, row); + } + + file_writer_ptr->flush(); + writeSuffix(); + + params.callback(columns, current_row); + } } void registerOutputFormatAvro(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.h b/src/Processors/Formats/Impl/AvroRowOutputFormat.h index c807736071e..a3e8493f757 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.h @@ -49,6 +49,7 @@ public: AvroRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & settings_); virtual ~AvroRowOutputFormat() override; + void consume(Chunk) override; String getName() const override { return "AvroRowOutputFormat"; } void write(const Columns & columns, size_t row_num) override; void writeField(const IColumn &, const ISerialization &, size_t) override {} @@ -58,7 +59,11 @@ public: private: FormatSettings settings; AvroSerializer serializer; - avro::DataFileWriterBase file_writer; + std::unique_ptr file_writer_ptr; + + void consumeImpl(Chunk); + void consumeImplWithCallback(Chunk); + }; } diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index c122b9eea1a..0506c539c0f 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -2,79 +2,96 @@ #include #include #include +#include +#include namespace DB { -BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, Block header, Params params_, bool with_names_, bool with_types_) - : IRowInputFormat(std::move(header), in_, params_), with_names(with_names_), with_types(with_types_) +namespace ErrorCodes +{ + extern const int CANNOT_SKIP_UNKNOWN_FIELD; +} + +BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, Block header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) + : RowInputFormatWithNamesAndTypes(std::move(header), in_, std::move(params_), with_names_, with_types_, format_settings_) { } - -bool BinaryRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) +std::vector BinaryRowInputFormat::readHeaderRow() { - if (in->eof()) - return false; + std::vector fields; + String field; + for (size_t i = 0; i < read_columns; ++i) + { + readStringBinary(field, *in); + fields.push_back(field); + } + return fields; +} - size_t num_columns = columns.size(); - for (size_t i = 0; i < num_columns; ++i) - serializations[i]->deserializeBinary(*columns[i], *in); +std::vector BinaryRowInputFormat::readNames() +{ + readVarUInt(read_columns, *in); + return readHeaderRow(); +} +std::vector BinaryRowInputFormat::readTypes() +{ + auto types = readHeaderRow(); + for (const auto & type_name : types) + 
read_data_types.push_back(DataTypeFactory::instance().get(type_name)); + return types; +} + +bool BinaryRowInputFormat::readField(IColumn & column, const DataTypePtr & /*type*/, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & /*column_name*/) +{ + serialization->deserializeBinary(column, *in); return true; } - -void BinaryRowInputFormat::readPrefix() +void BinaryRowInputFormat::skipHeaderRow() { - /// NOTE: The header is completely ignored. This can be easily improved. - - UInt64 columns = 0; String tmp; - - if (with_names || with_types) - { - readVarUInt(columns, *in); - } - - if (with_names) - { - for (size_t i = 0; i < columns; ++i) - { - readStringBinary(tmp, *in); - } - } - - if (with_types) - { - for (size_t i = 0; i < columns; ++i) - { - readStringBinary(tmp, *in); - } - } + for (size_t i = 0; i < read_columns; ++i) + readStringBinary(tmp, *in); } +void BinaryRowInputFormat::skipNames() +{ + readVarUInt(read_columns, *in); + skipHeaderRow(); +} + +void BinaryRowInputFormat::skipTypes() +{ + skipHeaderRow(); +} + +void BinaryRowInputFormat::skipField(size_t file_column) +{ + if (file_column >= read_data_types.size()) + throw Exception(ErrorCodes::CANNOT_SKIP_UNKNOWN_FIELD, "Cannot skip unknown field in RowBinaryWithNames format, because it's type is unknown"); + Field field; + read_data_types[file_column]->getDefaultSerialization()->deserializeBinary(field, *in); +} void registerInputFormatRowBinary(FormatFactory & factory) { - factory.registerInputFormat("RowBinary", []( - ReadBuffer & buf, - const Block & sample, - const IRowInputFormat::Params & params, - const FormatSettings &) + auto register_func = [&](const String & format_name, bool with_names, bool with_types) { - return std::make_shared(buf, sample, params, false, false); - }); + factory.registerInputFormat(format_name, [with_names, with_types]( + ReadBuffer & buf, + const Block & sample, + const IRowInputFormat::Params & params, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, params, with_names, with_types, settings); + }); + }; - factory.registerInputFormat("RowBinaryWithNamesAndTypes", []( - ReadBuffer & buf, - const Block & sample, - const IRowInputFormat::Params & params, - const FormatSettings &) - { - return std::make_shared(buf, sample, params, true, true); - }); + registerWithNamesAndTypes("RowBinary", register_func); } } diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.h b/src/Processors/Formats/Impl/BinaryRowInputFormat.h index e96a516c1a7..61d6df77522 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.h +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -12,19 +13,33 @@ class ReadBuffer; /** A stream for inputting data in a binary line-by-line format. 
*/ -class BinaryRowInputFormat : public IRowInputFormat +class BinaryRowInputFormat : public RowInputFormatWithNamesAndTypes { public: - BinaryRowInputFormat(ReadBuffer & in_, Block header, Params params_, bool with_names_, bool with_types_); - - bool readRow(MutableColumns & columns, RowReadExtension &) override; - void readPrefix() override; + BinaryRowInputFormat(ReadBuffer & in_, Block header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_); String getName() const override { return "BinaryRowInputFormat"; } + /// RowInputFormatWithNamesAndTypes implements logic with DiagnosticInfo, but + /// in this format we cannot provide any DiagnosticInfo, because here we have + /// just binary data. + std::string getDiagnosticInfo() override { return {}; } + private: - bool with_names; - bool with_types; + bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String & column_name) override; + void skipField(size_t file_column) override; + + void skipNames() override; + void skipTypes() override; + void skipHeaderRow(); + + std::vector readNames() override; + std::vector readTypes() override; + std::vector readHeaderRow(); + + /// Data types read from input data. + DataTypes read_data_types; + UInt64 read_columns = 0; }; } diff --git a/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp index dbaee68453e..02c4aee5e4e 100644 --- a/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -49,23 +50,19 @@ void BinaryRowOutputFormat::writeField(const IColumn & column, const ISerializat void registerOutputFormatRowBinary(FormatFactory & factory) { - factory.registerOutputFormat("RowBinary", []( - WriteBuffer & buf, - const Block & sample, - const RowOutputFormatParams & params, - const FormatSettings &) + auto register_func = [&](const String & format_name, bool with_names, bool with_types) { - return std::make_shared(buf, sample, false, false, params); - }); + factory.registerOutputFormat(format_name, [with_names, with_types]( + WriteBuffer & buf, + const Block & sample, + const RowOutputFormatParams & params, + const FormatSettings &) + { + return std::make_shared(buf, sample, with_names, with_types, params); + }); + }; - factory.registerOutputFormat("RowBinaryWithNamesAndTypes", []( - WriteBuffer & buf, - const Block & sample, - const RowOutputFormatParams & params, - const FormatSettings &) - { - return std::make_shared(buf, sample, true, true, params); - }); + registerWithNamesAndTypes("RowBinary", register_func); } } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 4beb260b64a..9de2b908b1e 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -3,12 +3,12 @@ #include #include -#include +#include #include +#include #include #include - namespace DB { @@ -19,62 +19,21 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } - -CSVRowInputFormat::CSVRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_, - bool with_names_, const FormatSettings & format_settings_) - : RowInputFormatWithDiagnosticInfo(header_, in_, params_) - , with_names(with_names_) - , format_settings(format_settings_) +CSVRowInputFormat::CSVRowInputFormat( + const Block 
& header_, + ReadBuffer & in_, + const Params & params_, + bool with_names_, + bool with_types_, + const FormatSettings & format_settings_) + : RowInputFormatWithNamesAndTypes(header_, in_, params_, with_names_, with_types_, format_settings_) { - const String bad_delimiters = " \t\"'.UL"; if (bad_delimiters.find(format_settings.csv.delimiter) != String::npos) - throw Exception(String("CSV format may not work correctly with delimiter '") + format_settings.csv.delimiter + - "'. Try use CustomSeparated format instead.", ErrorCodes::BAD_ARGUMENTS); - - const auto & sample = getPort().getHeader(); - size_t num_columns = sample.columns(); - - data_types.resize(num_columns); - column_indexes_by_names.reserve(num_columns); - - for (size_t i = 0; i < num_columns; ++i) - { - const auto & column_info = sample.getByPosition(i); - - data_types[i] = column_info.type; - column_indexes_by_names.emplace(column_info.name, i); - } -} - - -/// Map an input file column to a table column, based on its name. -void CSVRowInputFormat::addInputColumn(const String & column_name) -{ - const auto column_it = column_indexes_by_names.find(column_name); - if (column_it == column_indexes_by_names.end()) - { - if (format_settings.skip_unknown_fields) - { - column_mapping->column_indexes_for_input_fields.push_back(std::nullopt); - return; - } - throw Exception( - "Unknown field found in CSV header: '" + column_name + "' " + - "at position " + std::to_string(column_mapping->column_indexes_for_input_fields.size()) + - "\nSet the 'input_format_skip_unknown_fields' parameter explicitly to ignore and proceed", - ErrorCodes::INCORRECT_DATA - ); - } - - const auto column_index = column_it->second; - - if (column_mapping->read_columns[column_index]) - throw Exception("Duplicate field found while parsing CSV header: " + column_name, ErrorCodes::INCORRECT_DATA); - - column_mapping->read_columns[column_index] = true; - column_mapping->column_indexes_for_input_fields.emplace_back(column_index); + String("CSV format may not work correctly with delimiter '") + format_settings.csv.delimiter + + "'. Try use CustomSeparated format instead.", + ErrorCodes::BAD_ARGUMENTS); } static void skipEndOfLine(ReadBuffer & in) @@ -100,29 +59,6 @@ static void skipEndOfLine(ReadBuffer & in) throw Exception("Expected end of line", ErrorCodes::INCORRECT_DATA); } - -static void skipDelimiter(ReadBuffer & in, const char delimiter, bool is_last_column) -{ - if (is_last_column) - { - if (in.eof()) - return; - - /// we support the extra delimiter at the end of the line - if (*in.position() == delimiter) - { - ++in.position(); - if (in.eof()) - return; - } - - skipEndOfLine(in); - } - else - assertChar(delimiter, in); -} - - /// Skip `whitespace` symbols allowed in CSV. 
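As the constructor above shows, CSVRowInputFormat (like RowBinary and JSONCompactEachRow elsewhere in this patch) no longer parses its own header rows: it only validates the delimiter and overrides a small set of hooks, while the shared base class drives the with_names / with_types logic. A much-simplified sketch of that shape, assuming an invented base named WithNamesAndTypesSketch rather than the real RowInputFormatWithNamesAndTypes interface:

    #include <string>
    #include <vector>

    // Simplified model of the "with names / with types" refactoring: the base class owns the
    // header-row handling and the concrete format only supplies per-format parsing hooks.
    class WithNamesAndTypesSketch
    {
    public:
        WithNamesAndTypesSketch(bool with_names_, bool with_types_)
            : with_names(with_names_), with_types(with_types_) {}
        virtual ~WithNamesAndTypesSketch() = default;

        void readPrefix()
        {
            if (with_names)
                column_names = readNames();   // e.g. CSV header cells or a JSON array of strings
            if (with_types)
                type_names = readTypes();     // the second header row, when present
        }

    protected:
        virtual std::vector<std::string> readNames() = 0;
        virtual std::vector<std::string> readTypes() = 0;
        virtual bool readField(size_t column_index) = 0;  // returns false when a default was inserted
        virtual void skipField(size_t column_index) = 0;

        bool with_names;
        bool with_types;
        std::vector<std::string> column_names;
        std::vector<std::string> type_names;
    };
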
static inline void skipWhitespacesAndTabs(ReadBuffer & in) { @@ -132,255 +68,138 @@ static inline void skipWhitespacesAndTabs(ReadBuffer & in) ++in.position(); } - -static void skipRow(ReadBuffer & in, const FormatSettings::CSV & settings, size_t num_columns) +void CSVRowInputFormat::skipFieldDelimiter() { - String tmp; - for (size_t i = 0; i < num_columns; ++i) - { - skipWhitespacesAndTabs(in); - readCSVString(tmp, in, settings); - skipWhitespacesAndTabs(in); - - skipDelimiter(in, settings.delimiter, i + 1 == num_columns); - } + skipWhitespacesAndTabs(*in); + assertChar(format_settings.csv.delimiter, *in); } -void CSVRowInputFormat::setupAllColumnsByTableSchema() +String CSVRowInputFormat::readFieldIntoString() { - const auto & header = getPort().getHeader(); - column_mapping->read_columns.assign(header.columns(), true); - column_mapping->column_indexes_for_input_fields.resize(header.columns()); - - for (size_t i = 0; i < column_mapping->column_indexes_for_input_fields.size(); ++i) - column_mapping->column_indexes_for_input_fields[i] = i; + skipWhitespacesAndTabs(*in); + String field; + readCSVString(field, *in, format_settings.csv); + return field; } - -void CSVRowInputFormat::readPrefix() +void CSVRowInputFormat::skipField() { - /// In this format, we assume, that if first string field contain BOM as value, it will be written in quotes, - /// so BOM at beginning of stream cannot be confused with BOM in first string value, and it is safe to skip it. - skipBOMIfExists(*in); - - size_t num_columns = data_types.size(); - const auto & header = getPort().getHeader(); - - /// This is a bit of abstraction leakage, but we have almost the same code in other places. - /// Thus, we check if this InputFormat is working with the "real" beginning of the data in case of parallel parsing. - if (with_names && getCurrentUnitNumber() == 0) - { - /// This CSV file has a header row with column names. Depending on the - /// settings, use it or skip it. - if (format_settings.with_names_use_header) - { - /// Look at the file header to see which columns we have there. - /// The missing columns are filled with defaults. - column_mapping->read_columns.assign(header.columns(), false); - do - { - String column_name; - skipWhitespacesAndTabs(*in); - readCSVString(column_name, *in, format_settings.csv); - skipWhitespacesAndTabs(*in); - - addInputColumn(column_name); - } - while (checkChar(format_settings.csv.delimiter, *in)); - - skipDelimiter(*in, format_settings.csv.delimiter, true); - - for (auto read_column : column_mapping->read_columns) - { - if (!read_column) - { - column_mapping->have_always_default_columns = true; - break; - } - } - - return; - } - else - { - skipRow(*in, format_settings.csv, num_columns); - setupAllColumnsByTableSchema(); - } - } - else if (!column_mapping->is_set) - setupAllColumnsByTableSchema(); + readFieldIntoString(); } - -bool CSVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext) +void CSVRowInputFormat::skipRowEndDelimiter() { + skipWhitespacesAndTabs(*in); + if (in->eof()) - return false; + return; - updateDiagnosticInfo(); + /// we support the extra delimiter at the end of the line + if (*in->position() == format_settings.csv.delimiter) + ++in->position(); - /// Track whether we have to fill any columns in this row with default - /// values. If not, we return an empty column mask to the caller, so that - /// it doesn't have to check it. 
- bool have_default_columns = column_mapping->have_always_default_columns; + skipWhitespacesAndTabs(*in); + if (in->eof()) + return; - ext.read_columns.assign(column_mapping->read_columns.size(), true); - const auto delimiter = format_settings.csv.delimiter; - for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) - { - const auto & table_column = column_mapping->column_indexes_for_input_fields[file_column]; - const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size(); - - if (table_column) - { - skipWhitespacesAndTabs(*in); - ext.read_columns[*table_column] = readField(*columns[*table_column], data_types[*table_column], - serializations[*table_column], is_last_file_column); - - if (!ext.read_columns[*table_column]) - have_default_columns = true; - skipWhitespacesAndTabs(*in); - } - else - { - /// We never read this column from the file, just skip it. - String tmp; - readCSVString(tmp, *in, format_settings.csv); - } - - skipDelimiter(*in, delimiter, is_last_file_column); - } - - if (have_default_columns) - { - for (size_t i = 0; i < column_mapping->read_columns.size(); i++) - { - if (!column_mapping->read_columns[i]) - { - /// The column value for this row is going to be overwritten - /// with default by the caller, but the general assumption is - /// that the column size increases for each row, so we have - /// to insert something. Since we do not care about the exact - /// value, we do not have to use the default value specified by - /// the data type, and can just use IColumn::insertDefault(). - columns[i]->insertDefault(); - ext.read_columns[i] = false; - } - } - } - - return true; + skipEndOfLine(*in); } -bool CSVRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) +void CSVRowInputFormat::skipHeaderRow() +{ + do + { + skipField(); + skipWhitespacesAndTabs(*in); + } + while (checkChar(format_settings.csv.delimiter, *in)); + + skipRowEndDelimiter(); +} + +std::vector CSVRowInputFormat::readHeaderRow() +{ + std::vector fields; + do + { + fields.push_back(readFieldIntoString()); + skipWhitespacesAndTabs(*in); + } + while (checkChar(format_settings.csv.delimiter, *in)); + + skipRowEndDelimiter(); + return fields; +} + +bool CSVRowInputFormat::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) { const char delimiter = format_settings.csv.delimiter; - for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) + try { - if (file_column == 0 && in->eof()) - { - out << "\n"; - return false; - } - skipWhitespacesAndTabs(*in); - if (column_mapping->column_indexes_for_input_fields[file_column].has_value()) + assertChar(delimiter, *in); + } + catch (const DB::Exception &) + { + if (*in->position() == '\n' || *in->position() == '\r') { - const auto & header = getPort().getHeader(); - size_t col_idx = column_mapping->column_indexes_for_input_fields[file_column].value(); - if (!deserializeFieldAndPrintDiagnosticInfo(header.getByPosition(col_idx).name, data_types[col_idx], *columns[col_idx], - out, file_column)) - return false; + out << "ERROR: Line feed found where delimiter (" << delimiter << ") is expected." 
+ " It's like your file has less columns than expected.\n" + "And if your file has the right number of columns, maybe it has unescaped quotes in values.\n"; } else { - static const String skipped_column_str = ""; - static const DataTypePtr skipped_column_type = std::make_shared(); - static const MutableColumnPtr skipped_column = skipped_column_type->createColumn(); - if (!deserializeFieldAndPrintDiagnosticInfo(skipped_column_str, skipped_column_type, *skipped_column, out, file_column)) - return false; - } - skipWhitespacesAndTabs(*in); - - /// Delimiters - if (file_column + 1 == column_mapping->column_indexes_for_input_fields.size()) - { - if (in->eof()) - return false; - - /// we support the extra delimiter at the end of the line - if (*in->position() == delimiter) - { - ++in->position(); - if (in->eof()) - break; - } - - if (!in->eof() && *in->position() != '\n' && *in->position() != '\r') - { - out << "ERROR: There is no line feed. "; - verbosePrintString(in->position(), in->position() + 1, out); - out << " found instead.\n" - " It's like your file has more columns than expected.\n" - "And if your file has the right number of columns, maybe it has an unquoted string value with a comma.\n"; - - return false; - } - - skipEndOfLine(*in); - } - else - { - try - { - assertChar(delimiter, *in); - } - catch (const DB::Exception &) - { - if (*in->position() == '\n' || *in->position() == '\r') - { - out << "ERROR: Line feed found where delimiter (" << delimiter << ") is expected." - " It's like your file has less columns than expected.\n" - "And if your file has the right number of columns, maybe it has unescaped quotes in values.\n"; - } - else - { - out << "ERROR: There is no delimiter (" << delimiter << "). "; - verbosePrintString(in->position(), in->position() + 1, out); - out << " found instead.\n"; - } - return false; - } + out << "ERROR: There is no delimiter (" << delimiter << "). "; + verbosePrintString(in->position(), in->position() + 1, out); + out << " found instead.\n"; } + return false; } return true; } +bool CSVRowInputFormat::parseRowEndWithDiagnosticInfo(WriteBuffer & out) +{ + skipWhitespacesAndTabs(*in); + + if (in->eof()) + return true; + + /// we support the extra delimiter at the end of the line + if (*in->position() == format_settings.csv.delimiter) + { + ++in->position(); + skipWhitespacesAndTabs(*in); + if (in->eof()) + return true; + } + + if (!in->eof() && *in->position() != '\n' && *in->position() != '\r') + { + out << "ERROR: There is no line feed. 
"; + verbosePrintString(in->position(), in->position() + 1, out); + out << " found instead.\n" + " It's like your file has more columns than expected.\n" + "And if your file has the right number of columns, maybe it has an unquoted string value with a comma.\n"; + + return false; + } + + skipEndOfLine(*in); + return true; +} void CSVRowInputFormat::syncAfterError() { skipToNextLineOrEOF(*in); } -void CSVRowInputFormat::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) +bool CSVRowInputFormat::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String & /*column_name*/) { - const auto & index = column_mapping->column_indexes_for_input_fields[file_column]; - if (index) - { - const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size(); - readField(column, type, serializations[*index], is_last_file_column); - } - else - { - String tmp; - readCSVString(tmp, *in, format_settings.csv); - } -} + skipWhitespacesAndTabs(*in); -bool CSVRowInputFormat::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column) -{ const bool at_delimiter = !in->eof() && *in->position() == format_settings.csv.delimiter; const bool at_last_column_line_end = is_last_file_column && (in->eof() || *in->position() == '\n' || *in->position() == '\r'); @@ -399,7 +218,7 @@ bool CSVRowInputFormat::readField(IColumn & column, const DataTypePtr & type, co column.insertDefault(); return false; } - else if (format_settings.null_as_default && !type->isNullable()) + else if (format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable()) { /// If value is null but type is not nullable then use default value instead. return SerializationNullable::deserializeTextCSVImpl(column, *in, format_settings, serialization); @@ -412,31 +231,24 @@ bool CSVRowInputFormat::readField(IColumn & column, const DataTypePtr & type, co } } -void CSVRowInputFormat::resetParser() -{ - RowInputFormatWithDiagnosticInfo::resetParser(); - column_mapping->column_indexes_for_input_fields.clear(); - column_mapping->read_columns.clear(); - column_mapping->have_always_default_columns = false; -} - - void registerInputFormatCSV(FormatFactory & factory) { - for (bool with_names : {false, true}) + auto register_func = [&](const String & format_name, bool with_names, bool with_types) { - factory.registerInputFormat(with_names ? 
"CSVWithNames" : "CSV", [=]( + factory.registerInputFormat(format_name, [with_names, with_types]( ReadBuffer & buf, const Block & sample, IRowInputFormat::Params params, const FormatSettings & settings) { - return std::make_shared(sample, buf, params, with_names, settings); + return std::make_shared(sample, buf, std::move(params), with_names, with_types, settings); }); - } + }; + + registerWithNamesAndTypes("CSV", register_func); } -static std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) +static std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows) { char * pos = in.position(); bool quotes = false; @@ -476,7 +288,7 @@ static std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB else if (*pos == '\n') { ++number_of_rows; - if (memory.size() + static_cast(pos - in.position()) >= min_chunk_size) + if (memory.size() + static_cast(pos - in.position()) >= min_chunk_size && number_of_rows >= min_rows) need_more_data = false; ++pos; if (loadAtPosition(in, memory, pos) && *pos == '\r') @@ -484,7 +296,7 @@ static std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB } else if (*pos == '\r') { - if (memory.size() + static_cast(pos - in.position()) >= min_chunk_size) + if (memory.size() + static_cast(pos - in.position()) >= min_chunk_size && number_of_rows >= min_rows) need_more_data = false; ++pos; if (loadAtPosition(in, memory, pos) && *pos == '\n') @@ -502,8 +314,16 @@ static std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB void registerFileSegmentationEngineCSV(FormatFactory & factory) { - factory.registerFileSegmentationEngine("CSV", &fileSegmentationEngineCSVImpl); - factory.registerFileSegmentationEngine("CSVWithNames", &fileSegmentationEngineCSVImpl); + auto register_func = [&](const String & format_name, bool with_names, bool with_types) + { + size_t min_rows = 1 + int(with_names) + int(with_types); + factory.registerFileSegmentationEngine(format_name, [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) + { + return fileSegmentationEngineCSVImpl(in, memory, min_chunk_size, min_rows); + }); + }; + + registerWithNamesAndTypes("CSV", register_func); } } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index b6075745b39..f239464485a 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -4,7 +4,7 @@ #include #include -#include +#include #include @@ -14,41 +14,44 @@ namespace DB /** A stream for inputting data in csv format. * Does not conform with https://tools.ietf.org/html/rfc4180 because it skips spaces and tabs between values. 
*/ -class CSVRowInputFormat : public RowInputFormatWithDiagnosticInfo +class CSVRowInputFormat : public RowInputFormatWithNamesAndTypes { public: /** with_names - in the first line the header with column names + * with_types - on the next line header with type names */ CSVRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_, - bool with_names_, const FormatSettings & format_settings_); + bool with_names_, bool with_types_, const FormatSettings & format_settings_); String getName() const override { return "CSVRowInputFormat"; } - bool readRow(MutableColumns & columns, RowReadExtension & ext) override; - void readPrefix() override; bool allowSyncAfterError() const override { return true; } void syncAfterError() override; - void resetParser() override; private: - /// There fields are computed in constructor. - bool with_names; - const FormatSettings format_settings; - DataTypes data_types; - using IndexesMap = std::unordered_map; - IndexesMap column_indexes_by_names; - - void addInputColumn(const String & column_name); - - void setupAllColumnsByTableSchema(); - bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override; - void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override; + bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) override; + bool parseRowEndWithDiagnosticInfo(WriteBuffer & out) override; bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override { return *pos != '\n' && *pos != '\r' && *pos != format_settings.csv.delimiter && *pos != ' ' && *pos != '\t'; } - bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column); + bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String & column_name) override; + + void skipField(size_t /*file_column*/) override { skipField(); } + void skipField(); + + void skipHeaderRow() ; + void skipNames() override { skipHeaderRow(); } + void skipTypes() override { skipHeaderRow(); } + void skipFieldDelimiter() override; + void skipRowEndDelimiter() override; + + std::vector readHeaderRow(); + std::vector readNames() override { return readHeaderRow(); } + std::vector readTypes() override { return readHeaderRow(); } + + String readFieldIntoString(); }; } diff --git a/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp b/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp index 14d0e519c0c..b300928e569 100644 --- a/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp @@ -1,5 +1,6 @@ #include #include +#include #include @@ -8,8 +9,8 @@ namespace DB { -CSVRowOutputFormat::CSVRowOutputFormat(WriteBuffer & out_, const Block & header_, bool with_names_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) - : IRowOutputFormat(header_, out_, params_), with_names(with_names_), format_settings(format_settings_) +CSVRowOutputFormat::CSVRowOutputFormat(WriteBuffer & out_, const Block & header_, bool with_names_, bool with_types_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) + : IRowOutputFormat(header_, out_, params_), with_names(with_names_), with_types(with_types_), format_settings(format_settings_) { const auto & sample = getPort(PortKind::Main).getHeader(); size_t columns = sample.columns(); @@ -18,25 +19,27 @@ CSVRowOutputFormat::CSVRowOutputFormat(WriteBuffer & out_, const Block & 
header_ data_types[i] = sample.safeGetByPosition(i).type; } +void CSVRowOutputFormat::writeLine(const std::vector & values) +{ + for (size_t i = 0; i < values.size(); ++i) + { + writeCSVString(values[i], out); + if (i + 1 == values.size()) + writeRowEndDelimiter(); + else + writeFieldDelimiter(); + } +} void CSVRowOutputFormat::doWritePrefix() { const auto & sample = getPort(PortKind::Main).getHeader(); - size_t columns = sample.columns(); if (with_names) - { - for (size_t i = 0; i < columns; ++i) - { - writeCSVString(sample.safeGetByPosition(i).name, out); + writeLine(sample.getNames()); - char delimiter = format_settings.csv.delimiter; - if (i + 1 == columns) - delimiter = '\n'; - - writeChar(delimiter, out); - } - } + if (with_types) + writeLine(sample.getDataTypeNames()); } @@ -72,18 +75,20 @@ void CSVRowOutputFormat::writeBeforeExtremes() void registerOutputFormatCSV(FormatFactory & factory) { - for (bool with_names : {false, true}) + auto register_func = [&](const String & format_name, bool with_names, bool with_types) { - factory.registerOutputFormat(with_names ? "CSVWithNames" : "CSV", [=]( - WriteBuffer & buf, - const Block & sample, - const RowOutputFormatParams & params, - const FormatSettings & format_settings) + factory.registerOutputFormat(format_name, [with_names, with_types]( + WriteBuffer & buf, + const Block & sample, + const RowOutputFormatParams & params, + const FormatSettings & format_settings) { - return std::make_shared(buf, sample, with_names, params, format_settings); + return std::make_shared(buf, sample, with_names, with_types, params, format_settings); }); - factory.markOutputFormatSupportsParallelFormatting(with_names ? "CSVWithNames" : "CSV"); - } + factory.markOutputFormatSupportsParallelFormatting(format_name); + }; + + registerWithNamesAndTypes("CSV", register_func); } } diff --git a/src/Processors/Formats/Impl/CSVRowOutputFormat.h b/src/Processors/Formats/Impl/CSVRowOutputFormat.h index 780a6c4d3ce..7f5d90203ea 100644 --- a/src/Processors/Formats/Impl/CSVRowOutputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowOutputFormat.h @@ -20,7 +20,7 @@ public: /** with_names - output in the first line a header with column names * with_types - output in the next line header with the names of the types */ - CSVRowOutputFormat(WriteBuffer & out_, const Block & header_, bool with_names_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_); + CSVRowOutputFormat(WriteBuffer & out_, const Block & header_, bool with_names_, bool with_types, const RowOutputFormatParams & params_, const FormatSettings & format_settings_); String getName() const override { return "CSVRowOutputFormat"; } @@ -38,9 +38,11 @@ public: return String("text/csv; charset=UTF-8; header=") + (with_names ? 
"present" : "absent"); } -protected: +private: + void writeLine(const std::vector & values); bool with_names; + bool with_types; const FormatSettings format_settings; DataTypes data_types; }; diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp index 5b32bf94c4d..a5e0ac6862c 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp @@ -185,7 +185,7 @@ void registerInputFormatJSONAsString(FormatFactory & factory) void registerFileSegmentationEngineJSONAsString(FormatFactory & factory) { - factory.registerFileSegmentationEngine("JSONAsString", &fileSegmentationEngineJSONEachRowImpl); + factory.registerFileSegmentationEngine("JSONAsString", &fileSegmentationEngineJSONEachRow); } void registerNonTrivialPrefixAndSuffixCheckerJSONAsString(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index 962e9d6e5ac..88fb411ffbd 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -1,8 +1,11 @@ -#include -#include - #include + +#include +#include #include +#include +#include +#include #include #include @@ -12,183 +15,39 @@ namespace DB namespace ErrorCodes { extern const int INCORRECT_DATA; - extern const int CANNOT_READ_ALL_DATA; } -JSONCompactEachRowRowInputFormat::JSONCompactEachRowRowInputFormat(ReadBuffer & in_, - const Block & header_, - Params params_, - const FormatSettings & format_settings_, - bool with_names_, - bool yield_strings_) - : IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), with_names(with_names_), yield_strings(yield_strings_) +JSONCompactEachRowRowInputFormat::JSONCompactEachRowRowInputFormat( + const Block & header_, + ReadBuffer & in_, + Params params_, + bool with_names_, + bool with_types_, + bool yield_strings_, + const FormatSettings & format_settings_) + : RowInputFormatWithNamesAndTypes(header_, in_, std::move(params_), with_names_, with_types_, format_settings_) + , yield_strings(yield_strings_) { - const auto & sample = getPort().getHeader(); - size_t num_columns = sample.columns(); - - data_types.resize(num_columns); - column_indexes_by_names.reserve(num_columns); - - for (size_t i = 0; i < num_columns; ++i) - { - const auto & column_info = sample.getByPosition(i); - - data_types[i] = column_info.type; - column_indexes_by_names.emplace(column_info.name, i); - } } -void JSONCompactEachRowRowInputFormat::resetParser() +void JSONCompactEachRowRowInputFormat::skipRowStartDelimiter() { - IRowInputFormat::resetParser(); - column_indexes_for_input_fields.clear(); - not_seen_columns.clear(); -} - -void JSONCompactEachRowRowInputFormat::readPrefix() -{ - /// In this format, BOM at beginning of stream cannot be confused with value, so it is safe to skip it. 
- skipBOMIfExists(*in); - - if (with_names) - { - size_t num_columns = getPort().getHeader().columns(); - read_columns.assign(num_columns, false); - - assertChar('[', *in); - do - { - skipWhitespaceIfAny(*in); - String column_name; - readJSONString(column_name, *in); - addInputColumn(column_name); - skipWhitespaceIfAny(*in); - } - while (checkChar(',', *in)); - assertChar(']', *in); - skipEndOfLine(); - - /// Type checking - assertChar('[', *in); - for (size_t i = 0; i < column_indexes_for_input_fields.size(); ++i) - { - skipWhitespaceIfAny(*in); - String data_type; - readJSONString(data_type, *in); - - if (column_indexes_for_input_fields[i] && - data_types[*column_indexes_for_input_fields[i]]->getName() != data_type) - { - throw Exception( - "Type of '" + getPort().getHeader().getByPosition(*column_indexes_for_input_fields[i]).name - + "' must be " + data_types[*column_indexes_for_input_fields[i]]->getName() + - ", not " + data_type, - ErrorCodes::INCORRECT_DATA - ); - } - - if (i != column_indexes_for_input_fields.size() - 1) - assertChar(',', *in); - skipWhitespaceIfAny(*in); - } - assertChar(']', *in); - } - else - { - size_t num_columns = getPort().getHeader().columns(); - read_columns.assign(num_columns, true); - column_indexes_for_input_fields.resize(num_columns); - - for (size_t i = 0; i < num_columns; ++i) - { - column_indexes_for_input_fields[i] = i; - } - } - - for (size_t i = 0; i < read_columns.size(); ++i) - { - if (!read_columns[i]) - { - not_seen_columns.emplace_back(i); - } - } -} - -void JSONCompactEachRowRowInputFormat::addInputColumn(const String & column_name) -{ - names_of_columns.emplace_back(column_name); - - const auto column_it = column_indexes_by_names.find(column_name); - if (column_it == column_indexes_by_names.end()) - { - if (format_settings.skip_unknown_fields) - { - column_indexes_for_input_fields.push_back(std::nullopt); - return; - } - - throw Exception( - "Unknown field found in JSONCompactEachRow header: '" + column_name + "' " + - "at position " + std::to_string(column_indexes_for_input_fields.size()) + - "\nSet the 'input_format_skip_unknown_fields' parameter explicitly to ignore and proceed", - ErrorCodes::INCORRECT_DATA - ); - } - - const auto column_index = column_it->second; - - if (read_columns[column_index]) - throw Exception("Duplicate field found while parsing JSONCompactEachRow header: " + column_name, ErrorCodes::INCORRECT_DATA); - - read_columns[column_index] = true; - column_indexes_for_input_fields.emplace_back(column_index); -} - -bool JSONCompactEachRowRowInputFormat::readRow(DB::MutableColumns &columns, DB::RowReadExtension &ext) -{ - skipEndOfLine(); - - if (in->eof()) - return false; - - size_t num_columns = columns.size(); - - read_columns.assign(num_columns, false); - + skipWhitespaceIfAny(*in); assertChar('[', *in); - for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column) - { - const auto & table_column = column_indexes_for_input_fields[file_column]; - if (table_column) - { - readField(*table_column, columns); - } - else - { - skipJSONField(*in, StringRef(names_of_columns[file_column])); - } +} - skipWhitespaceIfAny(*in); - if (in->eof()) - throw ParsingException("Unexpected end of stream while parsing JSONCompactEachRow format", ErrorCodes::CANNOT_READ_ALL_DATA); - if (file_column + 1 != column_indexes_for_input_fields.size()) - { - assertChar(',', *in); - skipWhitespaceIfAny(*in); - } - } +void JSONCompactEachRowRowInputFormat::skipFieldDelimiter() +{ + skipWhitespaceIfAny(*in); + 
assertChar(',', *in); +} + +void JSONCompactEachRowRowInputFormat::skipRowEndDelimiter() +{ + skipWhitespaceIfAny(*in); assertChar(']', *in); - for (const auto & name : not_seen_columns) - columns[name]->insertDefault(); - - ext.read_columns = read_columns; - return true; -} - -void JSONCompactEachRowRowInputFormat::skipEndOfLine() -{ skipWhitespaceIfAny(*in); if (!in->eof() && (*in->position() == ',' || *in->position() == ';')) ++in->position(); @@ -196,39 +55,55 @@ void JSONCompactEachRowRowInputFormat::skipEndOfLine() skipWhitespaceIfAny(*in); } -void JSONCompactEachRowRowInputFormat::readField(size_t index, MutableColumns & columns) +String JSONCompactEachRowRowInputFormat::readFieldIntoString() { - try + skipWhitespaceIfAny(*in); + String field; + readJSONString(field, *in); + return field; +} + +void JSONCompactEachRowRowInputFormat::skipField(size_t file_column) +{ + skipWhitespaceIfAny(*in); + skipJSONField(*in, column_mapping->names_of_columns[file_column]); +} + +void JSONCompactEachRowRowInputFormat::skipHeaderRow() +{ + skipRowStartDelimiter(); + size_t i = 0; + do { - read_columns[index] = true; - const auto & type = data_types[index]; - const auto & serialization = serializations[index]; - - if (yield_strings) - { - String str; - readJSONString(str, *in); - - ReadBufferFromString buf(str); - - if (format_settings.null_as_default && !type->isNullable()) - read_columns[index] = SerializationNullable::deserializeWholeTextImpl(*columns[index], buf, format_settings, serialization); - else - serialization->deserializeWholeText(*columns[index], buf, format_settings); - } - else - { - if (format_settings.null_as_default && !type->isNullable()) - read_columns[index] = SerializationNullable::deserializeTextJSONImpl(*columns[index], *in, format_settings, serialization); - else - serialization->deserializeTextJSON(*columns[index], *in, format_settings); - } + if (i >= column_mapping->names_of_columns.size()) + throw Exception(ErrorCodes::INCORRECT_DATA, "The number of columns in a row differs from the number of column names"); + skipField(i++); + skipWhitespaceIfAny(*in); } - catch (Exception & e) + while (checkChar(',', *in)); + + skipRowEndDelimiter(); +} + +std::vector JSONCompactEachRowRowInputFormat::readHeaderRow() +{ + skipRowStartDelimiter(); + std::vector fields; + do { - e.addMessage("(while reading the value of key " + getPort().getHeader().getByPosition(index).name + ")"); - throw; + fields.push_back(readFieldIntoString()); + skipWhitespaceIfAny(*in); } + while (checkChar(',', *in)); + + skipRowEndDelimiter(); + return fields; +} + +bool JSONCompactEachRowRowInputFormat::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & column_name) +{ + skipWhitespaceIfAny(*in); + return readFieldImpl(*in, column, type, serialization, column_name, format_settings, yield_strings); } void JSONCompactEachRowRowInputFormat::syncAfterError() @@ -236,43 +111,112 @@ void JSONCompactEachRowRowInputFormat::syncAfterError() skipToUnescapedNextLineOrEOF(*in); } +bool JSONCompactEachRowRowInputFormat::parseRowStartWithDiagnosticInfo(WriteBuffer & out) +{ + skipWhitespaceIfAny(*in); + if (!checkChar('[', *in)) + { + out << "ERROR: There is no '[' before the row.\n"; + return false; + } + + return true; +} + +bool JSONCompactEachRowRowInputFormat::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) +{ + try + { + skipWhitespaceIfAny(*in); + assertChar(',', *in); + } + catch (const DB::Exception &) + { + if 
(*in->position() == ']') + { + out << "ERROR: Closing parenthesis (']') found where comma is expected." + " It's like your file has less columns than expected.\n" + "And if your file has the right number of columns, maybe it has unescaped quotes in values.\n"; + } + else + { + out << "ERROR: There is no comma. "; + verbosePrintString(in->position(), in->position() + 1, out); + out << " found instead.\n"; + } + return false; + } + + return true; +} + +bool JSONCompactEachRowRowInputFormat::parseRowEndWithDiagnosticInfo(WriteBuffer & out) +{ + skipWhitespaceIfAny(*in); + + if (in->eof()) + { + out << "ERROR: Unexpected end of file. ']' expected at the end of row."; + return false; + } + + if (!checkChar(']', *in)) + { + out << "ERROR: There is no closing parenthesis (']') at the end of the row. "; + verbosePrintString(in->position(), in->position() + 1, out); + out << " found instead.\n"; + return false; + } + + skipWhitespaceIfAny(*in); + + if (in->eof()) + return true; + + if ((*in->position() == ',' || *in->position() == ';')) + ++in->position(); + + skipWhitespaceIfAny(*in); + return true; +} + void registerInputFormatJSONCompactEachRow(FormatFactory & factory) { - factory.registerInputFormat("JSONCompactEachRow", []( - ReadBuffer & buf, - const Block & sample, - IRowInputFormat::Params params, - const FormatSettings & settings) + for (bool yield_strings : {true, false}) { - return std::make_shared(buf, sample, std::move(params), settings, false, false); - }); + auto register_func = [&](const String & format_name, bool with_names, bool with_types) + { + factory.registerInputFormat(format_name, [with_names, with_types, yield_strings]( + ReadBuffer & buf, + const Block & sample, + IRowInputFormat::Params params, + const FormatSettings & settings) + { + return std::make_shared(sample, buf, std::move(params), with_names, with_types, yield_strings, settings); + }); + }; - factory.registerInputFormat("JSONCompactEachRowWithNamesAndTypes", []( - ReadBuffer & buf, - const Block & sample, - IRowInputFormat::Params params, - const FormatSettings & settings) - { - return std::make_shared(buf, sample, std::move(params), settings, true, false); - }); + registerWithNamesAndTypes(yield_strings ? "JSONCompactStringsEachRow" : "JSONCompactEachRow", register_func); + } +} - factory.registerInputFormat("JSONCompactStringsEachRow", []( - ReadBuffer & buf, - const Block & sample, - IRowInputFormat::Params params, - const FormatSettings & settings) +void registerFileSegmentationEngineJSONCompactEachRow(FormatFactory & factory) +{ + auto register_func = [&](const String & format_name, bool with_names, bool with_types) { - return std::make_shared(buf, sample, std::move(params), settings, false, true); - }); + /// In case when we have names and/or types in the first two/one rows, + /// we need to read at least one more row of actual data. So, set + /// the minimum of rows for segmentation engine according to + /// parameters with_names and with_types. 
+ size_t min_rows = 1 + int(with_names) + int(with_types); + factory.registerFileSegmentationEngine(format_name, [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) + { + return fileSegmentationEngineJSONCompactEachRow(in, memory, min_chunk_size, min_rows); + }); + }; - factory.registerInputFormat("JSONCompactStringsEachRowWithNamesAndTypes", []( - ReadBuffer & buf, - const Block & sample, - IRowInputFormat::Params params, - const FormatSettings & settings) - { - return std::make_shared(buf, sample, std::move(params), settings, true, true); - }); + registerWithNamesAndTypes("JSONCompactEachRow", register_func); + registerWithNamesAndTypes("JSONCompactStringsEachRow", register_func); } } diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h index 4077eb6e008..373eb04f06c 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include @@ -17,49 +17,47 @@ class ReadBuffer; * - JSONCompactStringsEachRowWithNamesAndTypes * */ -class JSONCompactEachRowRowInputFormat : public IRowInputFormat +class JSONCompactEachRowRowInputFormat : public RowInputFormatWithNamesAndTypes { public: JSONCompactEachRowRowInputFormat( - ReadBuffer & in_, const Block & header_, + ReadBuffer & in_, Params params_, - const FormatSettings & format_settings_, bool with_names_, - bool yield_strings_); + bool with_types_, + bool yield_strings_, + const FormatSettings & format_settings_); String getName() const override { return "JSONCompactEachRowRowInputFormat"; } - - void readPrefix() override; - bool readRow(MutableColumns & columns, RowReadExtension & ext) override; bool allowSyncAfterError() const override { return true; } void syncAfterError() override; - void resetParser() override; private: - void addInputColumn(const String & column_name); - void skipEndOfLine(); - void readField(size_t index, MutableColumns & columns); + bool parseRowStartWithDiagnosticInfo(WriteBuffer & out) override; + bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) override; + bool parseRowEndWithDiagnosticInfo(WriteBuffer & out) override; + bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override + { + return *pos != ',' && *pos != ']' && *pos != ' ' && *pos != '\t'; + } - const FormatSettings format_settings; + bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String & column_name) override; - using IndexesMap = std::unordered_map; - IndexesMap column_indexes_by_names; + void skipField(size_t file_column) override; + void skipHeaderRow(); + void skipNames() override { skipHeaderRow(); } + void skipTypes() override { skipHeaderRow(); } + void skipRowStartDelimiter() override; + void skipFieldDelimiter() override; + void skipRowEndDelimiter() override; - using OptionalIndexes = std::vector>; - OptionalIndexes column_indexes_for_input_fields; + std::vector readHeaderRow(); + std::vector readNames() override { return readHeaderRow(); } + std::vector readTypes() override { return readHeaderRow(); } + String readFieldIntoString(); - DataTypes data_types; - std::vector read_columns; - std::vector not_seen_columns; - - /// This is for the correct exceptions in skipping unknown fields. - std::vector names_of_columns; - - /// For *WithNamesAndTypes formats. 
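The registration changes in these hunks all funnel through a single register_func lambda handed to registerWithNamesAndTypes, and the file-segmentation engines derive their minimum row count from the header flags. A standalone sketch of what that helper presumably expands to; the three-variant naming is inferred from the per-name registrations removed elsewhere in this patch, not copied from the real helper:

    #include <functional>
    #include <string>

    // Sketch only: call one registration functor for the plain, WithNames and
    // WithNamesAndTypes variants of a format name.
    using RegisterFunc = std::function<void(const std::string &, bool, bool)>;

    void registerWithNamesAndTypesSketch(const std::string & base_name, RegisterFunc register_func)
    {
        register_func(base_name, /*with_names=*/false, /*with_types=*/false);
        register_func(base_name + "WithNames", /*with_names=*/true, /*with_types=*/false);
        register_func(base_name + "WithNamesAndTypes", /*with_names=*/true, /*with_types=*/true);
    }

    // Matching segmentation rule from the hunk above: a chunk must hold the header rows plus at
    // least one data row, so min_rows = 1 + with_names + with_types
    // (3 for CSVWithNamesAndTypes, 2 for CSVWithNames, 1 for plain CSV).
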
- bool with_names; - /// For JSONCompactString* formats. bool yield_strings; }; diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp index 1ce4277023d..cdff7ff2070 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB @@ -13,12 +14,10 @@ JSONCompactEachRowRowOutputFormat::JSONCompactEachRowRowOutputFormat(WriteBuffer const RowOutputFormatParams & params_, const FormatSettings & settings_, bool with_names_, + bool with_types_, bool yield_strings_) - : IRowOutputFormat(header_, out_, params_), settings(settings_), with_names(with_names_), yield_strings(yield_strings_) + : IRowOutputFormat(header_, out_, params_), settings(settings_), with_names(with_names_), with_types(with_types_), yield_strings(yield_strings_) { - const auto & sample = getPort(PortKind::Main).getHeader(); - NamesAndTypesList columns(sample.getNamesAndTypesList()); - fields.assign(columns.begin(), columns.end()); } @@ -57,39 +56,40 @@ void JSONCompactEachRowRowOutputFormat::writeTotals(const Columns & columns, siz { writeChar('\n', out); size_t num_columns = columns.size(); - writeChar('[', out); + writeRowStartDelimiter(); for (size_t i = 0; i < num_columns; ++i) { if (i != 0) - JSONCompactEachRowRowOutputFormat::writeFieldDelimiter(); + writeFieldDelimiter(); - JSONCompactEachRowRowOutputFormat::writeField(*columns[i], *serializations[i], row_num); + writeField(*columns[i], *serializations[i], row_num); } - writeCString("]\n", out); + writeRowEndDelimiter(); +} + +void JSONCompactEachRowRowOutputFormat::writeLine(const std::vector & values) +{ + writeRowStartDelimiter(); + for (size_t i = 0; i < values.size(); ++i) + { + writeChar('\"', out); + writeString(values[i], out); + writeChar('\"', out); + if (i != values.size() - 1) + writeFieldDelimiter(); + } + writeRowEndDelimiter(); } void JSONCompactEachRowRowOutputFormat::doWritePrefix() { + const auto & header = getPort(PortKind::Main).getHeader(); + if (with_names) - { - writeChar('[', out); - for (size_t i = 0; i < fields.size(); ++i) - { - writeChar('\"', out); - writeString(fields[i].name, out); - writeChar('\"', out); - if (i != fields.size() - 1) - writeCString(", ", out); - } - writeCString("]\n[", out); - for (size_t i = 0; i < fields.size(); ++i) - { - writeJSONString(fields[i].type->getName(), out, settings); - if (i != fields.size() - 1) - writeCString(", ", out); - } - writeCString("]\n", out); - } + writeLine(header.getNames()); + + if (with_types) + writeLine(header.getDataTypeNames()); } void JSONCompactEachRowRowOutputFormat::consumeTotals(DB::Chunk chunk) @@ -100,45 +100,24 @@ void JSONCompactEachRowRowOutputFormat::consumeTotals(DB::Chunk chunk) void registerOutputFormatJSONCompactEachRow(FormatFactory & factory) { - factory.registerOutputFormat("JSONCompactEachRow", []( - WriteBuffer & buf, - const Block & sample, - const RowOutputFormatParams & params, - const FormatSettings & format_settings) + for (bool yield_strings : {false, true}) { - return std::make_shared(buf, sample, params, format_settings, false, false); - }); - factory.markOutputFormatSupportsParallelFormatting("JSONCompactEachRow"); + auto register_func = [&](const String & format_name, bool with_names, bool with_types) + { + factory.registerOutputFormat(format_name, [yield_strings, with_names, with_types]( + WriteBuffer & buf, + const 
Block & sample, + const RowOutputFormatParams & params, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, params, format_settings, with_names, with_types, yield_strings); + }); - factory.registerOutputFormat("JSONCompactEachRowWithNamesAndTypes", []( - WriteBuffer &buf, - const Block &sample, - const RowOutputFormatParams & params, - const FormatSettings &format_settings) - { - return std::make_shared(buf, sample, params, format_settings, true, false); - }); - factory.markOutputFormatSupportsParallelFormatting("JSONCompactEachRowWithNamesAndTypes"); + factory.markOutputFormatSupportsParallelFormatting(format_name); + }; - factory.registerOutputFormat("JSONCompactStringsEachRow", []( - WriteBuffer & buf, - const Block & sample, - const RowOutputFormatParams & params, - const FormatSettings & format_settings) - { - return std::make_shared(buf, sample, params, format_settings, false, true); - }); - factory.markOutputFormatSupportsParallelFormatting("JSONCompactStringsEachRow"); - - factory.registerOutputFormat("JSONCompactStringsEachRowWithNamesAndTypes", []( - WriteBuffer &buf, - const Block &sample, - const RowOutputFormatParams & params, - const FormatSettings &format_settings) - { - return std::make_shared(buf, sample, params, format_settings, true, true); - }); - factory.markOutputFormatSupportsParallelFormatting("JSONCompactStringsEachRowWithNamesAndTypes"); + registerWithNamesAndTypes(yield_strings ? "JSONCompactStringsEachRow" : "JSONCompactEachRow", register_func); + } } diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h index 792eb906f4b..aa12ba7e809 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h @@ -21,15 +21,14 @@ public: const RowOutputFormatParams & params_, const FormatSettings & settings_, bool with_names_, + bool with_types_, bool yield_strings_); String getName() const override { return "JSONCompactEachRowRowOutputFormat"; } void doWritePrefix() override; - void writeBeforeTotals() override {} void writeTotals(const Columns & columns, size_t row_num) override; - void writeAfterTotals() override {} void writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) override; void writeFieldDelimiter() override; @@ -42,11 +41,11 @@ protected: void consumeExtremes(Chunk) override {} private: + void writeLine(const std::vector & values); + FormatSettings settings; - - NamesAndTypes fields; - bool with_names; + bool with_types; bool yield_strings; }; } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index d04ba2a49e4..28481313974 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -136,37 +136,10 @@ void JSONEachRowRowInputFormat::readField(size_t index, MutableColumns & columns if (seen_columns[index]) throw Exception("Duplicate field found while parsing JSONEachRow format: " + columnName(index), ErrorCodes::INCORRECT_DATA); - try - { - seen_columns[index] = read_columns[index] = true; - const auto & type = getPort().getHeader().getByPosition(index).type; - const auto & serialization = serializations[index]; - - if (yield_strings) - { - String str; - readJSONString(str, *in); - - ReadBufferFromString buf(str); - - if (format_settings.null_as_default && 
!type->isNullable()) - read_columns[index] = SerializationNullable::deserializeWholeTextImpl(*columns[index], buf, format_settings, serialization); - else - serialization->deserializeWholeText(*columns[index], buf, format_settings); - } - else - { - if (format_settings.null_as_default && !type->isNullable()) - read_columns[index] = SerializationNullable::deserializeTextJSONImpl(*columns[index], *in, format_settings, serialization); - else - serialization->deserializeTextJSON(*columns[index], *in, format_settings); - } - } - catch (Exception & e) - { - e.addMessage("(while reading the value of key " + columnName(index) + ")"); - throw; - } + seen_columns[index] = true; + const auto & type = getPort().getHeader().getByPosition(index).type; + const auto & serialization = serializations[index]; + read_columns[index] = readFieldImpl(*in, *columns[index], type, serialization, columnName(index), format_settings, yield_strings); } inline bool JSONEachRowRowInputFormat::advanceToNextKey(size_t key_index) @@ -282,8 +255,13 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi if (!seen_columns[i]) header.getByPosition(i).type->insertDefaultInto(*columns[i]); - /// return info about defaults set - ext.read_columns = read_columns; + /// Return info about defaults set. + /// If defaults_for_omitted_fields is set to 0, we should just leave already inserted defaults. + if (format_settings.defaults_for_omitted_fields) + ext.read_columns = read_columns; + else + ext.read_columns.assign(read_columns.size(), true); + return true; } @@ -355,8 +333,8 @@ void registerInputFormatJSONEachRow(FormatFactory & factory) void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory) { - factory.registerFileSegmentationEngine("JSONEachRow", &fileSegmentationEngineJSONEachRowImpl); - factory.registerFileSegmentationEngine("JSONStringsEachRow", &fileSegmentationEngineJSONEachRowImpl); + factory.registerFileSegmentationEngine("JSONEachRow", &fileSegmentationEngineJSONEachRow); + factory.registerFileSegmentationEngine("JSONStringsEachRow", &fileSegmentationEngineJSONEachRow); } void registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp index 8cb0fce609e..62c0eaa457e 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp @@ -60,7 +60,7 @@ RegexpRowInputFormat::ColumnFormat RegexpRowInputFormat::stringToFormat(const St bool RegexpRowInputFormat::readField(size_t index, MutableColumns & columns) { const auto & type = getPort().getHeader().getByPosition(index).type; - bool parse_as_nullable = format_settings.null_as_default && !type->isNullable(); + bool parse_as_nullable = format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable(); bool read = true; ReadBuffer field_buf(const_cast(matched_fields[index].data()), matched_fields[index].size(), 0); try @@ -94,9 +94,9 @@ bool RegexpRowInputFormat::readField(size_t index, MutableColumns & columns) break; case ColumnFormat::Raw: if (parse_as_nullable) - read = SerializationNullable::deserializeWholeTextImpl(*columns[index], field_buf, format_settings, serialization); + read = SerializationNullable::deserializeTextRawImpl(*columns[index], field_buf, format_settings, serialization); else - serialization->deserializeWholeText(*columns[index], field_buf, format_settings); + 
serialization->deserializeTextRaw(*columns[index], field_buf, format_settings); break; default: break; diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 331d6e435d1..606c67aa0d1 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -143,7 +143,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex seen_columns[index] = read_columns[index] = true; const auto & type = getPort().getHeader().getByPosition(index).type; const auto & serialization = serializations[index]; - if (format_settings.null_as_default && !type->isNullable()) + if (format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable()) read_columns[index] = SerializationNullable::deserializeTextEscapedImpl(*columns[index], *in, format_settings, serialization); else serialization->deserializeTextEscaped(*columns[index], *in, format_settings); diff --git a/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp index 6161303d23a..14dec8420a8 100644 --- a/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowOutputFormat.cpp @@ -8,7 +8,7 @@ namespace DB { TSKVRowOutputFormat::TSKVRowOutputFormat(WriteBuffer & out_, const Block & header, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) - : TabSeparatedRowOutputFormat(out_, header, false, false, params_, format_settings_) + : TabSeparatedRowOutputFormat(out_, header, false, false, false, params_, format_settings_) { const auto & sample = getPort(PortKind::Main).getHeader(); NamesAndTypesList columns(sample.getNamesAndTypesList()); diff --git a/src/Processors/Formats/Impl/TabSeparatedRawRowInputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRawRowInputFormat.h deleted file mode 100644 index 3e12388bede..00000000000 --- a/src/Processors/Formats/Impl/TabSeparatedRawRowInputFormat.h +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include -#include -#include -#include - - -namespace DB -{ - -/** A stream to input data in tsv format, but without escaping individual values. 
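(Aside: a change that recurs across these hunks — Regexp, TSKV, TabSeparated, Template and Values formats — is extending the null_as_default guard with `!type->isLowCardinalityNullable()`. The likely reason is that `LowCardinality(Nullable(T))` is not reported by `isNullable()` even though it can store NULLs, so it must not be routed through the "parse NULL as a default value" branch. A rough illustration of the predicate, using a made-up traits struct rather than the real IDataType interface:

// Illustration only: stand-in for the relevant IDataType queries.
struct TypeTraitsSketch
{
    bool is_nullable;                  // Nullable(T)
    bool is_low_cardinality_nullable;  // LowCardinality(Nullable(T))
};

// NULL text should be turned into a default value only when the column
// cannot represent NULL at all; both nullable flavours are excluded.
bool shouldParseNullAsDefault(const TypeTraitsSketch & type, bool null_as_default)
{
    return null_as_default && !type.is_nullable && !type.is_low_cardinality_nullable;
}
)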
- * It only supports columns without '\n' or '\t' - */ -class TabSeparatedRawRowInputFormat : public TabSeparatedRowInputFormat -{ -public: - /** with_names - the first line is the header with the names of the columns - * with_types - on the next line header with type names - */ - TabSeparatedRawRowInputFormat( - const Block & header_, - ReadBuffer & in_, - const Params & params_, - bool with_names_, - bool with_types_, - const FormatSettings & format_settings_) - : TabSeparatedRowInputFormat(header_, in_, params_, with_names_, with_types_, format_settings_) - { - } - - String getName() const override { return "TabSeparatedRawRowInputFormat"; } - - bool readField(IColumn & column, const DataTypePtr &, const SerializationPtr & serialization, bool) override - { - String tmp; - - while (!in->eof()) - { - char * pos = find_first_symbols<'\n', '\t'>(in->position(), in->buffer().end()); - - tmp.append(in->position(), pos - in->position()); - in->position() = pos; - - if (pos == in->buffer().end()) - in->next(); - else - break; - } - - ReadBufferFromString cell(tmp); - serialization->deserializeWholeText(column, cell, format_settings); - - return true; - } -}; - -} diff --git a/src/Processors/Formats/Impl/TabSeparatedRawRowOutputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRawRowOutputFormat.h deleted file mode 100644 index dc9312e53bc..00000000000 --- a/src/Processors/Formats/Impl/TabSeparatedRawRowOutputFormat.h +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once - -#include -#include - - -namespace DB -{ - -/** A stream for outputting data in tsv format, but without escaping individual values. - * (That is, the output is irreversible.) - */ -class TabSeparatedRawRowOutputFormat : public TabSeparatedRowOutputFormat -{ -public: - TabSeparatedRawRowOutputFormat( - WriteBuffer & out_, - const Block & header_, - bool with_names_, - bool with_types_, - const RowOutputFormatParams & params_, - const FormatSettings & format_settings_) - : TabSeparatedRowOutputFormat(out_, header_, with_names_, with_types_, params_, format_settings_) - { - } - - String getName() const override { return "TabSeparatedRawRowOutputFormat"; } - - void writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) override - { - serialization.serializeText(column, row_num, out, format_settings); - } -}; - -} diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index c1cf0a904ea..1e6d238b202 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -1,13 +1,11 @@ #include -#include -#include #include +#include #include -#include #include #include -#include +#include #include #include @@ -20,19 +18,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } - -static void skipTSVRow(ReadBuffer & in, const size_t num_columns) -{ - NullOutput null_sink; - - for (size_t i = 0; i < num_columns; ++i) - { - readEscapedStringInto(null_sink, in); - assertChar(i == num_columns - 1 ? '\n' : '\t', in); - } -} - - /** Check for a common error case - usage of Windows line feed. 
*/ static void checkForCarriageReturn(ReadBuffer & in) @@ -45,188 +30,73 @@ static void checkForCarriageReturn(ReadBuffer & in) ErrorCodes::INCORRECT_DATA); } - -TabSeparatedRowInputFormat::TabSeparatedRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_, - bool with_names_, bool with_types_, const FormatSettings & format_settings_) - : RowInputFormatWithDiagnosticInfo(header_, in_, params_), with_names(with_names_), with_types(with_types_), format_settings(format_settings_) +TabSeparatedRowInputFormat::TabSeparatedRowInputFormat( + const Block & header_, + ReadBuffer & in_, + const Params & params_, + bool with_names_, + bool with_types_, + bool is_raw_, + const FormatSettings & format_settings_) + : RowInputFormatWithNamesAndTypes(header_, in_, params_, with_names_, with_types_, format_settings_), is_raw(is_raw_) { - const auto & sample = getPort().getHeader(); - size_t num_columns = sample.columns(); - - data_types.resize(num_columns); - column_indexes_by_names.reserve(num_columns); - - for (size_t i = 0; i < num_columns; ++i) - { - const auto & column_info = sample.getByPosition(i); - - data_types[i] = column_info.type; - column_indexes_by_names.emplace(column_info.name, i); - } - - column_mapping->column_indexes_for_input_fields.reserve(num_columns); - column_mapping->read_columns.assign(num_columns, false); } - -void TabSeparatedRowInputFormat::setupAllColumnsByTableSchema() +void TabSeparatedRowInputFormat::skipFieldDelimiter() { - const auto & header = getPort().getHeader(); - column_mapping->read_columns.assign(header.columns(), true); - column_mapping->column_indexes_for_input_fields.resize(header.columns()); - - for (size_t i = 0; i < column_mapping->column_indexes_for_input_fields.size(); ++i) - column_mapping->column_indexes_for_input_fields[i] = i; + assertChar('\t', *in); } - -void TabSeparatedRowInputFormat::addInputColumn(const String & column_name) -{ - const auto column_it = column_indexes_by_names.find(column_name); - if (column_it == column_indexes_by_names.end()) - { - if (format_settings.skip_unknown_fields) - { - column_mapping->column_indexes_for_input_fields.push_back(std::nullopt); - return; - } - - throw Exception( - "Unknown field found in TSV header: '" + column_name + "' " + - "at position " + std::to_string(column_mapping->column_indexes_for_input_fields.size()) + - "\nSet the 'input_format_skip_unknown_fields' parameter explicitly to ignore and proceed", - ErrorCodes::INCORRECT_DATA - ); - } - - const auto column_index = column_it->second; - - if (column_mapping->read_columns[column_index]) - throw Exception("Duplicate field found while parsing TSV header: " + column_name, ErrorCodes::INCORRECT_DATA); - - column_mapping->read_columns[column_index] = true; - column_mapping->column_indexes_for_input_fields.emplace_back(column_index); -} - - -void TabSeparatedRowInputFormat::fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension & row_read_extension) -{ - /// It is safe to memorize this on the first run - the format guarantees this does not change - if (unlikely(row_num == 1)) - { - columns_to_fill_with_default_values.clear(); - for (size_t index = 0; index < column_mapping->read_columns.size(); ++index) - if (column_mapping->read_columns[index] == 0) - columns_to_fill_with_default_values.push_back(index); - } - - for (const auto column_index : columns_to_fill_with_default_values) - { - data_types[column_index]->insertDefaultInto(*columns[column_index]); - row_read_extension.read_columns[column_index] = false; - } -} 
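(Aside: the two deleted TabSeparatedRaw*.h headers are not lost functionality — the raw behaviour is folded into TabSeparatedRowInputFormat/TabSeparatedRowOutputFormat behind the new is_raw flag, which dispatches to deserializeTextRaw/serializeTextRaw instead of the escaped variants. The practical difference is only in escaping; a small self-contained illustration (not ClickHouse code):

#include <iostream>
#include <string>

// TSV escapes tab, newline and backslash; TSVRaw emits the value verbatim,
// which is why raw mode cannot round-trip values containing '\t' or '\n'.
static std::string escapeTSV(const std::string & s)
{
    std::string out;
    for (char c : s)
    {
        if (c == '\t') out += "\\t";
        else if (c == '\n') out += "\\n";
        else if (c == '\\') out += "\\\\";
        else out += c;
    }
    return out;
}

int main()
{
    std::string value = "a\tb\nc";
    std::cout << "TSV:    " << escapeTSV(value) << '\n';  // prints: a\tb\nc
    std::cout << "TSVRaw: " << value << '\n';             // prints a literal tab and newline
}
)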
- - -void TabSeparatedRowInputFormat::readPrefix() -{ - if (with_names || with_types || data_types.at(0)->textCanContainOnlyValidUTF8()) - { - /// In this format, we assume that column name or type cannot contain BOM, - /// so, if format has header, - /// then BOM at beginning of stream cannot be confused with name or type of field, and it is safe to skip it. - skipBOMIfExists(*in); - } - - /// This is a bit of abstraction leakage, but we have almost the same code in other places. - /// Thus, we check if this InputFormat is working with the "real" beginning of the data in case of parallel parsing. - if (with_names && getCurrentUnitNumber() == 0) - { - if (format_settings.with_names_use_header) - { - String column_name; - for (;;) - { - readEscapedString(column_name, *in); - if (!checkChar('\t', *in)) - { - /// Check last column for \r before adding it, otherwise an error will be: - /// "Unknown field found in TSV header" - checkForCarriageReturn(*in); - addInputColumn(column_name); - break; - } - else - addInputColumn(column_name); - } - - - if (!in->eof()) - { - assertChar('\n', *in); - } - } - else - { - setupAllColumnsByTableSchema(); - skipTSVRow(*in, column_mapping->column_indexes_for_input_fields.size()); - } - } - else if (!column_mapping->is_set) - setupAllColumnsByTableSchema(); - - if (with_types) - { - skipTSVRow(*in, column_mapping->column_indexes_for_input_fields.size()); - } -} - - -bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext) +void TabSeparatedRowInputFormat::skipRowEndDelimiter() { if (in->eof()) - return false; + return; - updateDiagnosticInfo(); + if (unlikely(row_num <= 1)) + checkForCarriageReturn(*in); - ext.read_columns.assign(column_mapping->read_columns.size(), true); - for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) - { - const auto & column_index = column_mapping->column_indexes_for_input_fields[file_column]; - const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size(); - if (column_index) - { - const auto & type = data_types[*column_index]; - ext.read_columns[*column_index] = readField(*columns[*column_index], type, serializations[*column_index], is_last_file_column); - } - else - { - NullOutput null_sink; - readEscapedStringInto(null_sink, *in); - } - - /// skip separators - if (file_column + 1 < column_mapping->column_indexes_for_input_fields.size()) - { - assertChar('\t', *in); - } - else if (!in->eof()) - { - if (unlikely(row_num == 1)) - checkForCarriageReturn(*in); - - assertChar('\n', *in); - } - } - - fillUnreadColumnsWithDefaults(columns, ext); - - return true; + assertChar('\n', *in); } +String TabSeparatedRowInputFormat::readFieldIntoString() +{ + String field; + readEscapedString(field, *in); + return field; +} + +void TabSeparatedRowInputFormat::skipField() +{ + NullOutput null_sink; + readEscapedStringInto(null_sink, *in); +} + +void TabSeparatedRowInputFormat::skipHeaderRow() +{ + do + { + skipField(); + } + while (checkChar('\t', *in)); + + skipRowEndDelimiter(); +} + +std::vector TabSeparatedRowInputFormat::readHeaderRow() +{ + std::vector fields; + do + { + fields.push_back(readFieldIntoString()); + } + while (checkChar('\t', *in)); + + skipRowEndDelimiter(); + return fields; +} bool TabSeparatedRowInputFormat::readField(IColumn & column, const DataTypePtr & type, - const SerializationPtr & serialization, bool is_last_file_column) + const SerializationPtr & serialization, bool 
is_last_file_column, const String & /*column_name*/) { const bool at_delimiter = !is_last_file_column && !in->eof() && *in->position() == '\t'; const bool at_last_column_line_end = is_last_file_column && (in->eof() || *in->position() == '\n'); @@ -236,137 +106,110 @@ bool TabSeparatedRowInputFormat::readField(IColumn & column, const DataTypePtr & column.insertDefault(); return false; } - else if (format_settings.null_as_default && !type->isNullable()) + + bool as_nullable = format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable(); + + if (is_raw) + { + if (as_nullable) + return SerializationNullable::deserializeTextRawImpl(column, *in, format_settings, serialization); + + serialization->deserializeTextRaw(column, *in, format_settings); + return true; + } + + if (as_nullable) return SerializationNullable::deserializeTextEscapedImpl(column, *in, format_settings, serialization); serialization->deserializeTextEscaped(column, *in, format_settings); return true; } -bool TabSeparatedRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) +bool TabSeparatedRowInputFormat::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) { - for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) + try { - if (file_column == 0 && in->eof()) + assertChar('\t', *in); + } + catch (const DB::Exception &) + { + if (*in->position() == '\n') { - out << "\n"; - return false; + out << "ERROR: Line feed found where tab is expected." + " It's like your file has less columns than expected.\n" + "And if your file has the right number of columns, " + "maybe it has an unescaped backslash in value before tab, which causes the tab to be escaped.\n"; } - - if (column_mapping->column_indexes_for_input_fields[file_column].has_value()) + else if (*in->position() == '\r') { - const auto & header = getPort().getHeader(); - size_t col_idx = column_mapping->column_indexes_for_input_fields[file_column].value(); - if (!deserializeFieldAndPrintDiagnosticInfo(header.getByPosition(col_idx).name, data_types[col_idx], *columns[col_idx], - out, file_column)) - return false; + out << "ERROR: Carriage return found where tab is expected.\n"; } else { - static const String skipped_column_str = ""; - static const DataTypePtr skipped_column_type = std::make_shared(); - static const MutableColumnPtr skipped_column = skipped_column_type->createColumn(); - if (!deserializeFieldAndPrintDiagnosticInfo(skipped_column_str, skipped_column_type, *skipped_column, out, file_column)) - return false; - } - - /// Delimiters - if (file_column + 1 == column_mapping->column_indexes_for_input_fields.size()) - { - if (!in->eof()) - { - try - { - assertChar('\n', *in); - } - catch (const DB::Exception &) - { - if (*in->position() == '\t') - { - out << "ERROR: Tab found where line feed is expected." - " It's like your file has more columns than expected.\n" - "And if your file has the right number of columns, maybe it has an unescaped tab in a value.\n"; - } - else if (*in->position() == '\r') - { - out << "ERROR: Carriage return found where line feed is expected." - " It's like your file has DOS/Windows style line separators, that is illegal in TabSeparated format.\n"; - } - else - { - out << "ERROR: There is no line feed. 
"; - verbosePrintString(in->position(), in->position() + 1, out); - out << " found instead.\n"; - } - return false; - } - } - } - else - { - try - { - assertChar('\t', *in); - } - catch (const DB::Exception &) - { - if (*in->position() == '\n') - { - out << "ERROR: Line feed found where tab is expected." - " It's like your file has less columns than expected.\n" - "And if your file has the right number of columns, " - "maybe it has an unescaped backslash in value before tab, which causes the tab to be escaped.\n"; - } - else if (*in->position() == '\r') - { - out << "ERROR: Carriage return found where tab is expected.\n"; - } - else - { - out << "ERROR: There is no tab. "; - verbosePrintString(in->position(), in->position() + 1, out); - out << " found instead.\n"; - } - return false; - } + out << "ERROR: There is no tab. "; + verbosePrintString(in->position(), in->position() + 1, out); + out << " found instead.\n"; } + return false; } return true; } -void TabSeparatedRowInputFormat::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) +bool TabSeparatedRowInputFormat::parseRowEndWithDiagnosticInfo(WriteBuffer & out) { - const auto & index = column_mapping->column_indexes_for_input_fields[file_column]; - if (index) - { - bool can_be_parsed_as_null = removeLowCardinality(type)->isNullable(); + if (in->eof()) + return true; - // check null value for type is not nullable. don't cross buffer bound for simplicity, so maybe missing some case - if (!can_be_parsed_as_null && !in->eof()) + try + { + assertChar('\n', *in); + } + catch (const DB::Exception &) + { + if (*in->position() == '\t') { - if (*in->position() == '\\' && in->available() >= 2) + out << "ERROR: Tab found where line feed is expected." + " It's like your file has more columns than expected.\n" + "And if your file has the right number of columns, maybe it has an unescaped tab in a value.\n"; + } + else if (*in->position() == '\r') + { + out << "ERROR: Carriage return found where line feed is expected." + " It's like your file has DOS/Windows style line separators, that is illegal in TabSeparated format.\n"; + } + else + { + out << "ERROR: There is no line feed. "; + verbosePrintString(in->position(), in->position() + 1, out); + out << " found instead.\n"; + } + return false; + } + + return true; +} + +void TabSeparatedRowInputFormat::checkNullValueForNonNullable(DataTypePtr type) +{ + bool can_be_parsed_as_null = type->isNullable() || type->isLowCardinalityNullable() || format_settings.null_as_default; + + // check null value for type is not nullable. 
don't cross buffer bound for simplicity, so maybe missing some case + if (!can_be_parsed_as_null && !in->eof()) + { + if (*in->position() == '\\' && in->available() >= 2) + { + ++in->position(); + if (*in->position() == 'N') { ++in->position(); - if (*in->position() == 'N') - { - ++in->position(); - throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected NULL value of not Nullable type {}", type->getName()); - } - else - { - --in->position(); - } + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected NULL value of not Nullable type {}", type->getName()); + } + else + { + --in->position(); } } - - const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size(); - readField(column, type, serializations[*index], is_last_file_column); - } - else - { - NullOutput null_sink; - readEscapedStringInto(null_sink, *in); } } @@ -375,67 +218,28 @@ void TabSeparatedRowInputFormat::syncAfterError() skipToUnescapedNextLineOrEOF(*in); } -void TabSeparatedRowInputFormat::resetParser() -{ - RowInputFormatWithDiagnosticInfo::resetParser(); - const auto & sample = getPort().getHeader(); - column_mapping->read_columns.assign(sample.columns(), false); - column_mapping->column_indexes_for_input_fields.clear(); - columns_to_fill_with_default_values.clear(); -} - void registerInputFormatTabSeparated(FormatFactory & factory) { - for (const auto * name : {"TabSeparated", "TSV"}) + for (bool is_raw : {false, true}) { - factory.registerInputFormat(name, []( - ReadBuffer & buf, - const Block & sample, - IRowInputFormat::Params params, - const FormatSettings & settings) + auto register_func = [&](const String & format_name, bool with_names, bool with_types) { - return std::make_shared(sample, buf, params, false, false, settings); - }); - } + factory.registerInputFormat(format_name, [with_names, with_types, is_raw]( + ReadBuffer & buf, + const Block & sample, + IRowInputFormat::Params params, + const FormatSettings & settings) + { + return std::make_shared(sample, buf, std::move(params), with_names, with_types, is_raw, settings); + }); + }; - for (const auto * name : {"TabSeparatedRaw", "TSVRaw"}) - { - factory.registerInputFormat(name, []( - ReadBuffer & buf, - const Block & sample, - IRowInputFormat::Params params, - const FormatSettings & settings) - { - return std::make_shared(sample, buf, params, false, false, settings); - }); - } - - for (const auto * name : {"TabSeparatedWithNames", "TSVWithNames"}) - { - factory.registerInputFormat(name, []( - ReadBuffer & buf, - const Block & sample, - IRowInputFormat::Params params, - const FormatSettings & settings) - { - return std::make_shared(sample, buf, params, true, false, settings); - }); - } - - for (const auto * name : {"TabSeparatedWithNamesAndTypes", "TSVWithNamesAndTypes"}) - { - factory.registerInputFormat(name, []( - ReadBuffer & buf, - const Block & sample, - IRowInputFormat::Params params, - const FormatSettings & settings) - { - return std::make_shared(sample, buf, params, true, true, settings); - }); + registerWithNamesAndTypes(is_raw ? "TabSeparatedRaw" : "TabSeparated", register_func); + registerWithNamesAndTypes(is_raw ? 
"TSVRaw" : "TSV", register_func); } } -static std::pair fileSegmentationEngineTabSeparatedImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) +static std::pair fileSegmentationEngineTabSeparatedImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, bool is_raw, size_t min_rows) { bool need_more_data = true; char * pos = in.position(); @@ -443,13 +247,18 @@ static std::pair fileSegmentationEngineTabSeparatedImpl(ReadBuffer while (loadAtPosition(in, memory, pos) && need_more_data) { - pos = find_first_symbols<'\\', '\r', '\n'>(pos, in.buffer().end()); + if (is_raw) + pos = find_first_symbols<'\r', '\n'>(pos, in.buffer().end()); + else + pos = find_first_symbols<'\\', '\r', '\n'>(pos, in.buffer().end()); if (pos > in.buffer().end()) - throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); - else if (pos == in.buffer().end()) + throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + + if (pos == in.buffer().end()) continue; - else if (*pos == '\\') + + if (!is_raw && *pos == '\\') { ++pos; if (loadAtPosition(in, memory, pos)) @@ -460,7 +269,7 @@ static std::pair fileSegmentationEngineTabSeparatedImpl(ReadBuffer if (*pos == '\n') ++number_of_rows; - if (memory.size() + static_cast(pos - in.position()) >= min_chunk_size) + if ((memory.size() + static_cast(pos - in.position()) >= min_chunk_size) && number_of_rows >= min_rows) need_more_data = false; ++pos; } @@ -473,11 +282,29 @@ static std::pair fileSegmentationEngineTabSeparatedImpl(ReadBuffer void registerFileSegmentationEngineTabSeparated(FormatFactory & factory) { - // We can use the same segmentation engine for TSKV. - for (const auto & name : {"TabSeparated", "TSV", "TSKV", "TabSeparatedWithNames", "TSVWithNames"}) + for (bool is_raw : {false, true}) { - factory.registerFileSegmentationEngine(name, &fileSegmentationEngineTabSeparatedImpl); + auto register_func = [&](const String & format_name, bool with_names, bool with_types) + { + size_t min_rows = 1 + int(with_names) + int(with_types); + factory.registerFileSegmentationEngine(format_name, [is_raw, min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) + { + return fileSegmentationEngineTabSeparatedImpl(in, memory, min_chunk_size, is_raw, min_rows); + }); + }; + + registerWithNamesAndTypes(is_raw ? "TSVRaw" : "TSV", register_func); + registerWithNamesAndTypes(is_raw ? "TabSeparatedRaw" : "TabSeparated", register_func); } + + // We can use the same segmentation engine for TSKV. + factory.registerFileSegmentationEngine("TSKV", []( + ReadBuffer & in, + DB::Memory<> & memory, + size_t min_chunk_size) + { + return fileSegmentationEngineTabSeparatedImpl(in, memory, min_chunk_size, false, 1); + }); } } diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h index 8127b5ceba7..11a788bc900 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h @@ -2,7 +2,7 @@ #include #include -#include +#include namespace DB @@ -10,46 +10,43 @@ namespace DB /** A stream to input data in tsv format. 
*/ -class TabSeparatedRowInputFormat : public RowInputFormatWithDiagnosticInfo +class TabSeparatedRowInputFormat : public RowInputFormatWithNamesAndTypes { public: /** with_names - the first line is the header with the names of the columns * with_types - on the next line header with type names */ TabSeparatedRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_, - bool with_names_, bool with_types_, const FormatSettings & format_settings_); + bool with_names_, bool with_types_, bool is_raw, const FormatSettings & format_settings_); String getName() const override { return "TabSeparatedRowInputFormat"; } - bool readRow(MutableColumns & columns, RowReadExtension &) override; - void readPrefix() override; bool allowSyncAfterError() const override { return true; } void syncAfterError() override; - void resetParser() override; - -protected: - bool with_names; - bool with_types; - const FormatSettings format_settings; - - virtual bool readField(IColumn & column, const DataTypePtr & type, - const SerializationPtr & serialization, bool is_last_file_column); - private: - DataTypes data_types; + bool is_raw; - using IndexesMap = std::unordered_map; - IndexesMap column_indexes_by_names; + bool readField(IColumn & column, const DataTypePtr & type, + const SerializationPtr & serialization, bool is_last_file_column, const String & column_name) override; - std::vector columns_to_fill_with_default_values; + void skipField(size_t /*file_column*/) override { skipField(); } + void skipField(); + void skipHeaderRow(); + void skipNames() override { skipHeaderRow(); } + void skipTypes() override { skipHeaderRow(); } + void skipFieldDelimiter() override; + void skipRowEndDelimiter() override; - void addInputColumn(const String & column_name); - void setupAllColumnsByTableSchema(); - void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension & row_read_extension); + std::vector readHeaderRow(); + std::vector readNames() override { return readHeaderRow(); } + std::vector readTypes() override { return readHeaderRow(); } + String readFieldIntoString(); - bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override; - void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override; + void checkNullValueForNonNullable(DataTypePtr type) override; + + bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) override; + bool parseRowEndWithDiagnosticInfo(WriteBuffer & out) override; bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override { return *pos != '\n' && *pos != '\t'; } }; diff --git a/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp index 71d5bdba355..df0c19ad409 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp @@ -1,6 +1,6 @@ #include -#include #include +#include #include @@ -11,41 +11,43 @@ TabSeparatedRowOutputFormat::TabSeparatedRowOutputFormat( const Block & header_, bool with_names_, bool with_types_, + bool is_raw_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) - : IRowOutputFormat(header_, out_, params_), with_names(with_names_), with_types(with_types_), format_settings(format_settings_) + : IRowOutputFormat(header_, out_, params_), with_names(with_names_), with_types(with_types_), is_raw(is_raw_), format_settings(format_settings_) { } +void TabSeparatedRowOutputFormat::writeLine(const 
std::vector & values) +{ + for (size_t i = 0; i < values.size(); ++i) + { + writeEscapedString(values[i], out); + if (i + 1 == values.size()) + writeRowEndDelimiter(); + else + writeFieldDelimiter(); + } +} void TabSeparatedRowOutputFormat::doWritePrefix() { const auto & header = getPort(PortKind::Main).getHeader(); - size_t columns = header.columns(); if (with_names) - { - for (size_t i = 0; i < columns; ++i) - { - writeEscapedString(header.safeGetByPosition(i).name, out); - writeChar(i == columns - 1 ? '\n' : '\t', out); - } - } + writeLine(header.getNames()); if (with_types) - { - for (size_t i = 0; i < columns; ++i) - { - writeEscapedString(header.safeGetByPosition(i).type->getName(), out); - writeChar(i == columns - 1 ? '\n' : '\t', out); - } - } + writeLine(header.getDataTypeNames()); } void TabSeparatedRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) { - serialization.serializeTextEscaped(column, row_num, out, format_settings); + if (is_raw) + serialization.serializeTextRaw(column, row_num, out, format_settings); + else + serialization.serializeTextEscaped(column, row_num, out, format_settings); } @@ -75,56 +77,24 @@ void TabSeparatedRowOutputFormat::writeBeforeExtremes() void registerOutputFormatTabSeparated(FormatFactory & factory) { - for (const auto * name : {"TabSeparated", "TSV"}) + for (bool is_raw : {false, true}) { - factory.registerOutputFormat(name, []( - WriteBuffer & buf, - const Block & sample, - const RowOutputFormatParams & params, - const FormatSettings & settings) + auto register_func = [&](const String & format_name, bool with_names, bool with_types) { - return std::make_shared(buf, sample, false, false, params, settings); - }); - factory.markOutputFormatSupportsParallelFormatting(name); - } + factory.registerOutputFormat(format_name, [is_raw, with_names, with_types]( + WriteBuffer & buf, + const Block & sample, + const RowOutputFormatParams & params, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, with_names, with_types, is_raw, params, settings); + }); - for (const auto * name : {"TabSeparatedRaw", "TSVRaw"}) - { - factory.registerOutputFormat(name, []( - WriteBuffer & buf, - const Block & sample, - const RowOutputFormatParams & params, - const FormatSettings & settings) - { - return std::make_shared(buf, sample, false, false, params, settings); - }); - factory.markOutputFormatSupportsParallelFormatting(name); - } + factory.markOutputFormatSupportsParallelFormatting(format_name); + }; - for (const auto * name : {"TabSeparatedWithNames", "TSVWithNames"}) - { - factory.registerOutputFormat(name, []( - WriteBuffer & buf, - const Block & sample, - const RowOutputFormatParams & params, - const FormatSettings & settings) - { - return std::make_shared(buf, sample, true, false, params, settings); - }); - factory.markOutputFormatSupportsParallelFormatting(name); - } - - for (const auto * name : {"TabSeparatedWithNamesAndTypes", "TSVWithNamesAndTypes"}) - { - factory.registerOutputFormat(name, []( - WriteBuffer & buf, - const Block & sample, - const RowOutputFormatParams & params, - const FormatSettings & settings) - { - return std::make_shared(buf, sample, true, true, params, settings); - }); - factory.markOutputFormatSupportsParallelFormatting(name); + registerWithNamesAndTypes(is_raw ? "TSVRaw" : "TSV", register_func); + registerWithNamesAndTypes(is_raw ? 
"TabSeparatedRaw" : "TabSeparated", register_func); } } diff --git a/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.h index e3190be70e8..7dcc6529f1c 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.h +++ b/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.h @@ -23,6 +23,7 @@ public: const Block & header_, bool with_names_, bool with_types_, + bool is_raw_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_); @@ -39,10 +40,13 @@ public: /// https://www.iana.org/assignments/media-types/text/tab-separated-values String getContentType() const override { return "text/tab-separated-values; charset=UTF-8"; } -protected: - +private: + void writeLine(const std::vector & values); bool with_names; bool with_types; + bool is_raw; + +protected: const FormatSettings format_settings; }; diff --git a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp index ed98ab372b6..db5db4701a9 100644 --- a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp @@ -130,7 +130,7 @@ void TemplateBlockOutputFormat::serializeField(const IColumn & column, const ISe serialization.serializeTextXML(column, row_num, out, settings); break; case ColumnFormat::Raw: - serialization.serializeText(column, row_num, out, settings); + serialization.serializeTextRaw(column, row_num, out, settings); break; default: __builtin_unreachable(); diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index 021b2532b39..c096b62e967 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -45,8 +45,8 @@ TemplateRowInputFormat::TemplateRowInputFormat(const Block & header_, ReadBuffer } else { - if (format.formats[i] == ColumnFormat::Xml || format.formats[i] == ColumnFormat::Raw) - format.throwInvalidFormat("XML and Raw deserialization is not supported", i); + if (format.formats[i] == ColumnFormat::Xml) + format.throwInvalidFormat("XML deserialization is not supported", i); } } @@ -54,8 +54,8 @@ TemplateRowInputFormat::TemplateRowInputFormat(const Block & header_, ReadBuffer std::vector column_in_format(header_.columns(), false); for (size_t i = 0; i < row_format.columnsCount(); ++i) { - if (row_format.formats[i] == ColumnFormat::Xml || row_format.formats[i] == ColumnFormat::Raw) - row_format.throwInvalidFormat("XML and Raw deserialization is not supported", i); + if (row_format.formats[i] == ColumnFormat::Xml) + row_format.throwInvalidFormat("XML deserialization is not supported", i); if (row_format.format_idx_to_column_idx[i]) { @@ -194,7 +194,7 @@ bool TemplateRowInputFormat::deserializeField(const DataTypePtr & type, { ColumnFormat col_format = row_format.formats[file_column]; bool read = true; - bool parse_as_nullable = settings.null_as_default && !type->isNullable(); + bool parse_as_nullable = settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable(); try { switch (col_format) @@ -226,6 +226,12 @@ bool TemplateRowInputFormat::deserializeField(const DataTypePtr & type, else serialization->deserializeTextJSON(column, buf, settings); break; + case ColumnFormat::Raw: + if (parse_as_nullable) + read = SerializationNullable::deserializeTextRawImpl(column, buf, settings, serialization); + else + 
serialization->deserializeTextRaw(column, buf, settings); + break; default: __builtin_unreachable(); } diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index c15d0d608ee..5f471dc0151 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -180,7 +180,7 @@ bool ValuesBlockInputFormat::tryReadValue(IColumn & column, size_t column_idx) bool read = true; const auto & type = types[column_idx]; const auto & serialization = serializations[column_idx]; - if (format_settings.null_as_default && !type->isNullable()) + if (format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable()) read = SerializationNullable::deserializeTextQuotedImpl(column, *buf, format_settings, serialization); else serialization->deserializeTextQuoted(column, *buf, format_settings); @@ -421,7 +421,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx Field value = convertFieldToType(expression_value, type, value_raw.second.get()); /// Check that we are indeed allowed to insert a NULL. - if (value.isNull() && !type.isNullable()) + if (value.isNull() && !type.isNullable() && !type.isLowCardinalityNullable()) { if (format_settings.null_as_default) { diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp new file mode 100644 index 00000000000..b5690d9dafb --- /dev/null +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -0,0 +1,260 @@ +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; +} + +RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes( + const Block & header_, + ReadBuffer & in_, + const Params & params_, + bool with_names_, + bool with_types_, + const FormatSettings & format_settings_) + : RowInputFormatWithDiagnosticInfo(header_, in_, params_), format_settings(format_settings_), with_names(with_names_), with_types(with_types_) +{ + const auto & sample = getPort().getHeader(); + size_t num_columns = sample.columns(); + + data_types.resize(num_columns); + column_indexes_by_names.reserve(num_columns); + + for (size_t i = 0; i < num_columns; ++i) + { + const auto & column_info = sample.getByPosition(i); + + data_types[i] = column_info.type; + column_indexes_by_names.emplace(column_info.name, i); + } +} + +void RowInputFormatWithNamesAndTypes::setupAllColumnsByTableSchema() +{ + const auto & header = getPort().getHeader(); + column_mapping->column_indexes_for_input_fields.resize(header.columns()); + column_mapping->names_of_columns = header.getNames(); + + for (size_t i = 0; i < column_mapping->column_indexes_for_input_fields.size(); ++i) + column_mapping->column_indexes_for_input_fields[i] = i; +} + +void RowInputFormatWithNamesAndTypes::addInputColumn(const String & column_name, std::vector & read_columns) +{ + column_mapping->names_of_columns.push_back(column_name); + + const auto column_it = column_indexes_by_names.find(column_name); + if (column_it == column_indexes_by_names.end()) + { + if (format_settings.skip_unknown_fields) + { + column_mapping->column_indexes_for_input_fields.push_back(std::nullopt); + return; + } + + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Unknown field found in {} header: '{}' at position {}\nSet the 'input_format_skip_unknown_fields' parameter explicitly to ignore and proceed", + getName(), column_name, 
column_mapping->column_indexes_for_input_fields.size()); + } + + const auto column_index = column_it->second; + + if (read_columns[column_index]) + throw Exception("Duplicate field found while parsing TSV header: " + column_name, ErrorCodes::INCORRECT_DATA); + + read_columns[column_index] = true; + column_mapping->column_indexes_for_input_fields.emplace_back(column_index); +} + +void RowInputFormatWithNamesAndTypes::readPrefix() +{ + if (with_names || with_types || data_types.at(0)->textCanContainOnlyValidUTF8()) + { + /// We assume that column name or type cannot contain BOM, so, if format has header, + /// then BOM at beginning of stream cannot be confused with name or type of field, and it is safe to skip it. + skipBOMIfExists(*in); + } + + /// This is a bit of abstraction leakage, but we need it in parallel parsing: + /// we check if this InputFormat is working with the "real" beginning of the data. + if (with_names && getCurrentUnitNumber() == 0) + { + if (format_settings.with_names_use_header) + { + std::vector read_columns(data_types.size(), false); + auto column_names = readNames(); + for (const auto & name : column_names) + addInputColumn(name, read_columns); + + for (size_t i = 0; i != read_columns.size(); ++i) + { + if (!read_columns[i]) + column_mapping->not_presented_columns.push_back(i); + } + } + else + { + setupAllColumnsByTableSchema(); + skipNames(); + } + } + else if (!column_mapping->is_set) + setupAllColumnsByTableSchema(); + + if (with_types && getCurrentUnitNumber() == 0) + { + if (format_settings.with_types_use_header) + { + auto types = readTypes(); + if (types.size() != column_mapping->column_indexes_for_input_fields.size()) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "The number of data types differs from the number of column names in input data"); + + /// Check that types from input matches types from header. 
+ for (size_t i = 0; i < types.size(); ++i) + { + if (column_mapping->column_indexes_for_input_fields[i] && + data_types[*column_mapping->column_indexes_for_input_fields[i]]->getName() != types[i]) + { + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Type of '{}' must be {}, not {}", + getPort().getHeader().getByPosition(*column_mapping->column_indexes_for_input_fields[i]).name, + data_types[*column_mapping->column_indexes_for_input_fields[i]]->getName(), types[i]); + } + } + } + else + skipTypes(); + } +} + +void RowInputFormatWithNamesAndTypes::insertDefaultsForNotSeenColumns(MutableColumns & columns, RowReadExtension & ext) +{ + for (auto index : column_mapping->not_presented_columns) + { + columns[index]->insertDefault(); + ext.read_columns[index] = false; + } +} + +bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadExtension & ext) +{ + if (in->eof()) + return false; + + updateDiagnosticInfo(); + skipRowStartDelimiter(); + + ext.read_columns.resize(data_types.size()); + for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) + { + const auto & column_index = column_mapping->column_indexes_for_input_fields[file_column]; + const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size(); + if (column_index) + ext.read_columns[*column_index] = readField( + *columns[*column_index], + data_types[*column_index], + serializations[*column_index], + is_last_file_column, + column_mapping->names_of_columns[file_column]); + else + skipField(file_column); + + if (!is_last_file_column) + skipFieldDelimiter(); + } + + skipRowEndDelimiter(); + + insertDefaultsForNotSeenColumns(columns, ext); + + /// If defaults_for_omitted_fields is set to 0, we should leave already inserted defaults. 
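(Aside: to make the prefix handling above concrete, this is what a *WithNamesAndTypes input looks like for a two-column table — the first row is consumed by readNames() and mapped onto the table columns, the second by readTypes() and compared textually against the column types, with a mismatch raising INCORRECT_DATA as in the check above. Example data only, TSV flavour assumed:

#include <string>

// TSVWithNamesAndTypes payload for a table (id UInt64, name String).
static const std::string sample_input =
    "id\tname\n"         // names header -> readNames()
    "UInt64\tString\n"   // types header -> readTypes(), checked against the table
    "1\talice\n"         // data rows -> readRow()
    "2\tbob\n";
)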
+ if (!format_settings.defaults_for_omitted_fields) + ext.read_columns.assign(ext.read_columns.size(), true); + + return true; +} + +void RowInputFormatWithNamesAndTypes::resetParser() +{ + RowInputFormatWithDiagnosticInfo::resetParser(); + column_mapping->column_indexes_for_input_fields.clear(); + column_mapping->not_presented_columns.clear(); + column_mapping->names_of_columns.clear(); +} + +void RowInputFormatWithNamesAndTypes::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) +{ + const auto & index = column_mapping->column_indexes_for_input_fields[file_column]; + if (index) + { + checkNullValueForNonNullable(type); + const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size(); + readField(column, type, serializations[*index], is_last_file_column, column_mapping->names_of_columns[file_column]); + } + else + { + skipField(file_column); + } +} + +bool RowInputFormatWithNamesAndTypes::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) +{ + if (in->eof()) + { + out << "\n"; + return false; + } + + if (!parseRowStartWithDiagnosticInfo(out)) + return false; + + for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) + { + if (column_mapping->column_indexes_for_input_fields[file_column].has_value()) + { + const auto & header = getPort().getHeader(); + size_t col_idx = column_mapping->column_indexes_for_input_fields[file_column].value(); + if (!deserializeFieldAndPrintDiagnosticInfo(header.getByPosition(col_idx).name, data_types[col_idx], *columns[col_idx], out, file_column)) + return false; + } + else + { + static const String skipped_column_str = ""; + static const DataTypePtr skipped_column_type = std::make_shared(); + static const MutableColumnPtr skipped_column = skipped_column_type->createColumn(); + if (!deserializeFieldAndPrintDiagnosticInfo(skipped_column_str, skipped_column_type, *skipped_column, out, file_column)) + return false; + } + + /// Delimiters + if (file_column + 1 != column_mapping->column_indexes_for_input_fields.size()) + { + if (!parseFieldDelimiterWithDiagnosticInfo(out)) + return false; + } + } + + return parseRowEndWithDiagnosticInfo(out); +} + + +void registerFileSegmentationEngineForFormatWithNamesAndTypes( + FormatFactory & factory, const String & base_format_name, FormatFactory::FileSegmentationEngine segmentation_engine) +{ + factory.registerFileSegmentationEngine(base_format_name, segmentation_engine); + factory.registerFileSegmentationEngine(base_format_name + "WithNames", segmentation_engine); + factory.registerFileSegmentationEngine(base_format_name + "WithNamesAndTypes", segmentation_engine); +} + + +} diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h new file mode 100644 index 00000000000..0fd83238f5f --- /dev/null +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h @@ -0,0 +1,85 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +/// Base class for input formats with -WithNames and -WithNamesAndTypes suffixes. +/// It accepts 2 parameters in constructor - with_names and with_types and implements +/// input format depending on them: +/// - if with_names is true, it will expect that the first row of data contains column +/// names. If setting input_format_with_names_use_header is set to 1, columns mapping +/// will be performed. 
+/// - if with_types is true, it will expect that the second row of data contains column +/// types. If setting input_format_with_types_use_header is set to 1, types from input +/// will be compared types from header. +/// It's important that firstly this class reads/skips names and only +/// then reads/skips types. So you can this invariant. +class RowInputFormatWithNamesAndTypes : public RowInputFormatWithDiagnosticInfo +{ +public: + /** with_names - in the first line the header with column names + * with_types - in the second line the header with column names + */ + RowInputFormatWithNamesAndTypes( + const Block & header_, + ReadBuffer & in_, + const Params & params_, + bool with_names_, bool with_types_, const FormatSettings & format_settings_); + + bool readRow(MutableColumns & columns, RowReadExtension & ext) override; + void readPrefix() override; + void resetParser() override; + +protected: + /// Read single field from input. Return false if there was no real value and we inserted default value. + virtual bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String & column_name) = 0; + + /// Skip single field, it's used to skip unknown columns. + virtual void skipField(size_t file_column) = 0; + /// Skip the whole row with names. + virtual void skipNames() = 0; + /// Skip the whole row with types. + virtual void skipTypes() = 0; + + /// Skip delimiters, if any. + virtual void skipRowStartDelimiter() {} + virtual void skipFieldDelimiter() {} + virtual void skipRowEndDelimiter() {} + + + /// Methods for parsing with diagnostic info. + virtual void checkNullValueForNonNullable(DataTypePtr) {} + virtual bool parseRowStartWithDiagnosticInfo(WriteBuffer &) { return true; } + virtual bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer &) { return true; } + virtual bool parseRowEndWithDiagnosticInfo(WriteBuffer &) { return true;} + bool isGarbageAfterField(size_t, ReadBuffer::Position) override {return false; } + + /// Read row with names and return the list of them. + virtual std::vector readNames() = 0; + /// Read row with types and return the list of them. 
+ virtual std::vector readTypes() = 0; + + const FormatSettings format_settings; + DataTypes data_types; + +private: + bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override; + void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override; + + void setupAllColumnsByTableSchema(); + void addInputColumn(const String & column_name, std::vector & read_columns); + void insertDefaultsForNotSeenColumns(MutableColumns & columns, RowReadExtension & ext); + + bool with_names; + bool with_types; + std::unordered_map column_indexes_by_names; +}; + +void registerFileSegmentationEngineForFormatWithNamesAndTypes( + FormatFactory & factory, const String & base_format_name, FormatFactory::FileSegmentationEngine segmentation_engine); + +} diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 519de724f9e..f358db3aa1c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -244,6 +244,16 @@ struct PartRangesReadInfo bool use_uncompressed_cache = false; + static bool checkAllPartsOnRemoteFS(const RangesInDataParts & parts) + { + for (const auto & part : parts) + { + if (!part.data_part->isStoredOnRemoteDisk()) + return false; + } + return true; + } + PartRangesReadInfo( const RangesInDataParts & parts, const Settings & settings, @@ -270,9 +280,12 @@ struct PartRangesReadInfo data_settings.index_granularity, index_granularity_bytes); + auto all_parts_on_remote_disk = checkAllPartsOnRemoteFS(parts); min_marks_for_concurrent_read = MergeTreeDataSelectExecutor::minMarksForConcurrentRead( - settings.merge_tree_min_rows_for_concurrent_read, - settings.merge_tree_min_bytes_for_concurrent_read, + all_parts_on_remote_disk ? settings.merge_tree_min_rows_for_concurrent_read_for_remote_filesystem + : settings.merge_tree_min_rows_for_concurrent_read, + all_parts_on_remote_disk ? settings.merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem + : settings.merge_tree_min_bytes_for_concurrent_read, data_settings.index_granularity, index_granularity_bytes, sum_marks); diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index 76920777bdc..01d51940d64 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -8,6 +8,9 @@ namespace DB /// Sink which is returned from Storage::write. class SinkToStorage : public ExceptionKeepingTransform { +/// PartitionedSink owns nested sinks. 
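(Aside on the ReadFromMergeTree hunk above: when checkAllPartsOnRemoteFS() reports that every selected part lives on a remote disk, the dedicated merge_tree_min_rows/bytes_for_concurrent_read_for_remote_filesystem settings are used instead of the local ones when computing min_marks_for_concurrent_read. A simplified sketch of that selection; the struct and setting values are placeholders, not ClickHouse types or defaults:

#include <algorithm>
#include <cstddef>
#include <vector>

// Stand-in for RangesInDataParts: only the bit of state the check needs.
struct PartSketch { bool stored_on_remote_disk; };

static bool allPartsOnRemoteFS(const std::vector<PartSketch> & parts)
{
    return std::all_of(parts.begin(), parts.end(),
                       [](const PartSketch & p) { return p.stored_on_remote_disk; });
}

// Pick the row threshold that feeds minMarksForConcurrentRead().
static size_t minRowsForConcurrentRead(const std::vector<PartSketch> & parts,
                                       size_t min_rows_local, size_t min_rows_remote_fs)
{
    return allPartsOnRemoteFS(parts) ? min_rows_remote_fs : min_rows_local;
}
)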
+friend class PartitionedSink; + public: explicit SinkToStorage(const Block & header); @@ -35,4 +38,5 @@ public: void consume(Chunk) override {} }; +using SinkPtr = std::shared_ptr; } diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 5fe051e9498..1276157cc91 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -5,6 +5,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -29,6 +32,113 @@ Block FillingTransform::transformHeader(Block header, const SortDescription & so return header; } +template +static FillColumnDescription::StepFunction getStepFunction( + IntervalKind kind, Int64 step, const DateLUTImpl & date_lut) +{ + switch (kind) + { + #define DECLARE_CASE(NAME) \ + case IntervalKind::NAME: \ + return [step, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(get(field), step, date_lut); }; + + FOR_EACH_INTERVAL_KIND(DECLARE_CASE) + #undef DECLARE_CASE + } + __builtin_unreachable(); +} + +static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & type) +{ + auto max_type = Field::Types::Null; + WhichDataType which(type); + DataTypePtr to_type; + + /// TODO Wrong results for big integers. + if (isInteger(type) || which.isDate() || which.isDate32() || which.isDateTime()) + { + max_type = Field::Types::Int64; + to_type = std::make_shared(); + } + else if (which.isDateTime64()) + { + max_type = Field::Types::Decimal64; + const auto & date_type = static_cast(*type); + size_t precision = date_type.getPrecision(); + size_t scale = date_type.getScale(); + to_type = std::make_shared>(precision, scale); + } + else if (which.isFloat()) + { + max_type = Field::Types::Float64; + to_type = std::make_shared(); + } + else + return false; + + if (descr.fill_from.getType() > max_type + || descr.fill_to.getType() > max_type + || descr.fill_step.getType() > max_type) + return false; + + descr.fill_from = convertFieldToType(descr.fill_from, *to_type); + descr.fill_to = convertFieldToType(descr.fill_to, *to_type); + descr.fill_step = convertFieldToType(descr.fill_step, *to_type); + + if (descr.step_kind) + { + if (which.isDate() || which.isDate32()) + { + Int64 avg_seconds = get(descr.fill_step) * descr.step_kind->toAvgSeconds(); + if (avg_seconds < 86400) + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "Value of step is to low ({} seconds). 
Must be >= 1 day", avg_seconds); + } + + if (which.isDate()) + descr.step_func = getStepFunction(*descr.step_kind, get(descr.fill_step), DateLUT::instance()); + else if (which.isDate32()) + descr.step_func = getStepFunction(*descr.step_kind, get(descr.fill_step), DateLUT::instance()); + else if (const auto * date_time = checkAndGetDataType(type.get())) + descr.step_func = getStepFunction(*descr.step_kind, get(descr.fill_step), date_time->getTimeZone()); + else if (const auto * date_time64 = checkAndGetDataType(type.get())) + { + const auto & step_dec = get &>(descr.fill_step); + Int64 step = DecimalUtils::convertTo(step_dec.getValue(), step_dec.getScale()); + + switch (*descr.step_kind) + { + #define DECLARE_CASE(NAME) \ + case IntervalKind::NAME: \ + descr.step_func = [step, &time_zone = date_time64->getTimeZone()](Field & field) \ + { \ + auto field_decimal = get>(field); \ + auto components = DecimalUtils::splitWithScaleMultiplier(field_decimal.getValue(), field_decimal.getScaleMultiplier()); \ + auto res = Add##NAME##sImpl::execute(components, step, time_zone); \ + auto res_decimal = decimalFromComponentsWithMultiplier(res, field_decimal.getScaleMultiplier()); \ + field = DecimalField(res_decimal, field_decimal.getScale()); \ + }; \ + break; + + FOR_EACH_INTERVAL_KIND(DECLARE_CASE) + #undef DECLARE_CASE + } + } + else + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "STEP of Interval type can be used only with Date/DateTime types, but got {}", type->getName()); + } + else + { + descr.step_func = [step = descr.fill_step](Field & field) + { + applyVisitor(FieldVisitorSum(step), field); + }; + } + + return true; +} + FillingTransform::FillingTransform( const Block & header_, const SortDescription & sort_description_, bool on_totals_) : ISimpleTransform(header_, transformHeader(header_, sort_description_), true) @@ -40,46 +150,6 @@ FillingTransform::FillingTransform( if (on_totals) return; - auto try_convert_fields = [](auto & descr, const auto & type) - { - auto max_type = Field::Types::Null; - WhichDataType which(type); - DataTypePtr to_type; - - /// TODO Wrong results for big integers. 
- if (isInteger(type) || which.isDate() || which.isDate32() || which.isDateTime()) - { - max_type = Field::Types::Int64; - to_type = std::make_shared(); - } - else if (which.isDateTime64()) - { - max_type = Field::Types::Decimal64; - const auto & date_type = static_cast(*type); - size_t precision = date_type.getPrecision(); - size_t scale = date_type.getScale(); - to_type = std::make_shared>(precision, scale); - } - else if (which.isFloat()) - { - max_type = Field::Types::Float64; - to_type = std::make_shared(); - } - else - return false; - - if (descr.fill_from.getType() > max_type - || descr.fill_to.getType() > max_type - || descr.fill_step.getType() > max_type) - return false; - - descr.fill_from = convertFieldToType(descr.fill_from, *to_type); - descr.fill_to = convertFieldToType(descr.fill_to, *to_type); - descr.fill_step = convertFieldToType(descr.fill_step, *to_type); - - return true; - }; - std::vector is_fill_column(header_.columns()); for (size_t i = 0, size = sort_description.size(); i < size; ++i) { @@ -90,7 +160,7 @@ FillingTransform::FillingTransform( auto & descr = filling_row.getFillDescription(i); const auto & type = header_.getByPosition(block_position).type; - if (!try_convert_fields(descr, type)) + if (!tryConvertFields(descr, type)) throw Exception("Incompatible types of WITH FILL expression values with column type " + type->getName(), ErrorCodes::INVALID_WITH_FILL_EXPRESSION); diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 2ba5359ca31..2f28095f976 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -243,7 +243,7 @@ void MySQLHandler::authenticate(const String & user_name, const String & auth_pl try { // For compatibility with JavaScript MySQL client, Native41 authentication plugin is used when possible (if password is specified using double SHA1). Otherwise SHA256 plugin is used. 
- if (session->getAuthenticationTypeOrLogInFailure(user_name) == DB::Authentication::SHA256_PASSWORD) + if (session->getAuthenticationTypeOrLogInFailure(user_name) == DB::AuthenticationType::SHA256_PASSWORD) { authPluginSSL(); } diff --git a/src/Server/PostgreSQLHandler.h b/src/Server/PostgreSQLHandler.h index ca74b78a109..ded9616296a 100644 --- a/src/Server/PostgreSQLHandler.h +++ b/src/Server/PostgreSQLHandler.h @@ -6,6 +6,10 @@ #include #include "IServer.h" +#if !defined(ARCADIA_BUILD) +# include +#endif + #if USE_SSL # include #endif diff --git a/src/Server/PostgreSQLHandlerFactory.h b/src/Server/PostgreSQLHandlerFactory.h index 4550e9ee8e9..9103cbaad90 100644 --- a/src/Server/PostgreSQLHandlerFactory.h +++ b/src/Server/PostgreSQLHandlerFactory.h @@ -6,6 +6,10 @@ #include #include +#if !defined(ARCADIA_BUILD) +# include +#endif + namespace DB { diff --git a/src/Storages/ExternalDataSourceConfiguration.cpp b/src/Storages/ExternalDataSourceConfiguration.cpp index 71be37e4b5b..26a00b7cbed 100644 --- a/src/Storages/ExternalDataSourceConfiguration.cpp +++ b/src/Storages/ExternalDataSourceConfiguration.cpp @@ -259,6 +259,7 @@ void URLBasedDataSourceConfiguration::set(const URLBasedDataSourceConfiguration format = conf.format; compression_method = conf.compression_method; structure = conf.structure; + http_method = conf.http_method; } @@ -286,6 +287,18 @@ std::optional getURLBasedDataSourceConfiguration(const { configuration.url = config.getString(config_prefix + ".url", ""); } + else if (key == "method") + { + configuration.http_method = config.getString(config_prefix + ".method", ""); + } + else if (key == "format") + { + configuration.format = config.getString(config_prefix + ".format", ""); + } + else if (key == "structure") + { + configuration.structure = config.getString(config_prefix + ".structure", ""); + } else if (key == "headers") { Poco::Util::AbstractConfiguration::Keys header_keys; @@ -319,6 +332,8 @@ std::optional getURLBasedDataSourceConfiguration(const if (arg_name == "url") configuration.url = arg_value.safeGet(); + else if (arg_name == "method") + configuration.http_method = arg_value.safeGet(); else if (arg_name == "format") configuration.format = arg_value.safeGet(); else if (arg_name == "compression_method") diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h index e3b589b1bb4..7b6b4cc85b5 100644 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ b/src/Storages/ExternalDataSourceConfiguration.h @@ -93,6 +93,7 @@ struct URLBasedDataSourceConfiguration String structure; std::vector> headers; + String http_method; void set(const URLBasedDataSourceConfiguration & conf); }; diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 6e81f5577ab..b46668a233b 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -2,30 +2,44 @@ #if USE_HDFS -#include -#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + #include #include -#include -#include +#include +#include + +#include +#include +#include #include #include -#include -#include +#include + #include -#include -#include -#include -#include +#include + +#include +#include + #include #include #include #include -#include -#include -#include -#include -#include + #include @@ -47,8 +61,10 @@ StorageHDFS::StorageHDFS( const ConstraintsDescription & constraints_, const String & comment, ContextPtr context_, - const String & 
compression_method_ = "") + const String & compression_method_ = "", + ASTPtr partition_by_) : IStorage(table_id_), WithContext(context_), uri(uri_), format_name(format_name_), compression_method(compression_method_) + , partition_by(partition_by_) { context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); @@ -224,6 +240,43 @@ private: bool is_first_chunk = true; }; + +class PartitionedHDFSSink : public PartitionedSink +{ +public: + PartitionedHDFSSink( + const ASTPtr & partition_by, + const String & uri_, + const String & format_, + const Block & sample_block_, + ContextPtr context_, + const CompressionMethod compression_method_) + : PartitionedSink(partition_by, context_, sample_block_) + , uri(uri_) + , format(format_) + , sample_block(sample_block_) + , context(context_) + , compression_method(compression_method_) + { + } + + SinkPtr createSinkForPartition(const String & partition_id) override + { + auto path = PartitionedSink::replaceWildcards(uri, partition_id); + PartitionedSink::validatePartitionKey(path, true); + return std::make_shared(path, format, sample_block, context, compression_method); + } + +private: + const String uri; + + const String format; + const Block sample_block; + ContextPtr context; + const CompressionMethod compression_method; +}; + + /* Recursive directory listing with matched paths as a result. * Have the same method in StorageFile. */ @@ -315,13 +368,31 @@ Pipe StorageHDFS::read( return Pipe::unitePipes(std::move(pipes)); } -SinkToStoragePtr StorageHDFS::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/) +SinkToStoragePtr StorageHDFS::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/) { - return std::make_shared(uri, - format_name, - metadata_snapshot->getSampleBlock(), - getContext(), - chooseCompressionMethod(uri, compression_method)); + bool has_wildcards = uri.find(PartitionedSink::PARTITION_ID_WILDCARD) != String::npos; + const auto * insert_query = dynamic_cast(query.get()); + auto partition_by_ast = insert_query ? (insert_query->partition_by ? 
insert_query->partition_by : partition_by) : nullptr; + bool is_partitioned_implementation = partition_by_ast && has_wildcards; + + if (is_partitioned_implementation) + { + return std::make_shared( + partition_by_ast, + uri, + format_name, + metadata_snapshot->getSampleBlock(), + getContext(), + chooseCompressionMethod(uri, compression_method)); + } + else + { + return std::make_shared(uri, + format_name, + metadata_snapshot->getSampleBlock(), + getContext(), + chooseCompressionMethod(uri, compression_method)); + } } void StorageHDFS::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr context_, TableExclusiveLockHolder &) @@ -364,10 +435,15 @@ void registerStorageHDFS(StorageFactory & factory) compression_method = engine_args[2]->as().value.safeGet(); } else compression_method = "auto"; + ASTPtr partition_by; + if (args.storage_def->partition_by) + partition_by = args.storage_def->partition_by->clone(); + return StorageHDFS::create( - url, args.table_id, format_name, args.columns, args.constraints, args.comment, args.getContext(), compression_method); + url, args.table_id, format_name, args.columns, args.constraints, args.comment, args.getContext(), compression_method, partition_by); }, { + .supports_sort_order = true, // for partition by .source_access_type = AccessType::HDFS, }); } diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 9020a091782..945f0b9f0f1 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -36,6 +36,8 @@ public: NamesAndTypesList getVirtuals() const override; + bool supportsPartitionBy() const override { return true; } + protected: StorageHDFS( const String & uri_, @@ -45,12 +47,14 @@ protected: const ConstraintsDescription & constraints_, const String & comment, ContextPtr context_, - const String & compression_method_); + const String & compression_method_, + ASTPtr partition_by = nullptr); private: const String uri; String format_name; String compression_method; + ASTPtr partition_by; Poco::Logger * log = &Poco::Logger::get("StorageHDFS"); }; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index dd851f19906..021335fea1f 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -218,7 +218,7 @@ bool IStorage::isStaticStorage() const return false; } -BackupEntries IStorage::backup(const ASTs &, ContextPtr) const +BackupEntries IStorage::backup(const ASTs &, ContextPtr) { throw Exception("Table engine " + getName() + " doesn't support backups", ErrorCodes::NOT_IMPLEMENTED); } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 6c4bb44b63b..fa5f2c28b06 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -205,7 +205,7 @@ public: NameDependencies getDependentViewsByColumn(ContextPtr context) const; /// Prepares entries to backup data of the storage. - virtual BackupEntries backup(const ASTs & partitions, ContextPtr context) const; + virtual BackupEntries backup(const ASTs & partitions, ContextPtr context); /// Extract data from the backup and put it to the storage. 
virtual RestoreDataTasks restoreFromBackup(const BackupPtr & backup, const String & data_path_in_backup, const ASTs & partitions, ContextMutablePtr context); diff --git a/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp b/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp index 34ab48e501d..7b736e95d25 100644 --- a/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp +++ b/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp @@ -53,14 +53,13 @@ WriteBufferToKafkaProducer::~WriteBufferToKafkaProducer() void WriteBufferToKafkaProducer::countRow(const Columns & columns, size_t current_row) { - if (++rows % max_rows == 0) { const std::string & last_chunk = chunks.back(); size_t last_chunk_size = offset(); // if last character of last chunk is delimiter - we don't need it - if (delim && last_chunk[last_chunk_size - 1] == delim) + if (last_chunk_size && delim && last_chunk[last_chunk_size - 1] == delim) --last_chunk_size; std::string payload; diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index ec748d4d43a..e3e6b4382f4 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -41,7 +41,7 @@ limitations under the License. */ #include #include #include -#include +#include #include diff --git a/src/Storages/MergeTree/DropPartsRanges.cpp b/src/Storages/MergeTree/DropPartsRanges.cpp index ab808f59970..a8dfd807d77 100644 --- a/src/Storages/MergeTree/DropPartsRanges.cpp +++ b/src/Storages/MergeTree/DropPartsRanges.cpp @@ -53,7 +53,7 @@ void DropPartsRanges::removeDropRange(const ReplicatedMergeTreeLogEntryPtr & ent bool DropPartsRanges::hasDropRange(const MergeTreePartInfo & new_drop_range_info) const { - for (const auto & [znode_name, drop_range] : drop_ranges) + for (const auto & [_, drop_range] : drop_ranges) { if (drop_range.contains(new_drop_range_info)) return true; diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index d53faf144f9..dda7f235d97 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -333,8 +333,9 @@ const KeyCondition::AtomMap KeyCondition::atom_map [] (RPNElement & out, const Field &) { out.function = RPNElement::FUNCTION_IS_NULL; - // When using NULL_LAST, isNull means [+Inf, +Inf] - out.range = Range(Field(POSITIVE_INFINITY)); + // isNull means +Inf (NULLS_LAST) or -Inf (NULLS_FIRST), + // which is equivalent to not in Range (-Inf, +Inf) + out.range = Range(); return true; } } @@ -2002,7 +2003,10 @@ BoolMask KeyCondition::checkInHyperrectangle( /// No need to apply monotonic functions as nulls are kept. 
bool intersects = element.range.intersectsRange(*key_range); bool contains = element.range.containsRange(*key_range); + rpn_stack.emplace_back(intersects, !contains); + if (element.function == RPNElement::FUNCTION_IS_NULL) + rpn_stack.back() = !rpn_stack.back(); } else if ( element.function == RPNElement::FUNCTION_IN_SET diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 5cb819c44a4..4a438795c88 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -255,9 +255,12 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() ctx->is_cancelled = [merges_blocker = global_ctx->merges_blocker, ttl_merges_blocker = global_ctx->ttl_merges_blocker, - need_remove = ctx->need_remove_expired_values]() -> bool + need_remove = ctx->need_remove_expired_values, + merge_list_element = global_ctx->merge_list_element_ptr]() -> bool { - return merges_blocker->isCancelled() || (need_remove && ttl_merges_blocker->isCancelled()); + return merges_blocker->isCancelled() + || (need_remove && ttl_merges_blocker->isCancelled()) + || merge_list_element->is_cancelled.load(std::memory_order_relaxed); }; /// This is the end of preparation. Execution will be per block. diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index daa496fa517..dd95e3eaee2 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3423,7 +3423,6 @@ Pipe MergeTreeData::alterPartition( case PartitionCommand::MoveDestinationType::TABLE: { - checkPartitionCanBeDropped(command.partition); String dest_database = query_context->resolveDatabase(command.to_database); auto dest_storage = DatabaseCatalog::instance().getTable({dest_database, command.to_table}, query_context); movePartitionToTable(dest_storage, command.partition, query_context); @@ -3445,7 +3444,8 @@ Pipe MergeTreeData::alterPartition( case PartitionCommand::REPLACE_PARTITION: { - checkPartitionCanBeDropped(command.partition); + if (command.replace) + checkPartitionCanBeDropped(command.partition); String from_database = query_context->resolveDatabase(command.from_database); auto from_storage = DatabaseCatalog::instance().getTable({from_database, command.from_table}, query_context); replacePartitionFrom(from_storage, command.partition, command.replace, query_context); @@ -3500,7 +3500,7 @@ Pipe MergeTreeData::alterPartition( } -BackupEntries MergeTreeData::backup(const ASTs & partitions, ContextPtr local_context) const +BackupEntries MergeTreeData::backup(const ASTs & partitions, ContextPtr local_context) { DataPartsVector data_parts; if (partitions.empty()) @@ -3522,7 +3522,7 @@ BackupEntries MergeTreeData::backupDataParts(const DataPartsVector & data_parts) auto temp_dir_it = temp_dirs.find(disk); if (temp_dir_it == temp_dirs.end()) - temp_dir_it = temp_dirs.emplace(disk, std::make_shared(disk, "tmp_backup_")).first; + temp_dir_it = temp_dirs.emplace(disk, std::make_shared(disk, "tmp/backup_")).first; auto temp_dir_owner = temp_dir_it->second; fs::path temp_dir = temp_dir_owner->getPath(); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 22ec7ce6f53..1b617a2ec71 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -630,7 +630,7 @@ public: TableLockHolder & table_lock_holder); /// Prepares entries to backup data of the storage. 
- BackupEntries backup(const ASTs & partitions, ContextPtr context) const override; + BackupEntries backup(const ASTs & partitions, ContextPtr context) override; static BackupEntries backupDataParts(const DataPartsVector & data_parts); /// Extract data from the backup and put it to the storage. diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/src/Storages/MergeTree/MergeTreeReaderStream.cpp index c679d6eb869..c25b2104841 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderStream.cpp @@ -192,21 +192,33 @@ void MergeTreeReaderStream::seekToStart() void MergeTreeReaderStream::adjustForRange(MarkRange range) { + /** + * Note: this method is called multiple times for the same range of marks -- each time we + * read from stream, but we must update last_right_offset only if it is bigger than + * the last one to avoid redundantly cancelling prefetches. + */ auto [right_offset, mark_range_bytes] = getRightOffsetAndBytesRange(range.begin, range.end); if (!right_offset) { + if (last_right_offset && *last_right_offset == 0) + return; + + last_right_offset = 0; // Zero value means the end of file. if (cached_buffer) cached_buffer->setReadUntilEnd(); if (non_cached_buffer) non_cached_buffer->setReadUntilEnd(); } - else if (right_offset > last_right_offset) + else { + if (last_right_offset && right_offset <= last_right_offset.value()) + return; + last_right_offset = right_offset; if (cached_buffer) - cached_buffer->setReadUntilPosition(last_right_offset); + cached_buffer->setReadUntilPosition(right_offset); if (non_cached_buffer) - non_cached_buffer->setReadUntilPosition(last_right_offset); + non_cached_buffer->setReadUntilPosition(right_offset); } } diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.h b/src/Storages/MergeTree/MergeTreeReaderStream.h index 601abdc5f1e..b1410953ddb 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.h +++ b/src/Storages/MergeTree/MergeTreeReaderStream.h @@ -51,7 +51,7 @@ private: MarkCache * mark_cache; bool save_marks_in_cache; - size_t last_right_offset = 0; + std::optional last_right_offset; const MergeTreeIndexGranularityInfo * index_granularity_info; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 0ff76958ceb..e94e0b903b5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -218,8 +218,14 @@ void ReplicatedMergeTreeQueue::insertUnlocked( virtual_parts.add(virtual_part_name, nullptr); /// Don't add drop range parts to mutations /// they don't produce any useful parts - if (entry->type != LogEntry::DROP_RANGE) - addPartToMutations(virtual_part_name); + if (entry->type == LogEntry::DROP_RANGE) + continue; + + auto part_info = MergeTreePartInfo::fromPartName(virtual_part_name, format_version); + if (entry->type == LogEntry::REPLACE_RANGE && part_info.isFakeDropRangePart()) + continue; + + addPartToMutations(virtual_part_name, part_info); } /// Put 'DROP PARTITION' entries at the beginning of the queue not to make superfluous fetches of parts that will be eventually deleted @@ -435,19 +441,10 @@ void ReplicatedMergeTreeQueue::removeCoveredPartsFromMutations(const String & pa storage.mutations_finalizing_task->schedule(); } -void ReplicatedMergeTreeQueue::addPartToMutations(const String & part_name) +void ReplicatedMergeTreeQueue::addPartToMutations(const String & part_name, const MergeTreePartInfo & part_info) { - LOG_TEST(log, "Adding part {} 
to mutations", part_name); - - auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); - - /// Do not add special virtual parts to parts_to_do - if (part_info.isFakeDropRangePart()) - { - LOG_TEST(log, "Part {} is fake drop range part, will not add it to mutations", part_name); - return; - } + assert(!part_info.isFakeDropRangePart()); auto in_partition = mutations_by_partition.find(part_info.partition_id); if (in_partition == mutations_by_partition.end()) @@ -720,7 +717,7 @@ namespace { Names getPartNamesToMutate( - const ReplicatedMergeTreeMutationEntry & mutation, const ActiveDataPartSet & parts) + const ReplicatedMergeTreeMutationEntry & mutation, const ActiveDataPartSet & parts, const DropPartsRanges & drop_ranges) { Names result; for (const auto & pair : mutation.block_numbers) @@ -736,7 +733,11 @@ Names getPartNamesToMutate( { auto part_info = MergeTreePartInfo::fromPartName(covered_part_name, parts.getFormatVersion()); if (part_info.getDataVersion() < block_num) - result.push_back(covered_part_name); + { + /// We don't need to mutate part if it's covered by DROP_RANGE + if (!drop_ranges.hasDropRange(part_info)) + result.push_back(covered_part_name); + } } } @@ -842,7 +843,7 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, C } /// Initialize `mutation.parts_to_do`. First we need to mutate all parts in `current_parts`. - Strings current_parts_to_mutate = getPartNamesToMutate(*entry, current_parts); + Strings current_parts_to_mutate = getPartNamesToMutate(*entry, current_parts, drop_ranges); for (const String & current_part_to_mutate : current_parts_to_mutate) { assert(MergeTreePartInfo::fromPartName(current_part_to_mutate, format_version).level < MergeTreePartInfo::MAX_LEVEL); @@ -2235,7 +2236,7 @@ bool ReplicatedMergeTreeMergePredicate::isMutationFinished(const ReplicatedMerge { std::lock_guard lock(queue.state_mutex); - size_t suddenly_appeared_parts = getPartNamesToMutate(mutation, queue.virtual_parts).size(); + size_t suddenly_appeared_parts = getPartNamesToMutate(mutation, queue.virtual_parts, queue.drop_ranges).size(); if (suddenly_appeared_parts) { LOG_TRACE(queue.log, "Mutation {} is not done yet because {} parts to mutate suddenly appeared.", mutation.znode_name, suddenly_appeared_parts); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 273ac21eebc..133c154059e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -223,7 +223,7 @@ private: std::unique_lock & state_lock); /// Add part for mutations with block_number > part.getDataVersion() - void addPartToMutations(const String & part_name); + void addPartToMutations(const String & part_name, const MergeTreePartInfo & part_info); /// Remove covered parts from mutations (parts_to_do) which were assigned /// for mutation. 
If remove_covered_parts = true, than remove parts covered diff --git a/src/Storages/MergeTree/localBackup.cpp b/src/Storages/MergeTree/localBackup.cpp index 2e02481e065..236a0c5b5e4 100644 --- a/src/Storages/MergeTree/localBackup.cpp +++ b/src/Storages/MergeTree/localBackup.cpp @@ -4,7 +4,6 @@ #include #include - namespace DB { @@ -43,6 +42,27 @@ static void localBackupImpl(const DiskPtr & disk, const String & source_path, co } } +class CleanupOnFail +{ +public: + explicit CleanupOnFail(std::function && cleaner_) : cleaner(cleaner_), is_success(false) {} + + ~CleanupOnFail() + { + if (!is_success) + cleaner(); + } + + void success() + { + is_success = true; + } + +private: + std::function cleaner; + bool is_success; +}; + void localBackup(const DiskPtr & disk, const String & source_path, const String & destination_path, std::optional max_level) { if (disk->exists(destination_path) && !disk->isDirectoryEmpty(destination_path)) @@ -53,6 +73,8 @@ void localBackup(const DiskPtr & disk, const String & source_path, const String size_t try_no = 0; const size_t max_tries = 10; + CleanupOnFail cleanup([&](){disk->removeRecursive(destination_path);}); + /** Files in the directory can be permanently added and deleted. * If some file is deleted during an attempt to make a backup, then try again, * because it's important to take into account any new files that might appear. @@ -88,6 +110,8 @@ void localBackup(const DiskPtr & disk, const String & source_path, const String break; } + + cleanup.success(); } } diff --git a/src/Storages/PartitionedSink.cpp b/src/Storages/PartitionedSink.cpp new file mode 100644 index 00000000000..1d13cbe5f94 --- /dev/null +++ b/src/Storages/PartitionedSink.cpp @@ -0,0 +1,128 @@ +#include "PartitionedSink.h" + +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int CANNOT_PARSE_TEXT; +} + +PartitionedSink::PartitionedSink( + const ASTPtr & partition_by, + ContextPtr context_, + const Block & sample_block_) + : SinkToStorage(sample_block_) + , context(context_) + , sample_block(sample_block_) +{ + std::vector arguments(1, partition_by); + ASTPtr partition_by_string = makeASTFunction(FunctionToString::name, std::move(arguments)); + + auto syntax_result = TreeRewriter(context).analyze(partition_by_string, sample_block.getNamesAndTypesList()); + partition_by_expr = ExpressionAnalyzer(partition_by_string, syntax_result, context).getActions(false); + partition_by_column_name = partition_by_string->getColumnName(); +} + + +SinkPtr PartitionedSink::getSinkForPartition(const String & partition_id) +{ + auto it = sinks.find(partition_id); + if (it == sinks.end()) + { + auto sink = createSinkForPartition(partition_id); + std::tie(it, std::ignore) = sinks.emplace(partition_id, sink); + } + + return it->second; +} + + +void PartitionedSink::consume(Chunk chunk) +{ + const auto & columns = chunk.getColumns(); + + Block block_with_partition_by_expr = sample_block.cloneWithoutColumns(); + block_with_partition_by_expr.setColumns(columns); + partition_by_expr->execute(block_with_partition_by_expr); + + const auto * column = block_with_partition_by_expr.getByName(partition_by_column_name).column.get(); + + std::unordered_map sub_chunks_indices; + IColumn::Selector selector; + for (size_t row = 0; row < chunk.getNumRows(); ++row) + { + auto value = column->getDataAt(row); + auto [it, inserted] = sub_chunks_indices.emplace(value, sub_chunks_indices.size()); + selector.push_back(it->second); + 
} + + Chunks sub_chunks; + sub_chunks.reserve(sub_chunks_indices.size()); + for (size_t column_index = 0; column_index < columns.size(); ++column_index) + { + MutableColumns column_sub_chunks = columns[column_index]->scatter(sub_chunks_indices.size(), selector); + if (column_index == 0) /// Set sizes for sub-chunks. + { + for (const auto & column_sub_chunk : column_sub_chunks) + { + sub_chunks.emplace_back(Columns(), column_sub_chunk->size()); + } + } + for (size_t sub_chunk_index = 0; sub_chunk_index < column_sub_chunks.size(); ++sub_chunk_index) + { + sub_chunks[sub_chunk_index].addColumn(std::move(column_sub_chunks[sub_chunk_index])); + } + } + + for (const auto & [partition_id, sub_chunk_index] : sub_chunks_indices) + { + getSinkForPartition(partition_id)->consume(std::move(sub_chunks[sub_chunk_index])); + } +} + + +void PartitionedSink::onFinish() +{ + for (auto & [partition_id, sink] : sinks) + { + sink->onFinish(); + } +} + + +void PartitionedSink::validatePartitionKey(const String & str, bool allow_slash) +{ + for (const char * i = str.data(); i != str.data() + str.size(); ++i) + { + if (static_cast(*i) < 0x20 || *i == '{' || *i == '}' || *i == '*' || *i == '?' || (!allow_slash && *i == '/')) + { + /// Need to convert to UInt32 because UInt8 can't be passed to format due to "mixing character types is disallowed". + UInt32 invalid_char_byte = static_cast(static_cast(*i)); + throw DB::Exception( + ErrorCodes::CANNOT_PARSE_TEXT, "Illegal character '\\x{:02x}' in partition id starting with '{}'", + invalid_char_byte, std::string(str.data(), i - str.data())); + } + } +} + + +String PartitionedSink::replaceWildcards(const String & haystack, const String & partition_id) +{ + return boost::replace_all_copy(haystack, PartitionedSink::PARTITION_ID_WILDCARD, partition_id); +} + +} diff --git a/src/Storages/PartitionedSink.h b/src/Storages/PartitionedSink.h new file mode 100644 index 00000000000..bc59a603fac --- /dev/null +++ b/src/Storages/PartitionedSink.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class PartitionedSink : public SinkToStorage +{ +public: + static constexpr auto PARTITION_ID_WILDCARD = "{_partition_id}"; + + PartitionedSink(const ASTPtr & partition_by, ContextPtr context_, const Block & sample_block_); + + String getName() const override { return "PartitionedSink"; } + + void consume(Chunk chunk) override; + + void onFinish() override; + + virtual SinkPtr createSinkForPartition(const String & partition_id) = 0; + + static void validatePartitionKey(const String & str, bool allow_slash); + + static String replaceWildcards(const String & haystack, const String & partition_id); + +private: + ContextPtr context; + Block sample_block; + + ExpressionActionsPtr partition_by_expr; + String partition_by_column_name; + + std::unordered_map sinks; + + SinkPtr getSinkForPartition(const String & partition_id); +}; + +} diff --git a/src/Storages/RabbitMQ/RabbitMQSink.cpp b/src/Storages/RabbitMQ/RabbitMQSink.cpp index 2b8d5ab3810..ce569afb99b 100644 --- a/src/Storages/RabbitMQ/RabbitMQSink.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSink.cpp @@ -18,14 +18,12 @@ RabbitMQSink::RabbitMQSink( , metadata_snapshot(metadata_snapshot_) , context(context_) { + storage.unbindExchange(); } void RabbitMQSink::onStart() { - if (!storage.exchangeRemoved()) - storage.unbindExchange(); - buffer = storage.createWriteBuffer(); buffer->activateWriting(); diff --git a/src/Storages/RabbitMQ/RabbitMQSource.cpp b/src/Storages/RabbitMQ/RabbitMQSource.cpp index 
b954ad3ab23..be2806eb42a 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSource.cpp @@ -63,11 +63,14 @@ RabbitMQSource::RabbitMQSource( , non_virtual_header(std::move(headers.first)) , virtual_header(std::move(headers.second)) { + storage.incrementReader(); } RabbitMQSource::~RabbitMQSource() { + storage.decrementReader(); + if (!buffer) return; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index cf9b557de25..ed2e78c115f 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -237,10 +237,46 @@ ContextMutablePtr StorageRabbitMQ::addSettings(ContextPtr local_context) const void StorageRabbitMQ::loopingFunc() { - if (!rabbit_is_ready) + connection->getHandler().startLoop(); +} + + +void StorageRabbitMQ::stopLoop() +{ + connection->getHandler().updateLoopState(Loop::STOP); +} + +void StorageRabbitMQ::stopLoopIfNoReaders() +{ + /// Stop the loop if no select was started. + /// There can be a case that selects are finished + /// but not all sources decremented the counter, then + /// it is ok that the loop is not stopped, because + /// there is a background task (streaming_task), which + /// also checks whether there is an idle loop. + std::lock_guard lock(loop_mutex); + if (readers_count) return; - if (connection->isConnected()) - connection->getHandler().startLoop(); + connection->getHandler().updateLoopState(Loop::STOP); +} + +void StorageRabbitMQ::startLoop() +{ + assert(rabbit_is_ready); + connection->getHandler().updateLoopState(Loop::RUN); + looping_task->activateAndSchedule(); +} + + +void StorageRabbitMQ::incrementReader() +{ + ++readers_count; +} + + +void StorageRabbitMQ::decrementReader() +{ + --readers_count; } @@ -262,9 +298,7 @@ void StorageRabbitMQ::connectionFunc() void StorageRabbitMQ::deactivateTask(BackgroundSchedulePool::TaskHolder & task, bool wait, bool stop_loop) { if (stop_loop) - { - connection->getHandler().updateLoopState(Loop::STOP); - } + stopLoop(); std::unique_lock lock(task_mutex, std::defer_lock); if (lock.try_lock()) @@ -290,7 +324,7 @@ size_t StorageRabbitMQ::getMaxBlockSize() const void StorageRabbitMQ::initRabbitMQ() { - if (stream_cancelled || rabbit_is_ready) + if (shutdown_called || rabbit_is_ready) return; if (use_user_setup) @@ -569,29 +603,35 @@ void StorageRabbitMQ::unbindExchange() * bindings to remove redunadant message copies, but after that mv cannot work unless those bindings are recreated. Recreating them is * not difficult but very ugly and as probably nobody will do such thing - bindings will not be recreated. */ - std::call_once(flag, [&]() + if (!exchange_removed.exchange(true)) { - streaming_task->deactivate(); - connection->getHandler().updateLoopState(Loop::STOP); - looping_task->deactivate(); + try + { + streaming_task->deactivate(); - auto rabbit_channel = connection->createChannel(); - rabbit_channel->removeExchange(bridge_exchange) - .onSuccess([&]() - { - exchange_removed.store(true); - }) - .onError([&](const char * message) - { - throw Exception("Unable to remove exchange. 
Reason: " + std::string(message), ErrorCodes::CANNOT_REMOVE_RABBITMQ_EXCHANGE); - }); + stopLoop(); + looping_task->deactivate(); - while (!exchange_removed.load()) - { - connection->getHandler().iterateLoop(); + auto rabbit_channel = connection->createChannel(); + rabbit_channel->removeExchange(bridge_exchange) + .onSuccess([&]() + { + connection->getHandler().stopLoop(); + }) + .onError([&](const char * message) + { + throw Exception("Unable to remove exchange. Reason: " + std::string(message), ErrorCodes::CANNOT_REMOVE_RABBITMQ_EXCHANGE); + }); + + connection->getHandler().startBlockingLoop(); + rabbit_channel->close(); } - rabbit_channel->close(); - }); + catch (...) + { + exchange_removed = false; + throw; + } + } } @@ -610,6 +650,8 @@ Pipe StorageRabbitMQ::read( if (num_created_consumers == 0) return {}; + std::lock_guard lock(loop_mutex); + auto sample_block = metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()); auto modified_context = addSettings(local_context); auto block_size = getMaxBlockSize(); @@ -645,7 +687,7 @@ Pipe StorageRabbitMQ::read( } if (!connection->getHandler().loopRunning() && connection->isConnected()) - looping_task->activateAndSchedule(); + startLoop(); LOG_DEBUG(log, "Starting reading {} streams", pipes.size()); auto united_pipe = Pipe::unitePipes(std::move(pipes)); @@ -700,15 +742,13 @@ void StorageRabbitMQ::startup() } } - connection->getHandler().updateLoopState(Loop::RUN); streaming_task->activateAndSchedule(); } void StorageRabbitMQ::shutdown() { - stream_cancelled = true; - wait_confirm = false; + shutdown_called = true; /// In case it has not yet been able to setup connection; deactivateTask(connection_task, true, false); @@ -833,7 +873,7 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ChannelPtr consumer_channel = connection->createChannel(); return std::make_shared( std::move(consumer_channel), connection->getHandler(), queues, ++consumer_id, - unique_strbase, log, row_delimiter, queue_size, stream_cancelled); + unique_strbase, log, row_delimiter, queue_size, shutdown_called); } @@ -841,7 +881,7 @@ ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() { return std::make_shared( configuration, getContext(), routing_keys, exchange_name, exchange_type, - producer_id.fetch_add(1), persistent, wait_confirm, log, + producer_id.fetch_add(1), persistent, shutdown_called, log, row_delimiter ? std::optional{row_delimiter} : std::nullopt, 1, 1024); } @@ -888,23 +928,23 @@ void StorageRabbitMQ::initializeBuffers() void StorageRabbitMQ::streamingToViewsFunc() { - if (rabbit_is_ready && (connection->isConnected() || connection->reconnect())) + if (rabbit_is_ready) { - initializeBuffers(); - try { auto table_id = getStorageID(); // Check if at least one direct dependency is attached size_t dependencies_count = DatabaseCatalog::instance().getDependencies(table_id).size(); + bool rabbit_connected = connection->isConnected() || connection->reconnect(); - if (dependencies_count) + if (dependencies_count && rabbit_connected) { + initializeBuffers(); auto start_time = std::chrono::steady_clock::now(); // Keep streaming as long as there are attached views and streaming is not cancelled - while (!stream_cancelled && num_created_consumers > 0) + while (!shutdown_called && num_created_consumers > 0) { if (!checkDependencies(table_id)) break; @@ -916,7 +956,7 @@ void StorageRabbitMQ::streamingToViewsFunc() /// Reschedule with backoff. 
if (milliseconds_to_wait < BACKOFF_TRESHOLD) milliseconds_to_wait *= 2; - connection->getHandler().updateLoopState(Loop::STOP); + stopLoopIfNoReaders(); break; } else @@ -928,7 +968,7 @@ void StorageRabbitMQ::streamingToViewsFunc() auto duration = std::chrono::duration_cast(end_time - start_time); if (duration.count() > MAX_THREAD_WORK_DURATION_MS) { - connection->getHandler().updateLoopState(Loop::STOP); + stopLoopIfNoReaders(); LOG_TRACE(log, "Reschedule streaming. Thread work duration limit exceeded."); break; } @@ -941,7 +981,12 @@ void StorageRabbitMQ::streamingToViewsFunc() } } - if (!stream_cancelled) + /// If there is no running select, stop the loop which was + /// activated by previous select. + if (connection->getHandler().loopRunning()) + stopLoopIfNoReaders(); + + if (!shutdown_called) streaming_task->scheduleAfter(milliseconds_to_wait); } @@ -995,10 +1040,7 @@ bool StorageRabbitMQ::streamToViews() block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); if (!connection->getHandler().loopRunning()) - { - connection->getHandler().updateLoopState(Loop::RUN); - looping_task->activateAndSchedule(); - } + startLoop(); { CompletedPipelineExecutor executor(block_io.pipeline); @@ -1013,7 +1055,7 @@ bool StorageRabbitMQ::streamToViews() if (!connection->isConnected()) { - if (stream_cancelled) + if (shutdown_called) return true; if (connection->reconnect()) @@ -1087,8 +1129,7 @@ bool StorageRabbitMQ::streamToViews() } else { - connection->getHandler().updateLoopState(Loop::RUN); - looping_task->activateAndSchedule(); + startLoop(); } /// Do not reschedule, do not stop event loop. diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index e9351058e95..9e5d790dc10 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -63,11 +63,13 @@ public: String getExchange() const { return exchange_name; } void unbindExchange(); - bool exchangeRemoved() { return exchange_removed.load(); } bool updateChannel(ChannelPtr & channel); void updateQueues(std::vector & queues_) { queues_ = queues; } + void incrementReader(); + void decrementReader(); + protected: StorageRabbitMQ( const StorageID & table_id_, @@ -118,9 +120,7 @@ private: String sharding_exchange, bridge_exchange, consumer_exchange; size_t consumer_id = 0; /// counter for consumer buffer, needed for channel id - std::atomic producer_id = 1; /// counter for producer buffer, needed for channel id - std::atomic wait_confirm = true; /// needed to break waiting for confirmations for producer - std::atomic exchange_removed = false, rabbit_is_ready = false; + std::vector queues; std::once_flag flag; /// remove exchange only once @@ -131,7 +131,32 @@ private: uint64_t milliseconds_to_wait; - std::atomic stream_cancelled{false}; + /** + * ╰( ͡° ͜ʖ ͡° )つ──☆* Evil atomics: + */ + /// Needed for tell MV or producer background tasks + /// that they must finish as soon as possible. + std::atomic shutdown_called{false}; + /// Counter for producer buffers, needed for channel id. + /// Needed to generate unique producer buffer identifiers. + std::atomic producer_id = 1; + /// Has connection background task completed successfully? + /// It is started only once -- in constructor. + std::atomic rabbit_is_ready = false; + /// Allow to remove exchange only once. + std::atomic exchange_removed = false; + /// For select query we must be aware of the end of streaming + /// to be able to turn off the loop. 
+ std::atomic readers_count = 0; + + /// In select query we start event loop, but do not stop it + /// after that select is finished. Then in a thread, which + /// checks for MV we also check if we have select readers. + /// If not - we turn off the loop. The checks are done under + /// mutex to avoid having a turned off loop when select was + /// started. + std::mutex loop_mutex; + size_t read_attempts = 0; mutable bool drop_table = false; bool is_attach; @@ -145,6 +170,10 @@ private: void loopingFunc(); void connectionFunc(); + void startLoop(); + void stopLoop(); + void stopLoopIfNoReaders(); + static Names parseSettings(String settings_list); static AMQP::ExchangeType defineExchangeType(String exchange_type_); static String getTableBasedName(String name, const StorageID & table_id); diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 8d891e34a64..f6b12708e81 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -32,7 +32,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( const AMQP::ExchangeType exchange_type_, const size_t channel_id_base_, const bool persistent_, - std::atomic & wait_confirm_, + std::atomic & shutdown_called_, Poco::Logger * log_, std::optional delimiter, size_t rows_per_message, @@ -44,7 +44,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( , exchange_type(exchange_type_) , channel_id_base(std::to_string(channel_id_base_)) , persistent(persistent_) - , wait_confirm(wait_confirm_) + , shutdown_called(shutdown_called_) , payloads(BATCH) , returned(RETURNED_LIMIT) , log(log_) @@ -89,7 +89,7 @@ void WriteBufferToRabbitMQProducer::countRow() const std::string & last_chunk = chunks.back(); size_t last_chunk_size = offset(); - if (delim && last_chunk[last_chunk_size - 1] == delim) + if (last_chunk_size && delim && last_chunk[last_chunk_size - 1] == delim) --last_chunk_size; std::string payload; @@ -255,7 +255,7 @@ void WriteBufferToRabbitMQProducer::publish(ConcurrentBoundedQueueusable() will anyway return true, /// but must publish only after onReady callback. 
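The new `readers_count` / `loop_mutex` / `stopLoopIfNoReaders()` machinery above is easier to follow outside the diff. Below is a minimal standalone sketch of the same idea, with illustrative names only (this is not the ClickHouse API): keep the event loop alive while any SELECT reader exists, and let either the last reader or the periodic background task shut it down.

```cpp
#include <cstddef>
#include <iostream>
#include <mutex>

/// Sketch only: a background event loop that must keep running while at least
/// one SELECT reader is active, and is stopped by whichever side notices last.
class LoopController
{
public:
    void readerStarted()        /// a SELECT source was created
    {
        std::lock_guard<std::mutex> lock(mutex);
        ++readers_count;
        loop_running = true;    /// analogue of startLoop()
    }

    void readerFinished()       /// a SELECT source was destroyed
    {
        std::lock_guard<std::mutex> lock(mutex);
        --readers_count;
        stopLoopIfNoReaders();
    }

    void backgroundCheck()      /// periodic streaming-to-views task
    {
        std::lock_guard<std::mutex> lock(mutex);
        stopLoopIfNoReaders();
    }

private:
    void stopLoopIfNoReaders()  /// caller must hold `mutex`
    {
        if (readers_count == 0 && loop_running)
        {
            loop_running = false;
            std::cout << "event loop stopped\n";
        }
    }

    std::mutex mutex;
    size_t readers_count = 0;
    bool loop_running = false;
};

int main()
{
    LoopController controller;
    controller.readerStarted();   /// SELECT begins, loop starts
    controller.backgroundCheck(); /// loop stays up: one reader is still active
    controller.readerFinished();  /// last reader gone -> "event loop stopped"
}
```

Doing the check under the mutex is what prevents the race the comment in the header describes: a loop being stopped right after a new select has started it.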
diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 8ed1ea643f3..ee6720ece13 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -25,7 +25,7 @@ public: const AMQP::ExchangeType exchange_type_, const size_t channel_id_base_, const bool persistent_, - std::atomic & wait_confirm_, + std::atomic & shutdown_called_, Poco::Logger * log_, std::optional delimiter, size_t rows_per_message, @@ -60,7 +60,7 @@ private: /* false: when shutdown is called; needed because table might be dropped before all acks are received * true: in all other cases */ - std::atomic & wait_confirm; + std::atomic & shutdown_called; AMQP::Table key_arguments; BackgroundSchedulePool::TaskHolder writing_task; diff --git a/src/Storages/StorageExternalDistributed.cpp b/src/Storages/StorageExternalDistributed.cpp index 021cd9815d7..927c070826b 100644 --- a/src/Storages/StorageExternalDistributed.cpp +++ b/src/Storages/StorageExternalDistributed.cpp @@ -159,9 +159,8 @@ StorageExternalDistributed::StorageExternalDistributed( } else { - Poco::URI uri(url_description); shard = std::make_shared( - uri, table_id, format_name, format_settings, columns, constraints, String{}, context, compression_method); + url_description, table_id, format_name, format_settings, columns, constraints, String{}, context, compression_method); LOG_DEBUG(&Poco::Logger::get("StorageURLDistributed"), "Adding URL: {}", url_description); } diff --git a/src/Storages/StorageFactory.h b/src/Storages/StorageFactory.h index bdc57bfdc6d..20db1a44897 100644 --- a/src/Storages/StorageFactory.h +++ b/src/Storages/StorageFactory.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 978d161852b..4a1eac2a39e 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -25,6 +26,7 @@ #include #include #include +#include #include #include @@ -60,6 +62,7 @@ namespace ErrorCodes extern const int TIMEOUT_EXCEEDED; extern const int INCOMPATIBLE_COLUMNS; extern const int CANNOT_STAT; + extern const int LOGICAL_ERROR; } namespace @@ -131,6 +134,7 @@ void checkCreationIsAllowed(ContextPtr context_global, const std::string & db_di if (fs::exists(table_path) && fs::is_directory(table_path)) throw Exception("File must not be a directory", ErrorCodes::INCORRECT_FILE_NAME); } + } Strings StorageFile::getPathsList(const String & table_path, const String & user_files_path, ContextPtr context, size_t & total_bytes_to_read) @@ -190,6 +194,7 @@ StorageFile::StorageFile(const std::string & table_path_, const std::string & us { is_db_table = false; paths = getPathsList(table_path_, user_files_path, args.getContext(), total_bytes_to_read); + path_for_partitioned_write = table_path_; if (args.format_name == "Distributed") { @@ -541,33 +546,79 @@ Pipe StorageFile::read( class StorageFileSink final : public SinkToStorage { public: - explicit StorageFileSink( - StorageFile & storage_, + StorageFileSink( const StorageMetadataPtr & metadata_snapshot_, - std::unique_lock && lock_, - const CompressionMethod compression_method, - ContextPtr context, - const std::optional & format_settings, - int & flags) + const String & table_name_for_log_, + int table_fd_, + bool use_table_fd_, + std::string base_path_, + std::vector paths_, + const CompressionMethod 
compression_method_, + const std::optional & format_settings_, + const String format_name_, + ContextPtr context_, + int flags_) : SinkToStorage(metadata_snapshot_->getSampleBlock()) - , storage(storage_) , metadata_snapshot(metadata_snapshot_) + , table_name_for_log(table_name_for_log_) + , table_fd(table_fd_) + , use_table_fd(use_table_fd_) + , base_path(base_path_) + , paths(paths_) + , compression_method(compression_method_) + , format_name(format_name_) + , format_settings(format_settings_) + , context(context_) + , flags(flags_) + { + initialize(); + } + + StorageFileSink( + const StorageMetadataPtr & metadata_snapshot_, + const String & table_name_for_log_, + std::unique_lock && lock_, + int table_fd_, + bool use_table_fd_, + std::string base_path_, + std::vector paths_, + const CompressionMethod compression_method_, + const std::optional & format_settings_, + const String format_name_, + ContextPtr context_, + int flags_) + : SinkToStorage(metadata_snapshot_->getSampleBlock()) + , metadata_snapshot(metadata_snapshot_) + , table_name_for_log(table_name_for_log_) + , table_fd(table_fd_) + , use_table_fd(use_table_fd_) + , base_path(base_path_) + , paths(paths_) + , compression_method(compression_method_) + , format_name(format_name_) + , format_settings(format_settings_) + , context(context_) + , flags(flags_) , lock(std::move(lock_)) { if (!lock) throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + initialize(); + } + void initialize() + { std::unique_ptr naked_buffer = nullptr; - if (storage.use_table_fd) + if (use_table_fd) { - naked_buffer = std::make_unique(storage.table_fd, DBMS_DEFAULT_BUFFER_SIZE); + naked_buffer = std::make_unique(table_fd, DBMS_DEFAULT_BUFFER_SIZE); } else { - if (storage.paths.size() != 1) - throw Exception("Table '" + storage.getStorageID().getNameForLogs() + "' is in readonly mode because of globs in filepath", ErrorCodes::DATABASE_ACCESS_DENIED); + if (paths.size() != 1) + throw Exception("Table '" + table_name_for_log + "' is in readonly mode because of globs in filepath", ErrorCodes::DATABASE_ACCESS_DENIED); flags |= O_WRONLY | O_APPEND | O_CREAT; - naked_buffer = std::make_unique(storage.paths[0], DBMS_DEFAULT_BUFFER_SIZE, flags); + naked_buffer = std::make_unique(paths[0], DBMS_DEFAULT_BUFFER_SIZE, flags); } /// In case of CSVWithNames we have already written prefix. 
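The partitioned write path added below builds output paths from the `{_partition_id}` wildcard. Here is a small self-contained sketch of that substitution and validation logic; the function names mirror `PartitionedSink` from earlier in this patch, but this is only an illustration written against the standard library (the real code uses `boost::replace_all_copy` and throws `DB::Exception`).

```cpp
#include <iostream>
#include <stdexcept>
#include <string>

static const std::string wildcard = "{_partition_id}";

/// Replace every occurrence of the wildcard with the partition id.
std::string replaceWildcards(std::string haystack, const std::string & partition_id)
{
    for (size_t pos = haystack.find(wildcard); pos != std::string::npos; pos = haystack.find(wildcard, pos))
    {
        haystack.replace(pos, wildcard.size(), partition_id);
        pos += partition_id.size();
    }
    return haystack;
}

/// Reject control characters, glob characters and (optionally) slashes in the resulting key.
void validatePartitionKey(const std::string & str, bool allow_slash)
{
    for (char c : str)
        if (static_cast<unsigned char>(c) < 0x20 || c == '{' || c == '}' || c == '*' || c == '?' || (!allow_slash && c == '/'))
            throw std::runtime_error("illegal character in partition id: " + str);
}

int main()
{
    std::string uri = "/var/lib/data/{_partition_id}/rows.csv";
    std::string path = replaceWildcards(uri, "2021-11");
    validatePartitionKey(path, /*allow_slash=*/ true);
    std::cout << path << '\n';   /// /var/lib/data/2021-11/rows.csv
}
```

Each distinct partition id therefore maps to its own expanded path, and `createSinkForPartition` opens one nested sink per such path.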
@@ -576,7 +627,7 @@ public: write_buf = wrapWriteBufferWithCompressionMethod(std::move(naked_buffer), compression_method, 3); - writer = FormatFactory::instance().getOutputFormatParallelIfPossible(storage.format_name, + writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format_name, *write_buf, metadata_snapshot->getSampleBlock(), context, {}, format_settings); } @@ -606,16 +657,93 @@ public: // } private: - StorageFile & storage; StorageMetadataPtr metadata_snapshot; - std::unique_lock lock; + String table_name_for_log; + std::unique_ptr write_buf; OutputFormatPtr writer; bool prefix_written{false}; + + int table_fd; + bool use_table_fd; + std::string base_path; + std::vector paths; + CompressionMethod compression_method; + std::string format_name; + std::optional format_settings; + + ContextPtr context; + int flags; + std::unique_lock lock; }; +class PartitionedStorageFileSink : public PartitionedSink +{ +public: + PartitionedStorageFileSink( + const ASTPtr & partition_by, + const StorageMetadataPtr & metadata_snapshot_, + const String & table_name_for_log_, + std::unique_lock && lock_, + String base_path_, + String path_, + const CompressionMethod compression_method_, + const std::optional & format_settings_, + const String format_name_, + ContextPtr context_, + int flags_) + : PartitionedSink(partition_by, context_, metadata_snapshot_->getSampleBlock()) + , path(path_) + , metadata_snapshot(metadata_snapshot_) + , table_name_for_log(table_name_for_log_) + , base_path(base_path_) + , compression_method(compression_method_) + , format_name(format_name_) + , format_settings(format_settings_) + , context(context_) + , flags(flags_) + , lock(std::move(lock_)) + { + } + + SinkPtr createSinkForPartition(const String & partition_id) override + { + auto partition_path = PartitionedSink::replaceWildcards(path, partition_id); + PartitionedSink::validatePartitionKey(partition_path, true); + Strings result_paths = {partition_path}; + checkCreationIsAllowed(context, context->getUserFilesPath(), partition_path); + return std::make_shared( + metadata_snapshot, + table_name_for_log, + -1, + /* use_table_fd */false, + base_path, + result_paths, + compression_method, + format_settings, + format_name, + context, + flags); + } + +private: + const String path; + StorageMetadataPtr metadata_snapshot; + String table_name_for_log; + + std::string base_path; + CompressionMethod compression_method; + std::string format_name; + std::optional format_settings; + + ContextPtr context; + int flags; + std::unique_lock lock; +}; + + SinkToStoragePtr StorageFile::write( - const ASTPtr & /*query*/, + const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) { @@ -628,20 +756,51 @@ SinkToStoragePtr StorageFile::write( if (context->getSettingsRef().engine_file_truncate_on_insert) flags |= O_TRUNC; - if (!paths.empty()) - { - path = paths[0]; - fs::create_directories(fs::path(path).parent_path()); - } + bool has_wildcards = path_for_partitioned_write.find(PartitionedSink::PARTITION_ID_WILDCARD) != String::npos; + const auto * insert_query = dynamic_cast(query.get()); + bool is_partitioned_implementation = insert_query && insert_query->partition_by && has_wildcards; - return std::make_shared( - *this, - metadata_snapshot, - std::unique_lock{rwlock, getLockTimeout(context)}, - chooseCompressionMethod(path, compression_method), - context, - format_settings, - flags); + if (is_partitioned_implementation) + { + if (path_for_partitioned_write.empty()) + throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Empty path for partitioned write"); + fs::create_directories(fs::path(path_for_partitioned_write).parent_path()); + + return std::make_shared( + insert_query->partition_by, + metadata_snapshot, + getStorageID().getNameForLogs(), + std::unique_lock{rwlock, getLockTimeout(context)}, + base_path, + path_for_partitioned_write, + chooseCompressionMethod(path, compression_method), + format_settings, + format_name, + context, + flags); + } + else + { + if (!paths.empty()) + { + path = paths[0]; + fs::create_directories(fs::path(path).parent_path()); + } + + return std::make_shared( + metadata_snapshot, + getStorageID().getNameForLogs(), + std::unique_lock{rwlock, getLockTimeout(context)}, + table_fd, + use_table_fd, + base_path, + paths, + chooseCompressionMethod(path, compression_method), + format_settings, + format_name, + context, + flags); + } } bool StorageFile::storesDataOnDisk() const diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 09ea44d1405..f48d1c285da 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -16,7 +16,9 @@ class StorageFileBlockOutputStream; class StorageFile final : public shared_ptr_helper, public IStorage { - friend struct shared_ptr_helper; +friend struct shared_ptr_helper; +friend class PartitionedStorageFileSink; + public: std::string getName() const override { return "File"; } @@ -66,6 +68,8 @@ public: /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. bool isColumnOriented() const; + bool supportsPartitionBy() const override { return true; } + protected: friend class StorageFileSource; friend class StorageFileSink; @@ -104,6 +108,8 @@ private: /// Total number of bytes to read (sums for multiple files in case of globs). Needed for progress bar. size_t total_bytes_to_read = 0; + + String path_for_partitioned_write; }; } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 2ce023d2424..e59579c5a33 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -27,6 +28,11 @@ #include #include +#include +#include +#include +#include + #include #include @@ -46,6 +52,7 @@ namespace ErrorCodes extern const int SIZES_OF_MARKS_FILES_ARE_INCONSISTENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int INCORRECT_FILE_NAME; + extern const int NOT_IMPLEMENTED; } /// NOTE: The lock `StorageLog::rwlock` is NOT kept locked while reading, @@ -879,6 +886,162 @@ IStorage::ColumnSizeByName StorageLog::getColumnSizes() const } +BackupEntries StorageLog::backup(const ASTs & partitions, ContextPtr context) +{ + if (!partitions.empty()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Table engine {} doesn't support partitions", getName()); + + auto lock_timeout = getLockTimeout(context); + loadMarks(lock_timeout); + + ReadLock lock{rwlock, lock_timeout}; + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + + if (!num_data_files || !file_checker.getFileSize(data_files[INDEX_WITH_REAL_ROW_COUNT].path)) + return {}; + + auto temp_dir_owner = std::make_shared(disk, "tmp/backup_"); + auto temp_dir = temp_dir_owner->getPath(); + disk->createDirectories(temp_dir); + + BackupEntries backup_entries; + + /// *.bin + for (const auto & data_file : data_files) + { + /// We make a copy of the data file because it can be changed later in write() or in truncate(). 
+ String data_file_name = fileName(data_file.path); + String temp_file_path = temp_dir + "/" + data_file_name; + disk->copy(data_file.path, disk, temp_file_path); + backup_entries.emplace_back( + data_file_name, + std::make_unique( + disk, temp_file_path, file_checker.getFileSize(data_file.path), std::nullopt, temp_dir_owner)); + } + + /// __marks.mrk + if (use_marks_file) + { + /// We make a copy of the data file because it can be changed later in write() or in truncate(). + String marks_file_name = fileName(marks_file_path); + String temp_file_path = temp_dir + "/" + marks_file_name; + disk->copy(marks_file_path, disk, temp_file_path); + backup_entries.emplace_back( + marks_file_name, + std::make_unique( + disk, temp_file_path, file_checker.getFileSize(marks_file_path), std::nullopt, temp_dir_owner)); + } + + /// sizes.json + String files_info_path = file_checker.getPath(); + backup_entries.emplace_back(fileName(files_info_path), std::make_unique(disk, files_info_path)); + + /// columns.txt + backup_entries.emplace_back( + "columns.txt", std::make_unique(getInMemoryMetadata().getColumns().getAllPhysical().toString())); + + /// count.txt + if (use_marks_file) + { + size_t num_rows = data_files[INDEX_WITH_REAL_ROW_COUNT].marks.empty() ? 0 : data_files[INDEX_WITH_REAL_ROW_COUNT].marks.back().rows; + backup_entries.emplace_back("count.txt", std::make_unique(toString(num_rows))); + } + + return backup_entries; +} + +RestoreDataTasks StorageLog::restoreFromBackup(const BackupPtr & backup, const String & data_path_in_backup, const ASTs & partitions, ContextMutablePtr context) +{ + if (!partitions.empty()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Table engine {} doesn't support partitions", getName()); + + auto restore_task = [this, backup, data_path_in_backup, context]() + { + auto lock_timeout = getLockTimeout(context); + WriteLock lock{rwlock, lock_timeout}; + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + + if (!num_data_files) + return; + + /// Load the marks if not loaded yet. We have to do that now because we're going to update these marks. + loadMarks(lock); + + /// If there were no files, save zero file sizes to be able to rollback in case of error. + saveFileSizes(lock); + + try + { + /// Append data files. + for (const auto & data_file : data_files) + { + String file_path_in_backup = data_path_in_backup + fileName(data_file.path); + auto backup_entry = backup->read(file_path_in_backup); + auto in = backup_entry->getReadBuffer(); + auto out = disk->writeFile(data_file.path, max_compress_block_size, WriteMode::Append); + copyData(*in, *out); + } + + if (use_marks_file) + { + /// Append marks. + size_t num_extra_marks = 0; + String file_path_in_backup = data_path_in_backup + fileName(marks_file_path); + size_t file_size = backup->getSize(file_path_in_backup); + if (file_size % (num_data_files * sizeof(Mark)) != 0) + throw Exception("Size of marks file is inconsistent", ErrorCodes::SIZES_OF_MARKS_FILES_ARE_INCONSISTENT); + + num_extra_marks = file_size / (num_data_files * sizeof(Mark)); + + size_t num_marks = data_files[0].marks.size(); + for (auto & data_file : data_files) + data_file.marks.reserve(num_marks + num_extra_marks); + + std::vector old_data_sizes; + std::vector old_num_rows; + old_data_sizes.resize(num_data_files); + old_num_rows.resize(num_data_files); + for (size_t i = 0; i != num_data_files; ++i) + { + old_data_sizes[i] = file_checker.getFileSize(data_files[i].path); + old_num_rows[i] = num_marks ? 
data_files[i].marks[num_marks - 1].rows : 0; + } + + auto backup_entry = backup->read(file_path_in_backup); + auto marks_rb = backup_entry->getReadBuffer(); + + for (size_t i = 0; i != num_extra_marks; ++i) + { + for (size_t j = 0; j != num_data_files; ++j) + { + Mark mark; + mark.read(*marks_rb); + mark.rows += old_num_rows[j]; /// Adjust the number of rows. + mark.offset += old_data_sizes[j]; /// Adjust the offset. + data_files[j].marks.push_back(mark); + } + } + } + + /// Finish writing. + saveMarks(lock); + saveFileSizes(lock); + } + catch (...) + { + /// Rollback partial writes. + file_checker.repair(); + removeUnsavedMarks(lock); + throw; + } + + }; + return {restore_task}; +} + + void registerStorageLog(StorageFactory & factory) { StorageFactory::StorageFeatures features{ diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index ca87d7dcf3e..8b2ef0ccac1 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -51,6 +51,9 @@ public: bool supportsSubcolumns() const override { return true; } ColumnSizeByName getColumnSizes() const override; + BackupEntries backup(const ASTs & partitions, ContextPtr context) override; + RestoreDataTasks restoreFromBackup(const BackupPtr & backup, const String & data_path_in_backup, const ASTs & partitions, ContextMutablePtr context) override; + protected: /** Attach the table with the appropriate name, along the appropriate path (with / at the end), * (the correctness of names and paths is not verified) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 361ff95984f..e7de3010c33 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index f4a50f2e553..63bb8af9148 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -849,12 +849,47 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con throw Exception("Table was not dropped because ZooKeeper session has expired.", ErrorCodes::TABLE_WAS_NOT_DROPPED); auto remote_replica_path = zookeeper_path + "/replicas/" + replica; + LOG_INFO(logger, "Removing replica {}, marking it as lost", remote_replica_path); /// Mark itself lost before removing, because the following recursive removal may fail /// and partially dropped replica may be considered as alive one (until someone will mark it lost) - zookeeper->trySet(zookeeper_path + "/replicas/" + replica + "/is_lost", "1"); + zookeeper->trySet(remote_replica_path + "/is_lost", "1"); + + /// NOTE: we should check for remote_replica_path existence, + /// since otherwise DROP REPLICA will fail if the replica had been already removed. + if (!zookeeper->exists(remote_replica_path)) + { + LOG_INFO(logger, "Removing replica {} does not exist", remote_replica_path); + return; + } + + /// Analog of removeRecursive(remote_replica_path) + /// but it removes "metadata" firstly. + /// + /// This will allow to mark table as readonly + /// and skip any checks of parts between on-disk and in the zookeeper. 
+ /// + /// Without this removeRecursive() may remove "parts" first + /// and on DETACH/ATTACH (or server restart) it will trigger the following error: + /// + /// "The local set of parts of table X doesn't look like the set of parts in ZooKeeper" + /// + { + Strings children = zookeeper->getChildren(remote_replica_path); + + if (std::find(children.begin(), children.end(), "metadata") != children.end()) + zookeeper->remove(fs::path(remote_replica_path) / "metadata"); + + for (const auto & child : children) + { + if (child != "metadata") + zookeeper->removeRecursive(fs::path(remote_replica_path) / child); + } + + zookeeper->remove(remote_replica_path); + } + /// It may left some garbage if replica_path subtree are concurrently modified - zookeeper->tryRemoveRecursive(remote_replica_path); if (zookeeper->exists(remote_replica_path)) LOG_ERROR(logger, "Replica was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", remote_replica_path); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 5d8645b677d..80011cde077 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -353,7 +354,7 @@ private: }; -class PartitionedStorageS3Sink : public SinkToStorage +class PartitionedStorageS3Sink : public PartitionedSink { public: PartitionedStorageS3Sink( @@ -368,7 +369,7 @@ public: const String & key_, size_t min_upload_part_size_, size_t max_single_part_upload_size_) - : SinkToStorage(sample_block_) + : PartitionedSink(partition_by, context_, sample_block_) , format(format_) , sample_block(sample_block_) , context(context_) @@ -380,74 +381,36 @@ public: , max_single_part_upload_size(max_single_part_upload_size_) , format_settings(format_settings_) { - std::vector arguments(1, partition_by); - ASTPtr partition_by_string = makeASTFunction(FunctionToString::name, std::move(arguments)); - - auto syntax_result = TreeRewriter(context).analyze(partition_by_string, sample_block.getNamesAndTypesList()); - partition_by_expr = ExpressionAnalyzer(partition_by_string, syntax_result, context).getActions(false); - partition_by_column_name = partition_by_string->getColumnName(); } - String getName() const override { return "PartitionedStorageS3Sink"; } - - void consume(Chunk chunk) override + SinkPtr createSinkForPartition(const String & partition_id) override { - const auto & columns = chunk.getColumns(); + auto partition_bucket = replaceWildcards(bucket, partition_id); + validateBucket(partition_bucket); - Block block_with_partition_by_expr = sample_block.cloneWithoutColumns(); - block_with_partition_by_expr.setColumns(columns); - partition_by_expr->execute(block_with_partition_by_expr); + auto partition_key = replaceWildcards(key, partition_id); + validateKey(partition_key); - const auto * column = block_with_partition_by_expr.getByName(partition_by_column_name).column.get(); - - std::unordered_map sub_chunks_indices; - IColumn::Selector selector; - for (size_t row = 0; row < chunk.getNumRows(); ++row) - { - auto value = column->getDataAt(row); - auto [it, inserted] = sub_chunks_indices.emplace(value, sub_chunks_indices.size()); - selector.push_back(it->second); - } - - Chunks sub_chunks; - sub_chunks.reserve(sub_chunks_indices.size()); - for (size_t column_index = 0; column_index < columns.size(); ++column_index) - { - MutableColumns column_sub_chunks = columns[column_index]->scatter(sub_chunks_indices.size(), selector); - if (column_index == 0) /// Set sizes for 
sub-chunks. - { - for (const auto & column_sub_chunk : column_sub_chunks) - { - sub_chunks.emplace_back(Columns(), column_sub_chunk->size()); - } - } - for (size_t sub_chunk_index = 0; sub_chunk_index < column_sub_chunks.size(); ++sub_chunk_index) - { - sub_chunks[sub_chunk_index].addColumn(std::move(column_sub_chunks[sub_chunk_index])); - } - } - - for (const auto & [partition_id, sub_chunk_index] : sub_chunks_indices) - { - getSinkForPartition(partition_id)->consume(std::move(sub_chunks[sub_chunk_index])); - } - } - - void onFinish() override - { - for (auto & [partition_id, sink] : sinks) - { - sink->onFinish(); - } + return std::make_shared( + format, + sample_block, + context, + format_settings, + compression_method, + client, + partition_bucket, + partition_key, + min_upload_part_size, + max_single_part_upload_size + ); } private: - using SinkPtr = std::shared_ptr; - const String format; const Block sample_block; ContextPtr context; const CompressionMethod compression_method; + std::shared_ptr client; const String bucket; const String key; @@ -458,41 +421,6 @@ private: ExpressionActionsPtr partition_by_expr; String partition_by_column_name; - std::unordered_map sinks; - - static String replaceWildcards(const String & haystack, const String & partition_id) - { - return boost::replace_all_copy(haystack, PARTITION_ID_WILDCARD, partition_id); - } - - SinkPtr getSinkForPartition(const String & partition_id) - { - auto it = sinks.find(partition_id); - if (it == sinks.end()) - { - auto partition_bucket = replaceWildcards(bucket, partition_id); - validateBucket(partition_bucket); - - auto partition_key = replaceWildcards(key, partition_id); - validateKey(partition_key); - - std::tie(it, std::ignore) = sinks.emplace(partition_id, std::make_shared( - format, - sample_block, - context, - format_settings, - compression_method, - client, - partition_bucket, - partition_key, - min_upload_part_size, - max_single_part_upload_size - )); - } - - return it->second; - } - static void validateBucket(const String & str) { S3::URI::validateBucket(str, {}); @@ -517,21 +445,6 @@ private: validatePartitionKey(str, true); } - - static void validatePartitionKey(const StringRef & str, bool allow_slash) - { - for (const char * i = str.data; i != str.data + str.size; ++i) - { - if (static_cast(*i) < 0x20 || *i == '{' || *i == '}' || *i == '*' || *i == '?' || (!allow_slash && *i == '/')) - { - /// Need to convert to UInt32 because UInt8 can't be passed to format due to "mixing character types is disallowed". 
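The scatter-by-partition logic removed here (grouping the rows of a chunk by the value of the partition expression and keeping one sink per partition id) is presumably centralised in the new PartitionedSink base class, so each storage only overrides createSinkForPartition(). A minimal sketch of that lazy per-partition caching pattern, using hypothetical Chunk/Sink stand-ins rather than the real ClickHouse types:

#include <map>
#include <memory>
#include <string>

/// Hypothetical stand-ins for Chunk / SinkToStorage; only the dispatch pattern is shown.
struct Chunk {};
struct Sink
{
    virtual void consume(Chunk chunk) = 0;
    virtual ~Sink() = default;
};

struct PartitionedSinkSketch
{
    virtual ~PartitionedSinkSketch() = default;

    /// Each storage (File, S3, URL) only has to say how to build a sink for one partition.
    virtual std::shared_ptr<Sink> createSinkForPartition(const std::string & partition_id) = 0;

    /// One sink is cached per partition id and created lazily on first use,
    /// which is what the per-storage getSinkForPartition() helper removed above used to do.
    std::shared_ptr<Sink> getSinkForPartition(const std::string & partition_id)
    {
        auto it = sinks.find(partition_id);
        if (it == sinks.end())
            it = sinks.emplace(partition_id, createSinkForPartition(partition_id)).first;
        return it->second;
    }

private:
    std::map<std::string, std::shared_ptr<Sink>> sinks;
};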
- UInt32 invalid_char_byte = static_cast(static_cast(*i)); - throw DB::Exception( - ErrorCodes::CANNOT_PARSE_TEXT, "Illegal character '\\x{:02x}' in partition id starting with '{}'", - invalid_char_byte, StringRef(str.data, i - str.data)); - } - } - } }; @@ -551,7 +464,8 @@ StorageS3::StorageS3( ContextPtr context_, std::optional format_settings_, const String & compression_method_, - bool distributed_processing_) + bool distributed_processing_, + ASTPtr partition_by_) : IStorage(table_id_) , client_auth{uri_, access_key_id_, secret_access_key_, max_connections_, {}, {}} /// Client and settings will be updated later , format_name(format_name_) @@ -562,6 +476,7 @@ StorageS3::StorageS3( , name(uri_.storage_name) , distributed_processing(distributed_processing_) , format_settings(format_settings_) + , partition_by(partition_by_) { context_->getGlobalContext()->getRemoteHostFilter().checkURL(uri_.uri); StorageInMemoryMetadata storage_metadata; @@ -646,12 +561,13 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr bool has_wildcards = client_auth.uri.bucket.find(PARTITION_ID_WILDCARD) != String::npos || client_auth.uri.key.find(PARTITION_ID_WILDCARD) != String::npos; auto insert_query = std::dynamic_pointer_cast(query); - bool is_partitioned_implementation = insert_query && insert_query->partition_by && has_wildcards; + auto partition_by_ast = insert_query ? (insert_query->partition_by ? insert_query->partition_by : partition_by) : nullptr; + bool is_partitioned_implementation = partition_by_ast && has_wildcards; if (is_partitioned_implementation) { return std::make_shared( - insert_query->partition_by, + partition_by_ast, format_name, sample_block, local_context, @@ -833,6 +749,10 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) auto max_single_part_upload_size = args.getLocalContext()->getSettingsRef().s3_max_single_part_upload_size; auto max_connections = args.getLocalContext()->getSettingsRef().s3_max_connections; + ASTPtr partition_by; + if (args.storage_def->partition_by) + partition_by = args.storage_def->partition_by->clone(); + return StorageS3::create( s3_uri, configuration.access_key_id, @@ -848,10 +768,13 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) args.comment, args.getContext(), format_settings, - configuration.compression_method); + configuration.compression_method, + /* distributed_processing_ */false, + partition_by); }, { .supports_settings = true, + .supports_sort_order = true, // for partition by .source_access_type = AccessType::S3, }); } diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 89bbdda87f9..8ce287ff681 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -118,7 +118,8 @@ public: ContextPtr context_, std::optional format_settings_, const String & compression_method_ = "", - bool distributed_processing_ = false); + bool distributed_processing_ = false, + ASTPtr partition_by_ = nullptr); String getName() const override { @@ -169,6 +170,7 @@ private: String name; const bool distributed_processing; std::optional format_settings; + ASTPtr partition_by; static void updateClientAndAuthSettings(ContextPtr, ClientAuthentication &); }; diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index da620463ffa..92664a29767 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -9,10 +9,12 @@ #include #include +#include #include #include #include #include +#include #include #include @@ 
-33,6 +35,13 @@ #include #include +#include +#include +#include +#include + +#include + #include @@ -44,6 +53,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int INCORRECT_FILE_NAME; extern const int TIMEOUT_EXCEEDED; + extern const int NOT_IMPLEMENTED; } @@ -481,6 +491,134 @@ void StorageStripeLog::saveFileSizes(const WriteLock & /* already locked for wri } +BackupEntries StorageStripeLog::backup(const ASTs & partitions, ContextPtr context) +{ + if (!partitions.empty()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Table engine {} doesn't support partitions", getName()); + + auto lock_timeout = getLockTimeout(context); + loadIndices(lock_timeout); + + ReadLock lock{rwlock, lock_timeout}; + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + + if (!file_checker.getFileSize(data_file_path)) + return {}; + + auto temp_dir_owner = std::make_shared(disk, "tmp/backup_"); + auto temp_dir = temp_dir_owner->getPath(); + disk->createDirectories(temp_dir); + + BackupEntries backup_entries; + + /// data.bin + { + /// We make a copy of the data file because it can be changed later in write() or in truncate(). + String data_file_name = fileName(data_file_path); + String temp_file_path = temp_dir + "/" + data_file_name; + disk->copy(data_file_path, disk, temp_file_path); + backup_entries.emplace_back( + data_file_name, + std::make_unique( + disk, temp_file_path, file_checker.getFileSize(data_file_path), std::nullopt, temp_dir_owner)); + } + + /// index.mrk + { + /// We make a copy of the data file because it can be changed later in write() or in truncate(). + String index_file_name = fileName(index_file_path); + String temp_file_path = temp_dir + "/" + index_file_name; + disk->copy(index_file_path, disk, temp_file_path); + backup_entries.emplace_back( + index_file_name, + std::make_unique( + disk, temp_file_path, file_checker.getFileSize(index_file_path), std::nullopt, temp_dir_owner)); + } + + /// sizes.json + String files_info_path = file_checker.getPath(); + backup_entries.emplace_back(fileName(files_info_path), std::make_unique(disk, files_info_path)); + + /// columns.txt + backup_entries.emplace_back( + "columns.txt", std::make_unique(getInMemoryMetadata().getColumns().getAllPhysical().toString())); + + /// count.txt + size_t num_rows = 0; + for (const auto & block : indices.blocks) + num_rows += block.num_rows; + backup_entries.emplace_back("count.txt", std::make_unique(toString(num_rows))); + + return backup_entries; +} + +RestoreDataTasks StorageStripeLog::restoreFromBackup(const BackupPtr & backup, const String & data_path_in_backup, const ASTs & partitions, ContextMutablePtr context) +{ + if (!partitions.empty()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Table engine {} doesn't support partitions", getName()); + + auto restore_task = [this, backup, data_path_in_backup, context]() + { + WriteLock lock{rwlock, getLockTimeout(context)}; + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + + /// Load the indices if not loaded yet. We have to do that now because we're going to update these indices. + loadIndices(lock); + + /// If there were no files, save zero file sizes to be able to rollback in case of error. + saveFileSizes(lock); + + try + { + /// Append the data file. 
+ auto old_data_size = file_checker.getFileSize(data_file_path); + { + String file_path_in_backup = data_path_in_backup + fileName(data_file_path); + auto backup_entry = backup->read(file_path_in_backup); + auto in = backup_entry->getReadBuffer(); + auto out = disk->writeFile(data_file_path, max_compress_block_size, WriteMode::Append); + copyData(*in, *out); + } + + /// Append the index. + String index_path_in_backup = data_path_in_backup + fileName(index_file_path); + if (backup->exists(index_path_in_backup)) + { + IndexForNativeFormat extra_indices; + auto backup_entry = backup->read(index_path_in_backup); + auto index_in = backup_entry->getReadBuffer(); + CompressedReadBuffer index_compressed_in{*index_in}; + extra_indices.read(index_compressed_in); + + /// Adjust the offsets. + for (auto & block : extra_indices.blocks) + { + for (auto & column : block.columns) + column.location.offset_in_compressed_file += old_data_size; + } + + insertAtEnd(indices.blocks, std::move(extra_indices.blocks)); + } + + /// Finish writing. + saveIndices(lock); + saveFileSizes(lock); + } + catch (...) + { + /// Rollback partial writes. + file_checker.repair(); + removeUnsavedIndices(lock); + throw; + } + + }; + return {restore_task}; +} + + void registerStorageStripeLog(StorageFactory & factory) { StorageFactory::StorageFeatures features{ diff --git a/src/Storages/StorageStripeLog.h b/src/Storages/StorageStripeLog.h index 7ab40f867dd..579e2f991e7 100644 --- a/src/Storages/StorageStripeLog.h +++ b/src/Storages/StorageStripeLog.h @@ -50,6 +50,9 @@ public: void truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder&) override; + BackupEntries backup(const ASTs & partitions, ContextPtr context) override; + RestoreDataTasks restoreFromBackup(const BackupPtr & backup, const String & data_path_in_backup, const ASTs & partitions, ContextMutablePtr context) override; + protected: StorageStripeLog( DiskPtr disk_, @@ -92,7 +95,7 @@ private: const size_t max_compress_block_size; - std::shared_timed_mutex rwlock; + mutable std::shared_timed_mutex rwlock; Poco::Logger * log; }; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 4d8f1d8c492..66033f7a7d6 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -15,7 +16,9 @@ #include #include +#include #include +#include #include #include @@ -36,7 +39,7 @@ namespace ErrorCodes IStorageURLBase::IStorageURLBase( - const Poco::URI & uri_, + const String & uri_, ContextPtr /*context_*/, const StorageID & table_id_, const String & format_name_, @@ -45,8 +48,17 @@ IStorageURLBase::IStorageURLBase( const ConstraintsDescription & constraints_, const String & comment, const String & compression_method_, - const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_) - : IStorage(table_id_), uri(uri_), compression_method(compression_method_), format_name(format_name_), format_settings(format_settings_), headers(headers_) + const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_, + const String & http_method_, + ASTPtr partition_by_) + : IStorage(table_id_) + , uri(uri_) + , compression_method(compression_method_) + , format_name(format_name_) + , format_settings(format_settings_) + , headers(headers_) + , http_method(http_method_) + , partition_by(partition_by_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); @@ -80,14 +92,15 @@ namespace return headers; } + class StorageURLSource : public 
SourceWithProgress { using URIParams = std::vector>; public: StorageURLSource( - const std::vector & uri_options, - const std::string & method, + const std::vector & uri_options, + const std::string & http_method, std::function callback, const String & format, const std::optional & format_settings, @@ -109,7 +122,7 @@ namespace WriteBufferFromOwnString error_message; for (auto option = uri_options.begin(); option < uri_options.end(); ++option) { - auto request_uri = *option; + auto request_uri = Poco::URI(*option); for (const auto & [param, value] : params) request_uri.addQueryParameter(param, value); @@ -118,7 +131,7 @@ namespace read_buf = wrapReadBufferWithCompressionMethod( std::make_unique( request_uri, - method, + http_method, callback, timeouts, context->getSettingsRef().max_http_get_redirects, @@ -137,7 +150,7 @@ namespace if (option == uri_options.end() - 1) throw Exception(ErrorCodes::NETWORK_ERROR, "All uri options are unreachable. {}", error_message.str()); - error_message << option->toString() << " error: " << getCurrentExceptionMessage(false) << "\n"; + error_message << *option << " error: " << getCurrentExceptionMessage(false) << "\n"; tryLogCurrentException(__PRETTY_FUNCTION__); } } @@ -193,17 +206,18 @@ namespace } StorageURLSink::StorageURLSink( - const Poco::URI & uri, + const String & uri, const String & format, const std::optional & format_settings, const Block & sample_block, ContextPtr context, const ConnectionTimeouts & timeouts, - const CompressionMethod compression_method) + const CompressionMethod compression_method, + const String & http_method) : SinkToStorage(sample_block) { write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique(uri, Poco::Net::HTTPRequest::HTTP_POST, timeouts), + std::make_unique(Poco::URI(uri), http_method, timeouts), compression_method, 3); writer = FormatFactory::instance().getOutputFormat(format, *write_buf, sample_block, context, {} /* write callback */, format_settings); @@ -228,6 +242,50 @@ void StorageURLSink::onFinish() write_buf->finalize(); } +class PartitionedStorageURLSink : public PartitionedSink +{ +public: + PartitionedStorageURLSink( + const ASTPtr & partition_by, + const String & uri_, + const String & format_, + const std::optional & format_settings_, + const Block & sample_block_, + ContextPtr context_, + const ConnectionTimeouts & timeouts_, + const CompressionMethod compression_method_, + const String & http_method_) + : PartitionedSink(partition_by, context_, sample_block_) + , uri(uri_) + , format(format_) + , format_settings(format_settings_) + , sample_block(sample_block_) + , context(context_) + , timeouts(timeouts_) + , compression_method(compression_method_) + , http_method(http_method_) + { + } + + SinkPtr createSinkForPartition(const String & partition_id) override + { + auto partition_path = PartitionedSink::replaceWildcards(uri, partition_id); + context->getRemoteHostFilter().checkURL(Poco::URI(partition_path)); + return std::make_shared(partition_path, format, + format_settings, sample_block, context, timeouts, compression_method, http_method); + } + +private: + const String uri; + const String format; + const std::optional format_settings; + const Block sample_block; + ContextPtr context; + const ConnectionTimeouts timeouts; + + const CompressionMethod compression_method; + const String http_method; +}; std::string IStorageURLBase::getReadMethod() const { @@ -267,22 +325,59 @@ Pipe IStorageURLBase::read( unsigned /*num_streams*/) { auto params = getReadURIParams(column_names, metadata_snapshot, 
query_info, local_context, processed_stage, max_block_size); - std::vector uri_options{uri}; - return Pipe(std::make_shared( - uri_options, - getReadMethod(), - getReadPOSTDataCallback( - column_names, metadata_snapshot, query_info, - local_context, processed_stage, max_block_size), - format_name, - format_settings, - getName(), - getHeaderBlock(column_names, metadata_snapshot), - local_context, - metadata_snapshot->getColumns(), - max_block_size, - ConnectionTimeouts::getHTTPTimeouts(local_context), - compression_method, headers, params)); + bool with_globs = (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos) + || uri.find('|') != std::string::npos; + + if (with_globs) + { + size_t max_addresses = local_context->getSettingsRef().glob_expansion_max_elements; + std::vector url_descriptions = parseRemoteDescription(uri, 0, uri.size(), ',', max_addresses); + std::vector uri_options; + + Pipes pipes; + for (const auto & url_description : url_descriptions) + { + /// For each uri (which acts like shard) check if it has failover options + uri_options = parseRemoteDescription(url_description, 0, url_description.size(), '|', max_addresses); + StoragePtr shard; + + pipes.emplace_back(std::make_shared( + uri_options, + getReadMethod(), + getReadPOSTDataCallback( + column_names, metadata_snapshot, query_info, + local_context, processed_stage, max_block_size), + format_name, + format_settings, + getName(), + getHeaderBlock(column_names, metadata_snapshot), + local_context, + metadata_snapshot->getColumns(), + max_block_size, + ConnectionTimeouts::getHTTPTimeouts(local_context), + compression_method, headers, params)); + } + return Pipe::unitePipes(std::move(pipes)); + } + else + { + std::vector uri_options{uri}; + return Pipe(std::make_shared( + uri_options, + getReadMethod(), + getReadPOSTDataCallback( + column_names, metadata_snapshot, query_info, + local_context, processed_stage, max_block_size), + format_name, + format_settings, + getName(), + getHeaderBlock(column_names, metadata_snapshot), + local_context, + metadata_snapshot->getColumns(), + max_block_size, + ConnectionTimeouts::getHTTPTimeouts(local_context), + compression_method, headers, params)); + } } @@ -296,6 +391,7 @@ Pipe StorageURLWithFailover::read( unsigned /*num_streams*/) { auto params = getReadURIParams(column_names, metadata_snapshot, query_info, local_context, processed_stage, max_block_size); + auto pipe = Pipe(std::make_shared( uri_options, getReadMethod(), @@ -316,16 +412,36 @@ Pipe StorageURLWithFailover::read( } -SinkToStoragePtr IStorageURLBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) +SinkToStoragePtr IStorageURLBase::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) { - return std::make_shared(uri, format_name, - format_settings, metadata_snapshot->getSampleBlock(), context, - ConnectionTimeouts::getHTTPTimeouts(context), - chooseCompressionMethod(uri.toString(), compression_method)); + if (http_method.empty()) + http_method = Poco::Net::HTTPRequest::HTTP_POST; + + bool has_wildcards = uri.find(PartitionedSink::PARTITION_ID_WILDCARD) != String::npos; + const auto * insert_query = dynamic_cast(query.get()); + auto partition_by_ast = insert_query ? (insert_query->partition_by ? 
insert_query->partition_by : partition_by) : nullptr; + bool is_partitioned_implementation = partition_by_ast && has_wildcards; + + if (is_partitioned_implementation) + { + return std::make_shared( + partition_by_ast, + uri, format_name, + format_settings, metadata_snapshot->getSampleBlock(), context, + ConnectionTimeouts::getHTTPTimeouts(context), + chooseCompressionMethod(uri, compression_method), http_method); + } + else + { + return std::make_shared(uri, format_name, + format_settings, metadata_snapshot->getSampleBlock(), context, + ConnectionTimeouts::getHTTPTimeouts(context), + chooseCompressionMethod(uri, compression_method), http_method); + } } StorageURL::StorageURL( - const Poco::URI & uri_, + const String & uri_, const StorageID & table_id_, const String & format_name_, const std::optional & format_settings_, @@ -334,10 +450,13 @@ StorageURL::StorageURL( const String & comment, ContextPtr context_, const String & compression_method_, - const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_) - : IStorageURLBase(uri_, context_, table_id_, format_name_, format_settings_, columns_, constraints_, comment, compression_method_, headers_) + const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_, + const String & http_method_, + ASTPtr partition_by_) + : IStorageURLBase(uri_, context_, table_id_, format_name_, format_settings_, + columns_, constraints_, comment, compression_method_, headers_, http_method_, partition_by_) { - context_->getRemoteHostFilter().checkURL(uri); + context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); } @@ -350,14 +469,14 @@ StorageURLWithFailover::StorageURLWithFailover( const ConstraintsDescription & constraints_, ContextPtr context_, const String & compression_method_) - : StorageURL(Poco::URI(), table_id_, format_name_, format_settings_, columns_, constraints_, String{}, context_, compression_method_) + : StorageURL("", table_id_, format_name_, format_settings_, columns_, constraints_, String{}, context_, compression_method_) { for (const auto & uri_option : uri_options_) { Poco::URI poco_uri(uri_option); context_->getRemoteHostFilter().checkURL(poco_uri); - uri_options.emplace_back(std::move(poco_uri)); LOG_DEBUG(&Poco::Logger::get("StorageURLDistributed"), "Adding URL option: {}", uri_option); + uri_options.emplace_back(std::move(uri_option)); } } @@ -406,6 +525,13 @@ URLBasedDataSourceConfiguration StorageURL::getConfiguration(ASTs & args, Contex auto [common_configuration, storage_specific_args] = named_collection.value(); configuration.set(common_configuration); + if (!configuration.http_method.empty() + && configuration.http_method != Poco::Net::HTTPRequest::HTTP_POST + && configuration.http_method != Poco::Net::HTTPRequest::HTTP_PUT) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Http method can be POST or PUT (current: {}). 
For insert default is POST, for select GET", + configuration.http_method); + if (!storage_specific_args.empty()) { String illegal_args; @@ -415,14 +541,15 @@ URLBasedDataSourceConfiguration StorageURL::getConfiguration(ASTs & args, Contex illegal_args += ", "; illegal_args += arg.first; } - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown arguments {} for table function URL", illegal_args); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown argument `{}` for storage URL", illegal_args); } } else { if (args.size() != 2 && args.size() != 3) throw Exception( - "Storage URL requires 2 or 3 arguments: url, name of used format and optional compression method.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + "Storage URL requires 2 or 3 arguments: url, name of used format and optional compression method.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, local_context); @@ -444,7 +571,6 @@ void registerStorageURL(StorageFactory & factory) ASTs & engine_args = args.engine_args; auto configuration = StorageURL::getConfiguration(engine_args, args.getLocalContext()); auto format_settings = StorageURL::getFormatSettingsFromArgs(args); - Poco::URI uri(configuration.url); ReadWriteBufferFromHTTP::HTTPHeaderEntries headers; for (const auto & [header, value] : configuration.headers) @@ -453,8 +579,12 @@ void registerStorageURL(StorageFactory & factory) headers.emplace_back(std::make_pair(header, value_literal)); } + ASTPtr partition_by; + if (args.storage_def->partition_by) + partition_by = args.storage_def->partition_by->clone(); + return StorageURL::create( - uri, + configuration.url, args.table_id, configuration.format, format_settings, @@ -463,7 +593,9 @@ void registerStorageURL(StorageFactory & factory) args.comment, args.getContext(), configuration.compression_method, - headers); + headers, + configuration.http_method, + partition_by); }, { .supports_settings = true, diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 1f2cac239e1..a79100c8d70 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -39,9 +39,11 @@ public: SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; + bool supportsPartitionBy() const override { return true; } + protected: IStorageURLBase( - const Poco::URI & uri_, + const String & uri_, ContextPtr context_, const StorageID & id_, const String & format_name_, @@ -50,9 +52,11 @@ protected: const ConstraintsDescription & constraints_, const String & comment, const String & compression_method_, - const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_ = {}); + const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_ = {}, + const String & method_ = "", + ASTPtr partition_by = nullptr); - Poco::URI uri; + String uri; String compression_method; String format_name; // For URL engine, we use format settings from server context + `SETTINGS` @@ -61,6 +65,8 @@ protected: // In this case, format_settings is not set. std::optional format_settings; ReadWriteBufferFromHTTP::HTTPHeaderEntries headers; + String http_method; /// For insert can choose Put instead of default Post. 
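Since the File, S3 and URL sinks above all expand the same partition wildcard in their target path before writing, a small self-contained sketch of that expansion may help. It assumes the literal behind PartitionedSink::PARTITION_ID_WILDCARD is the "{_partition_id}" token (an assumption, not shown in this patch) and uses boost::replace_all_copy exactly as the removed S3 helper did:

#include <boost/algorithm/string/replace.hpp>
#include <iostream>
#include <string>

/// Assumed value of PartitionedSink::PARTITION_ID_WILDCARD (not visible in this patch).
static const std::string kPartitionWildcard = "{_partition_id}";

/// Mirrors PartitionedSink::replaceWildcards(): substitute the partition id into the path template.
static std::string expandPartitionWildcard(const std::string & path_template, const std::string & partition_id)
{
    return boost::replace_all_copy(path_template, kPartitionWildcard, partition_id);
}

int main()
{
    /// e.g. an INSERT with PARTITION BY toYYYYMM(date) writing one object/file per month.
    std::cout << expandPartitionWildcard("https://example.com/data/{_partition_id}.csv", "202111") << "\n";
    std::cout << expandPartitionWildcard("/var/lib/clickhouse/user_files/t_{_partition_id}.tsv", "42") << "\n";
    return 0;
}

The same substitution is applied to the S3 bucket/key and to the File path, after which each per-partition sink writes to its expanded location.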
+ ASTPtr partition_by; virtual std::string getReadMethod() const; @@ -88,13 +94,14 @@ class StorageURLSink : public SinkToStorage { public: StorageURLSink( - const Poco::URI & uri, + const String & uri, const String & format, const std::optional & format_settings, const Block & sample_block, ContextPtr context, const ConnectionTimeouts & timeouts, - CompressionMethod compression_method); + CompressionMethod compression_method, + const String & method = Poco::Net::HTTPRequest::HTTP_POST); std::string getName() const override { return "StorageURLSink"; } void consume(Chunk chunk) override; @@ -112,7 +119,7 @@ class StorageURL : public shared_ptr_helper, public IStorageURLBase friend struct shared_ptr_helper; public: StorageURL( - const Poco::URI & uri_, + const String & uri_, const StorageID & table_id_, const String & format_name_, const std::optional & format_settings_, @@ -121,7 +128,9 @@ public: const String & comment, ContextPtr context_, const String & compression_method_, - const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_ = {}); + const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_ = {}, + const String & method_ = "", + ASTPtr partition_by_ = nullptr); String getName() const override { @@ -170,6 +179,6 @@ public: }; private: - std::vector uri_options; + std::vector uri_options; }; } diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index a90e21a2edb..90ac04ed250 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -33,7 +33,7 @@ StorageXDBC::StorageXDBC( const BridgeHelperPtr bridge_helper_) /// Please add support for constraints as soon as StorageODBC or JDBC will support insertion. : IStorageURLBase( - Poco::URI(), + "", context_, table_id_, IXDBCBridgeHelper::DEFAULT_FORMAT, @@ -47,7 +47,7 @@ StorageXDBC::StorageXDBC( , remote_table_name(remote_table_name_) , log(&Poco::Logger::get("Storage" + bridge_helper->getName())) { - uri = bridge_helper->getMainURI(); + uri = bridge_helper->getMainURI().toString(); } std::string StorageXDBC::getReadMethod() const @@ -118,7 +118,7 @@ SinkToStoragePtr StorageXDBC::write(const ASTPtr & /*query*/, const StorageMetad { bridge_helper->startBridgeSync(); - Poco::URI request_uri = uri; + auto request_uri = Poco::URI(uri); request_uri.setPath("/write"); auto url_params = bridge_helper->getURLParams(65536); @@ -131,13 +131,13 @@ SinkToStoragePtr StorageXDBC::write(const ASTPtr & /*query*/, const StorageMetad request_uri.addQueryParameter("sample_block", metadata_snapshot->getSampleBlock().getNamesAndTypesList().toString()); return std::make_shared( - request_uri, + request_uri.toString(), format_name, getFormatSettings(local_context), metadata_snapshot->getSampleBlock(), local_context, ConnectionTimeouts::getHTTPTimeouts(local_context), - chooseCompressionMethod(uri.toString(), compression_method)); + chooseCompressionMethod(uri, compression_method)); } Block StorageXDBC::getHeaderBlock(const Names & column_names, const StorageMetadataPtr & metadata_snapshot) const diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index 89bd7856b06..4438e1c4737 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -41,7 +41,6 @@ public: std::string getName() const override; private: - BridgeHelperPtr bridge_helper; std::string remote_database_name; std::string remote_table_name; diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index a097e08871d..c33fa6cad44 100644 --- 
a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -26,6 +26,7 @@ const char * auto_contributors[] { "Aleksandra (Ася)", "Aleksandr Karo", "Aleksandrov Vladimir", + "Aleksandr Shalimov", "alekseik1", "Aleksei Levushkin", "Aleksei Semiglazov", @@ -36,6 +37,7 @@ const char * auto_contributors[] { "Alexander Avdonkin", "Alexander Bezpiatov", "Alexander Burmak", + "Alexander Chashnikov", "Alexander Ermolaev", "Alexander Gololobov", "Alexander GQ Gerasiov", @@ -101,9 +103,11 @@ const char * auto_contributors[] { "ana-uvarova", "AnaUvarova", "Andr0901", + "andrc1901", "Andreas Hunkeler", "AndreevDm", "Andrei Bodrov", + "Andrei Ch", "Andrei Chulkov", "andrei-karpliuk", "Andrei Nekrashevich", @@ -127,6 +131,7 @@ const char * auto_contributors[] { "Anmol Arora", "Anna", "Anna Shakhova", + "anneji", "anneji-dev", "annvsh", "anrodigina", @@ -181,6 +186,7 @@ const char * auto_contributors[] { "BanyRule", "Baudouin Giard", "BayoNet", + "bbkas", "benamazing", "benbiti", "Benjamin Naecker", @@ -190,6 +196,7 @@ const char * auto_contributors[] { "bharatnc", "Big Elephant", "Bill", + "BiteTheDDDDt", "BlahGeek", "blazerer", "bluebirddm", @@ -220,6 +227,7 @@ const char * auto_contributors[] { "Chao Wang", "chasingegg", "chengy8934", + "chenjian", "chenqi", "chenxing-xc", "chenxing.xc", @@ -232,7 +240,9 @@ const char * auto_contributors[] { "Ciprian Hacman", "Clement Rodriguez", "Clément Rodriguez", + "ClickHouse Admin", "cn-ds", + "Cody Baker", "Colum", "comunodi", "Constantin S. Pan", @@ -343,6 +353,7 @@ const char * auto_contributors[] { "fastio", "favstovol", "FawnD2", + "Federico Ceratto", "FeehanG", "feihengye", "felixoid", @@ -372,6 +383,7 @@ const char * auto_contributors[] { "fuwhu", "Fu Zhe", "fuzhe1989", + "fuzzERot", "Gagan Arneja", "Gao Qiang", "g-arslan", @@ -407,6 +419,7 @@ const char * auto_contributors[] { "heng zhao", "hermano", "hexiaoting", + "hhell", "Hiroaki Nakamura", "hotid", "huangzhaowei", @@ -456,6 +469,7 @@ const char * auto_contributors[] { "Ivan Kush", "Ivan Kushnarenko", "Ivan Lezhankin", + "Ivan Milov", "Ivan Remen", "Ivan Starkov", "ivanzhukov", @@ -470,6 +484,7 @@ const char * auto_contributors[] { "jasine", "Jason", "Jason Keirstead", + "jasperzhu", "javartisan", "javi", "javi santana", @@ -483,6 +498,7 @@ const char * auto_contributors[] { "Jiang Tao", "jianmei zhang", "jkuklis", + "João Figueiredo", "Jochen Schalanda", "John", "John Hummel", @@ -492,9 +508,11 @@ const char * auto_contributors[] { "jyz0309", "Kang Liu", "Karl Pietrzak", + "karnevil13", "keenwolf", "Keiji Yoshida", "Ken Chen", + "Ken MacInnis", "Kevin Chiang", "Kevin Michel", "kevin wan", @@ -509,6 +527,7 @@ const char * auto_contributors[] { "KochetovNicolai", "kolsys", "Konstantin Grabar", + "Konstantin Ilchenko", "Konstantin Lebedev", "Konstantin Malanchev", "Konstantin Podshumok", @@ -529,6 +548,7 @@ const char * auto_contributors[] { "l1tsolaiki", "lalex", "Latysheva Alexandra", + "laurieliyang", "lehasm", "Léo Ercolanelli", "Leonardo Cecchi", @@ -539,6 +559,9 @@ const char * auto_contributors[] { "levushkin aleksej", "levysh", "Lewinma", + "lhuang0928", + "lhuang09287750", + "liang.huang", "liangqian", "libenwang", "lichengxiang", @@ -593,6 +616,7 @@ const char * auto_contributors[] { "mastertheknife", "Matthew Peveler", "Matwey V. 
Kornilov", + "Mátyás Jani", "Max", "Max Akhmedov", "Max Bruce", @@ -613,11 +637,13 @@ const char * auto_contributors[] { "maxkuzn", "maxulan", "Max Vetrov", + "MaxWk", "Mc.Spring", "mehanizm", "MeiK", "melin", "memo", + "Memo", "meo", "meoww-bot", "mergify[bot]", @@ -636,6 +662,7 @@ const char * auto_contributors[] { "Miguel Fernández", "miha-g", "Mihail Fandyushin", + "mikael", "Mikahil Nacharov", "Mike", "Mike F", @@ -658,6 +685,8 @@ const char * auto_contributors[] { "millb", "Misko Lee", "mnkonkova", + "mo-avatar", + "Mohamad Fadhil", "Mohammad Hossein Sekhavat", "morty", "moscas", @@ -698,6 +727,7 @@ const char * auto_contributors[] { "nikitamikhaylov", "Nikita Mikhaylov", "Nikita Orlov", + "Nikita Tikhomirov", "Nikita Vasilev", "Nikolai Kochetov", "Nikolai Sorokin", @@ -739,15 +769,18 @@ const char * auto_contributors[] { "Paramtamtam", "Patrick Zippenfenig", "Pavel", + "Pavel Cheremushkin", "Pavel Kartaviy", "Pavel Kartavyy", "Pavel Kovalenko", "Pavel Kruglov", "Pavel Litvinenko", + "Pavel Medvedev", "Pavel Patrin", "Pavel Yakunin", "Pavlo Bashynskiy", "Pawel Rog", + "pawelsz-rb", "pdv-ru", "Peng Jian", "Persiyanov Dmitriy Andreevich", @@ -770,6 +803,7 @@ const char * auto_contributors[] { "qianmoQ", "quid", "Quid37", + "quoctan132", "r1j1k", "Rafael David Tinoco", "rainbowsysu", @@ -789,6 +823,7 @@ const char * auto_contributors[] { "robot-clickhouse", "robot-metrika-test", "rodrigargar", + "Rohit Agarwal", "Romain Neutron", "roman", "Roman Bug", @@ -816,6 +851,7 @@ const char * auto_contributors[] { "Saulius Valatka", "sdk2", "Sean Haynes", + "Sébastien", "Sébastien Launay", "serebrserg", "Sergei Bocharov", @@ -840,10 +876,12 @@ const char * auto_contributors[] { "sev7e0", "SevaCode", "sevirov", + "Seyed Mehrshad Hosseini", "sfod", "shangshujie", "shedx", "Sherry Wang", + "Shoh Jahon", "Silviu Caragea", "Simeon Emanuilov", "Simon Liu", @@ -887,16 +925,23 @@ const char * auto_contributors[] { "Taleh Zaliyev", "Tangaev", "tao jiang", + "Tatiana", "Tatiana Kirillova", "tavplubix", "TCeason", + "Teja", + "Teja Srivastasa", "Tema Novikov", "templarzq", + "Tentoshka", "terrylin", "The-Alchemist", "Thomas Berdy", + "Thom O'Connor", + "tianzhou", "Tiaonmmn", "tiger.yan", + "Tigran Khudaverdyan", "tison", "TiunovNN", "Tobias Adamson", @@ -934,7 +979,9 @@ const char * auto_contributors[] { "Veloman Yunkan", "Veniamin Gvozdikov", "Veselkov Konstantin", + "vesslanjin", "vgocoder", + "Viachaslau Boben", "vic", "vicdashkov", "vicgao", @@ -950,6 +997,7 @@ const char * auto_contributors[] { "Vitaliy Zakaznikov", "Vitaly", "Vitaly Baranov", + "Vitaly Orlov", "Vitaly Samigullin", "Vitaly Stoyan", "vitstn", @@ -980,6 +1028,7 @@ const char * auto_contributors[] { "vzakaznikov", "wangchao", "Wang Fenjin", + "WangZengrui", "weeds085490", "Weiqing Xu", "William Shallum", @@ -994,6 +1043,7 @@ const char * auto_contributors[] { "Yangkuan Liu", "yangshuai", "Yatsishin Ilya", + "yeer", "Yegor Andreenko", "Yegor Levankov", "ygrek", @@ -1023,6 +1073,7 @@ const char * auto_contributors[] { "ywill3", "zamulla", "zhang2014", + "zhanglistar", "zhangshengyu", "zhangxiao018", "zhangxiao871", @@ -1051,6 +1102,7 @@ const char * auto_contributors[] { "Смитюх Вячеслав", "Сундуков Алексей", "万康", + "凌涛", "吴健", "小路", "张中南", @@ -1058,8 +1110,10 @@ const char * auto_contributors[] { "张风啸", "徐炘", "曲正鹏", + "木木夕120", "未来星___费", "极客青年", + "枢木", "董海镔", "谢磊", "贾顺名(Jarvis)", diff --git a/src/Storages/System/StorageSystemGrants.cpp b/src/Storages/System/StorageSystemGrants.cpp index 1ba5e6d96a4..d443830ee0a 100644 --- 
a/src/Storages/System/StorageSystemGrants.cpp +++ b/src/Storages/System/StorageSystemGrants.cpp @@ -7,8 +7,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -38,7 +38,7 @@ NamesAndTypesList StorageSystemGrants::getNamesAndTypes() void StorageSystemGrants::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { context->checkAccess(AccessType::SHOW_USERS | AccessType::SHOW_ROLES); - const auto & access_control = context->getAccessControlManager(); + const auto & access_control = context->getAccessControl(); std::vector ids = access_control.findAll(); boost::range::push_back(ids, access_control.findAll()); diff --git a/src/Storages/System/StorageSystemPrivileges.cpp b/src/Storages/System/StorageSystemPrivileges.cpp index ca369efe43a..6a4d2e1087e 100644 --- a/src/Storages/System/StorageSystemPrivileges.cpp +++ b/src/Storages/System/StorageSystemPrivileges.cpp @@ -1,18 +1,18 @@ #include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include +#include namespace DB diff --git a/src/Storages/System/StorageSystemQuotaLimits.cpp b/src/Storages/System/StorageSystemQuotaLimits.cpp index 3b64f72d621..330b9935b48 100644 --- a/src/Storages/System/StorageSystemQuotaLimits.cpp +++ b/src/Storages/System/StorageSystemQuotaLimits.cpp @@ -6,9 +6,9 @@ #include #include #include -#include +#include #include -#include +#include #include #include @@ -72,7 +72,7 @@ NamesAndTypesList StorageSystemQuotaLimits::getNamesAndTypes() void StorageSystemQuotaLimits::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { context->checkAccess(AccessType::SHOW_QUOTAS); - const auto & access_control = context->getAccessControlManager(); + const auto & access_control = context->getAccessControl(); std::vector ids = access_control.findAll(); size_t column_index = 0; diff --git a/src/Storages/System/StorageSystemQuotaUsage.cpp b/src/Storages/System/StorageSystemQuotaUsage.cpp index 6723037cf3b..a08f6686030 100644 --- a/src/Storages/System/StorageSystemQuotaUsage.cpp +++ b/src/Storages/System/StorageSystemQuotaUsage.cpp @@ -7,9 +7,9 @@ #include #include #include -#include +#include #include -#include +#include #include diff --git a/src/Storages/System/StorageSystemQuotas.cpp b/src/Storages/System/StorageSystemQuotas.cpp index f5f57b48232..2294af87fed 100644 --- a/src/Storages/System/StorageSystemQuotas.cpp +++ b/src/Storages/System/StorageSystemQuotas.cpp @@ -1,17 +1,17 @@ #include +#include +#include +#include +#include +#include +#include #include #include #include #include #include -#include -#include -#include #include -#include -#include -#include -#include +#include #include @@ -55,7 +55,7 @@ NamesAndTypesList StorageSystemQuotas::getNamesAndTypes() void StorageSystemQuotas::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { context->checkAccess(AccessType::SHOW_QUOTAS); - const auto & access_control = context->getAccessControlManager(); + const auto & access_control = context->getAccessControl(); std::vector ids = access_control.findAll(); size_t column_index = 0; diff --git a/src/Storages/System/StorageSystemQuotasUsage.cpp b/src/Storages/System/StorageSystemQuotasUsage.cpp index 363562bce19..fae0629a209 100644 --- a/src/Storages/System/StorageSystemQuotasUsage.cpp +++ 
b/src/Storages/System/StorageSystemQuotasUsage.cpp @@ -1,8 +1,8 @@ #include #include #include -#include -#include +#include +#include #include @@ -16,7 +16,7 @@ NamesAndTypesList StorageSystemQuotasUsage::getNamesAndTypes() void StorageSystemQuotasUsage::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { context->checkAccess(AccessType::SHOW_QUOTAS); - auto all_quotas_usage = context->getAccessControlManager().getAllQuotasUsage(); + auto all_quotas_usage = context->getAccessControl().getAllQuotasUsage(); StorageSystemQuotaUsage::fillDataImpl(res_columns, context, /* add_column_is_current = */ true, all_quotas_usage); } } diff --git a/src/Storages/System/StorageSystemRoleGrants.cpp b/src/Storages/System/StorageSystemRoleGrants.cpp index 32984afcfc5..080c73726bc 100644 --- a/src/Storages/System/StorageSystemRoleGrants.cpp +++ b/src/Storages/System/StorageSystemRoleGrants.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -34,7 +34,7 @@ NamesAndTypesList StorageSystemRoleGrants::getNamesAndTypes() void StorageSystemRoleGrants::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { context->checkAccess(AccessType::SHOW_USERS | AccessType::SHOW_ROLES); - const auto & access_control = context->getAccessControlManager(); + const auto & access_control = context->getAccessControl(); std::vector ids = access_control.findAll(); boost::range::push_back(ids, access_control.findAll()); diff --git a/src/Storages/System/StorageSystemRoles.cpp b/src/Storages/System/StorageSystemRoles.cpp index 42983670466..fcc45d1374f 100644 --- a/src/Storages/System/StorageSystemRoles.cpp +++ b/src/Storages/System/StorageSystemRoles.cpp @@ -3,9 +3,9 @@ #include #include #include -#include +#include #include -#include +#include #include @@ -26,7 +26,7 @@ NamesAndTypesList StorageSystemRoles::getNamesAndTypes() void StorageSystemRoles::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { context->checkAccess(AccessType::SHOW_ROLES); - const auto & access_control = context->getAccessControlManager(); + const auto & access_control = context->getAccessControl(); std::vector ids = access_control.findAll(); size_t column_index = 0; diff --git a/src/Storages/System/StorageSystemRowPolicies.cpp b/src/Storages/System/StorageSystemRowPolicies.cpp index 3fa62bc784b..202ec5078d8 100644 --- a/src/Storages/System/StorageSystemRowPolicies.cpp +++ b/src/Storages/System/StorageSystemRowPolicies.cpp @@ -1,18 +1,18 @@ #include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include +#include #include #include @@ -55,7 +55,7 @@ NamesAndTypesList StorageSystemRowPolicies::getNamesAndTypes() void StorageSystemRowPolicies::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { context->checkAccess(AccessType::SHOW_ROW_POLICIES); - const auto & access_control = context->getAccessControlManager(); + const auto & access_control = context->getAccessControl(); std::vector ids = access_control.findAll(); size_t column_index = 0; diff --git a/src/Storages/System/StorageSystemSettingsProfileElements.cpp b/src/Storages/System/StorageSystemSettingsProfileElements.cpp index fa824091238..b2991baf9cb 100644 --- a/src/Storages/System/StorageSystemSettingsProfileElements.cpp +++ 
b/src/Storages/System/StorageSystemSettingsProfileElements.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -40,7 +40,7 @@ NamesAndTypesList StorageSystemSettingsProfileElements::getNamesAndTypes() void StorageSystemSettingsProfileElements::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { context->checkAccess(AccessType::SHOW_SETTINGS_PROFILES); - const auto & access_control = context->getAccessControlManager(); + const auto & access_control = context->getAccessControl(); std::vector ids = access_control.findAll(); boost::range::push_back(ids, access_control.findAll()); boost::range::push_back(ids, access_control.findAll()); diff --git a/src/Storages/System/StorageSystemSettingsProfiles.cpp b/src/Storages/System/StorageSystemSettingsProfiles.cpp index 87847fb80bc..132f10ea194 100644 --- a/src/Storages/System/StorageSystemSettingsProfiles.cpp +++ b/src/Storages/System/StorageSystemSettingsProfiles.cpp @@ -1,16 +1,16 @@ #include +#include +#include +#include +#include +#include +#include #include #include #include #include -#include -#include -#include #include -#include -#include -#include -#include +#include namespace DB @@ -33,7 +33,7 @@ NamesAndTypesList StorageSystemSettingsProfiles::getNamesAndTypes() void StorageSystemSettingsProfiles::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { context->checkAccess(AccessType::SHOW_SETTINGS_PROFILES); - const auto & access_control = context->getAccessControlManager(); + const auto & access_control = context->getAccessControl(); std::vector ids = access_control.findAll(); size_t column_index = 0; diff --git a/src/Storages/System/StorageSystemUserDirectories.cpp b/src/Storages/System/StorageSystemUserDirectories.cpp index d4a2d5169fa..3c550cd1cfe 100644 --- a/src/Storages/System/StorageSystemUserDirectories.cpp +++ b/src/Storages/System/StorageSystemUserDirectories.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace DB @@ -23,7 +23,7 @@ NamesAndTypesList StorageSystemUserDirectories::getNamesAndTypes() void StorageSystemUserDirectories::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { - const auto & access_control = context->getAccessControlManager(); + const auto & access_control = context->getAccessControl(); auto storages = access_control.getStorages(); size_t column_index = 0; diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index a48e12a1476..ca88fa688a0 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -1,4 +1,7 @@ #include +#include +#include +#include #include #include #include @@ -8,10 +11,7 @@ #include #include #include -#include -#include -#include -#include +#include #include #include #include @@ -25,8 +25,8 @@ namespace DataTypeEnum8::Values getAuthenticationTypeEnumValues() { DataTypeEnum8::Values enum_values; - for (auto type : collections::range(Authentication::MAX_TYPE)) - enum_values.emplace_back(Authentication::TypeInfo::get(type).name, static_cast(type)); + for (auto type : collections::range(AuthenticationType::MAX)) + enum_values.emplace_back(AuthenticationTypeInfo::get(type).name, static_cast(type)); return enum_values; } } @@ -59,7 +59,7 @@ NamesAndTypesList StorageSystemUsers::getNamesAndTypes() void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { 
context->checkAccess(AccessType::SHOW_USERS); - const auto & access_control = context->getAccessControlManager(); + const auto & access_control = context->getAccessControl(); std::vector ids = access_control.findAll(); size_t column_index = 0; @@ -91,7 +91,7 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr conte auto add_row = [&](const String & name, const UUID & id, const String & storage_name, - const Authentication & authentication, + const AuthenticationData & auth_data, const AllowedClientHosts & allowed_hosts, const RolesOrUsersSet & default_roles, const RolesOrUsersSet & grantees, @@ -100,19 +100,19 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr conte column_name.insertData(name.data(), name.length()); column_id.push_back(id.toUnderType()); column_storage.insertData(storage_name.data(), storage_name.length()); - column_auth_type.push_back(static_cast(authentication.getType())); + column_auth_type.push_back(static_cast(auth_data.getType())); if ( - authentication.getType() == Authentication::Type::LDAP || - authentication.getType() == Authentication::Type::KERBEROS + auth_data.getType() == AuthenticationType::LDAP || + auth_data.getType() == AuthenticationType::KERBEROS ) { Poco::JSON::Object auth_params_json; - if (authentication.getType() == Authentication::Type::LDAP) - auth_params_json.set("server", authentication.getLDAPServerName()); - else if (authentication.getType() == Authentication::Type::KERBEROS) - auth_params_json.set("realm", authentication.getKerberosRealm()); + if (auth_data.getType() == AuthenticationType::LDAP) + auth_params_json.set("server", auth_data.getLDAPServerName()); + else if (auth_data.getType() == AuthenticationType::KERBEROS) + auth_params_json.set("realm", auth_data.getKerberosRealm()); std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM oss.exceptions(std::ios::failbit); @@ -197,7 +197,7 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr conte if (!storage) continue; - add_row(user->getName(), id, storage->getStorageName(), user->authentication, user->allowed_client_hosts, + add_row(user->getName(), id, storage->getStorageName(), user->auth_data, user->allowed_client_hosts, user->default_roles, user->grantees, user->default_database); } } diff --git a/src/TableFunctions/ITableFunction.cpp b/src/TableFunctions/ITableFunction.cpp index 218d86fe4a2..fa7f6e52220 100644 --- a/src/TableFunctions/ITableFunction.cpp +++ b/src/TableFunctions/ITableFunction.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index dcb91c8cc2e..d8bdb3b45c4 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -1,7 +1,7 @@ #include #include "registerTableFunctions.h" -#include +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index bbaa1b5f048..4df1e1d4982 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -1,7 +1,7 @@ #include #include "registerTableFunctions.h" -#include +#include #include #include #include @@ -24,12 +24,18 @@ void TableFunctionURL::parseArguments(const ASTPtr & ast_function, ContextPtr co if (!func_args.arguments) throw Exception("Table function 'URL' must have arguments.", ErrorCodes::BAD_ARGUMENTS); - URLBasedDataSourceConfiguration configuration; if (auto 
with_named_collection = getURLBasedDataSourceConfiguration(func_args.arguments->children, context)) { auto [common_configuration, storage_specific_args] = with_named_collection.value(); configuration.set(common_configuration); + if (!configuration.http_method.empty() + && configuration.http_method != Poco::Net::HTTPRequest::HTTP_POST + && configuration.http_method != Poco::Net::HTTPRequest::HTTP_PUT) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Method can be POST or PUT (current: {}). For insert default is POST, for select GET", + configuration.http_method); + if (!storage_specific_args.empty()) { String illegal_args; @@ -39,7 +45,7 @@ void TableFunctionURL::parseArguments(const ASTPtr & ast_function, ContextPtr co illegal_args += ", "; illegal_args += arg.first; } - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown arguments {} for table function URL", illegal_args); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown argument `{}` for table function URL", illegal_args); } filename = configuration.url; @@ -58,33 +64,25 @@ StoragePtr TableFunctionURL::getStorage( const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, const std::string & table_name, const String & compression_method_) const { - /// If url contains {1..k} or failover options with separator `|`, use a separate storage - if ((source.find('{') == std::string::npos || source.find('}') == std::string::npos) && source.find('|') == std::string::npos) + ReadWriteBufferFromHTTP::HTTPHeaderEntries headers; + for (const auto & [header, value] : configuration.headers) { - Poco::URI uri(source); - return StorageURL::create( - uri, - StorageID(getDatabaseName(), table_name), - format_, - std::nullopt /*format settings*/, - columns, - ConstraintsDescription{}, - String{}, - global_context, - compression_method_); - } - else - { - return StorageExternalDistributed::create( - source, - StorageID(getDatabaseName(), table_name), - format_, - std::nullopt, - compression_method_, - columns, - ConstraintsDescription{}, - global_context); + auto value_literal = value.safeGet(); + headers.emplace_back(std::make_pair(header, value_literal)); } + + return StorageURL::create( + source, + StorageID(getDatabaseName(), table_name), + format_, + std::nullopt /*format settings*/, + columns, + ConstraintsDescription{}, + String{}, + global_context, + compression_method_, + headers, + configuration.http_method); } void registerTableFunctionURL(TableFunctionFactory & factory) diff --git a/src/TableFunctions/TableFunctionURL.h b/src/TableFunctions/TableFunctionURL.h index c35db9f9c8b..9425112acb2 100644 --- a/src/TableFunctions/TableFunctionURL.h +++ b/src/TableFunctions/TableFunctionURL.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -27,6 +28,8 @@ private: const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, const std::string & table_name, const String & compression_method_) const override; const char * getStorageTypeName() const override { return "URL"; } + + URLBasedDataSourceConfiguration configuration; }; } diff --git a/tests/ci/approve_lambda/Dockerfile b/tests/ci/approve_lambda/Dockerfile new file mode 100644 index 00000000000..f53be71a893 --- /dev/null +++ b/tests/ci/approve_lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. 
+ +COPY requirements.txt . +RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/approve_lambda/app.py b/tests/ci/approve_lambda/app.py new file mode 100644 index 00000000000..ffc5afa2f86 --- /dev/null +++ b/tests/ci/approve_lambda/app.py @@ -0,0 +1,300 @@ +#!/usr/bin/env python3 + +import json +import time +import fnmatch +from collections import namedtuple +import jwt + +import requests +import boto3 + +API_URL = 'https://api.github.com/repos/ClickHouse/ClickHouse' + +SUSPICIOUS_CHANGED_FILES_NUMBER = 200 + +SUSPICIOUS_PATTERNS = [ + "tests/ci/*", + "docs/tools/*", + ".github/*", + "utils/release/*", + "docker/*", + "release", +] + +MAX_RETRY = 5 + +WorkflowDescription = namedtuple('WorkflowDescription', + ['name', 'action', 'run_id', 'event', 'sender_login', + 'workflow_id', 'fork_owner_login', 'fork_branch', 'sender_orgs']) + +TRUSTED_WORKFLOW_IDS = { + 14586616, # Cancel workflows, always trusted +} + +TRUSTED_ORG_IDS = { + 7409213, # yandex + 28471076, # altinity + 54801242, # clickhouse +} + +# Individual trusted contirbutors who are not in any trusted organization. +# Can be changed in runtime: we will append users that we learned to be in +# a trusted org, to save GitHub API calls. +TRUSTED_CONTRIBUTORS = { + "achimbab", + "adevyatova ", # DOCSUP + "Algunenano", # Raúl Marín, Tinybird + "AnaUvarova", # DOCSUP + "anauvarova", # technical writer, Yandex + "annvsh", # technical writer, Yandex + "atereh", # DOCSUP + "azat", + "bharatnc", # Newbie, but already with many contributions. + "bobrik", # Seasoned contributor, CloundFlare + "BohuTANG", + "damozhaeva", # DOCSUP + "den-crane", + "gyuton", # DOCSUP + "hagen1778", # Roman Khavronenko, seasoned contributor + "hczhcz", + "hexiaoting", # Seasoned contributor + "ildus", # adjust, ex-pgpro + "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto + "ka1bi4", # DOCSUP + "kirillikoff", # DOCSUP + "kreuzerkrieg", + "lehasm", # DOCSUP + "michon470", # DOCSUP + "MyroTk", # Tester in Altinity + "myrrc", # Michael Kot, Altinity + "nikvas0", + "nvartolomei", + "olgarev", # DOCSUP + "otrazhenia", # Yandex docs contractor + "pdv-ru", # DOCSUP + "podshumok", # cmake expert from QRator Labs + "s-mx", # Maxim Sabyanin, former employee, present contributor + "sevirov", # technical writer, Yandex + "spongedu", # Seasoned contributor + "ucasfl", # Amos Bird's friend + "vdimir", # Employee + "vzakaznikov", + "YiuRULE", + "zlobober" # Developer of YT +} + + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + +def get_key_and_app_from_aws(): + secret_name = "clickhouse_github_secret_key" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = 
json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + + +def is_trusted_sender(pr_user_login, pr_user_orgs): + if pr_user_login in TRUSTED_CONTRIBUTORS: + print(f"User '{pr_user_login}' is trusted") + return True + + print(f"User '{pr_user_login}' is not trusted") + + for org_id in pr_user_orgs: + if org_id in TRUSTED_ORG_IDS: + print(f"Org '{org_id}' is trusted; will mark user {pr_user_login} as trusted") + return True + print(f"Org '{org_id}' is not trusted") + + return False + +def _exec_get_with_retry(url): + for i in range(MAX_RETRY): + try: + response = requests.get(url) + response.raise_for_status() + return response.json() + except Exception as ex: + print("Got exception executing request", ex) + time.sleep(i + 1) + + raise Exception("Cannot execute GET request with retries") + +def _exec_post_with_retry(url, token, data=None): + headers = { + "Authorization": f"token {token}" + } + for i in range(MAX_RETRY): + try: + if data: + response = requests.post(url, headers=headers, json=data) + else: + response = requests.post(url, headers=headers) + if response.status_code == 403: + data = response.json() + if 'message' in data and data['message'] == 'This workflow run is not waiting for approval': + print("Workflow doesn't need approval") + return data + response.raise_for_status() + return response.json() + except Exception as ex: + print("Got exception executing request", ex) + time.sleep(i + 1) + + raise Exception("Cannot execute POST request with retry") + +def _get_pull_requests_from(owner, branch): + url = f"{API_URL}/pulls?head={owner}:{branch}" + return _exec_get_with_retry(url) + +def get_workflow_description_from_event(event): + action = event['action'] + sender_login = event['sender']['login'] + run_id = event['workflow_run']['id'] + event_type = event['workflow_run']['event'] + fork_owner = event['workflow_run']['head_repository']['owner']['login'] + fork_branch = event['workflow_run']['head_branch'] + orgs_data = _exec_get_with_retry(event['sender']['organizations_url']) + sender_orgs = [org['id'] for org in orgs_data] + name = event['workflow_run']['name'] + workflow_id = event['workflow_run']['workflow_id'] + return WorkflowDescription( + name=name, + action=action, + sender_login=sender_login, + run_id=run_id, + event=event_type, + fork_owner_login=fork_owner, + fork_branch=fork_branch, + sender_orgs=sender_orgs, + workflow_id=workflow_id, + ) + + +def get_changed_files_for_pull_request(pull_request): + number = pull_request['number'] + + changed_files = set([]) + for i in range(1, 31): + print("Requesting changed files page", i) + url = f"{API_URL}/pulls/{number}/files?page={i}&per_page=100" + data = _exec_get_with_retry(url) + print(f"Got {len(data)} changed files") + if len(data) == 0: + print("No more changed files") + break + + for change in data: + #print("Adding changed file", change['filename']) + changed_files.add(change['filename']) + + if len(changed_files) >= SUSPICIOUS_CHANGED_FILES_NUMBER: + print(f"More than {len(changed_files)} changed files. 
Will stop fetching new files.") + break + + return changed_files + +def check_suspicious_changed_files(changed_files): + if len(changed_files) >= SUSPICIOUS_CHANGED_FILES_NUMBER: + print(f"Too many files changed {len(changed_files)}, need manual approve") + return True + + for path in changed_files: + for pattern in SUSPICIOUS_PATTERNS: + if fnmatch.fnmatch(path, pattern): + print(f"File {path} match suspicious pattern {pattern}, will not approve automatically") + return True + + print("No changed files match suspicious patterns, run will be approved") + return False + +def approve_run(run_id, token): + url = f"{API_URL}/actions/runs/{run_id}/approve" + _exec_post_with_retry(url, token) + +def label_manual_approve(pull_request, token): + number = pull_request['number'] + url = f"{API_URL}/issues/{number}/labels" + data = {"labels" : "manual approve"} + + _exec_post_with_retry(url, token, data) + +def get_token_from_aws(): + private_key, app_id = get_key_and_app_from_aws() + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": app_id, + } + + encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + return get_access_token(encoded_jwt, installation_id) + +def main(event): + token = get_token_from_aws() + event_data = json.loads(event['body']) + workflow_description = get_workflow_description_from_event(event_data) + + print("Got workflow description", workflow_description) + if workflow_description.action != "requested": + print("Exiting, event action is", workflow_description.action) + return + + if workflow_description.workflow_id in TRUSTED_WORKFLOW_IDS: + print("Workflow in trusted list, approving run") + approve_run(workflow_description.run_id, token) + return + + if is_trusted_sender(workflow_description.sender_login, workflow_description.sender_orgs): + print("Sender is trusted, approving run") + approve_run(workflow_description.run_id, token) + return + + pull_requests = _get_pull_requests_from(workflow_description.fork_owner_login, workflow_description.fork_branch) + print("Got pull requests for workflow", len(pull_requests)) + if len(pull_requests) > 1: + raise Exception("Received more than one PR for workflow run") + + if len(pull_requests) < 1: + raise Exception("Cannot find any pull requests for workflow run") + + pull_request = pull_requests[0] + print("Pull request for workflow number", pull_request['number']) + + changed_files = get_changed_files_for_pull_request(pull_request) + print(f"Totally have {len(changed_files)} changed files in PR:", changed_files) + if check_suspicious_changed_files(changed_files): + print(f"Pull Request {pull_request['number']} has suspicious changes, label it for manuall approve") + label_manual_approve(pull_request, token) + else: + print(f"Pull Request {pull_request['number']} has no suspicious changes") + approve_run(workflow_description.run_id, token) + +def handler(event, _): + main(event) diff --git a/tests/ci/approve_lambda/requirements.txt b/tests/ci/approve_lambda/requirements.txt new file mode 100644 index 00000000000..c0dcf4a4dde --- /dev/null +++ b/tests/ci/approve_lambda/requirements.txt @@ -0,0 +1,3 @@ +requests +PyJWT +cryptography diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 96dfb01e234..43575cb75d7 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -50,7 +50,7 @@ def get_commit(gh, commit_sha): return commit def upload_results(s3_client, pr_number, commit_sha, test_results, additional_files): - 
s3_path_prefix = str(pr_number) + "/" + commit_sha + "/" + NAME.lower().replace(' ', '_') + s3_path_prefix = str(pr_number) + "/" + commit_sha + "/" + NAME.lower().replace(' ', '_').replace('(', '_').replace(')', '_') additional_urls = process_logs(s3_client, additional_files, s3_path_prefix) branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master" @@ -65,7 +65,7 @@ def upload_results(s3_client, pr_number, commit_sha, test_results, additional_fi raw_log_url = additional_urls[0] additional_urls.pop(0) - html_report = create_test_html_report(NAME, test_results, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls) + html_report = create_test_html_report(NAME, test_results, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls, False) with open('report.html', 'w') as f: f.write(html_report) @@ -77,7 +77,7 @@ def upload_results(s3_client, pr_number, commit_sha, test_results, additional_fi if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.path.join(os.getenv("REPO_COPY", os.path.abspath("../../"))) - temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'pvs_check') + temp_path = os.path.join(os.getenv("TEMP_PATH")) with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: event = json.load(event_file) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index 292aa5c0472..62029b01fb1 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -40,6 +40,12 @@ class S3Helper(): elif s3_path.endswith("html"): metadata['ContentType'] = "text/html; charset=utf-8" logging.info("Content type %s for file path %s", "text/html; charset=utf-8", file_path) + elif s3_path.endswith("css"): + metadata['ContentType'] = "text/css; charset=utf-8" + logging.info("Content type %s for file path %s", "text/css; charset=utf-8", file_path) + elif s3_path.endswith("js"): + metadata['ContentType'] = "text/javascript; charset=utf-8" + logging.info("Content type %s for file path %s", "text/css; charset=utf-8", file_path) else: logging.info("No content type provied for %s", file_path) else: diff --git a/tests/config/config.d/metric_log.xml b/tests/config/config.d/metric_log.xml deleted file mode 100644 index ea829d15975..00000000000 --- a/tests/config/config.d/metric_log.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - system - metric_log
- 7500 - 1000 -
-
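Context note: the new tests/ci/approve_lambda/app.py introduced above auto-approves GitHub Actions runs only when the triggering user or the set of changed files passes a screening step. A condensed, standalone Python sketch of that screening is shown here for clarity; it is not part of the PR. The glob patterns and the 200-file threshold are copied from the diff, while the helper name needs_manual_approve and the sample paths are invented for illustration.

import fnmatch

SUSPICIOUS_CHANGED_FILES_NUMBER = 200
SUSPICIOUS_PATTERNS = [
    "tests/ci/*",
    "docs/tools/*",
    ".github/*",
    "utils/release/*",
    "docker/*",
    "release",
]

def needs_manual_approve(changed_files):
    # A very large PR is suspicious regardless of what it touches.
    if len(changed_files) >= SUSPICIOUS_CHANGED_FILES_NUMBER:
        return True
    # Any file matching a pattern that covers CI scripts, workflows,
    # docs tooling, release scripts or docker images blocks auto-approval.
    return any(
        fnmatch.fnmatch(path, pattern)
        for path in changed_files
        for pattern in SUSPICIOUS_PATTERNS
    )

print(needs_manual_approve(["src/Core/Settings.h"]))         # False
print(needs_manual_approve(["tests/ci/pvs_check.py"]))       # True
print(needs_manual_approve([".github/workflows/main.yml"]))  # True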
diff --git a/tests/config/config.d/part_log.xml b/tests/config/config.d/part_log.xml
deleted file mode 100644
index ce9847a49fb..00000000000
--- a/tests/config/config.d/part_log.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-<clickhouse>
-    <part_log>
-        <database>system</database>
-        <table>part_log</table>
-
-        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
-    </part_log>
-</clickhouse>
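Context note: the tests/ci/s3_helper.py hunk earlier in this patch adds explicit Content-Type metadata for .css and .js report assets next to the existing .html branch. The short sketch below is not the PR's code (the function name guess_content_type is assumed); it restates that lookup in table-driven form, with the mapping values copied from the diff. A path with no known extension returns None, matching the original fall-through branch that only logs a message.

CONTENT_TYPES = {
    ".html": "text/html; charset=utf-8",
    ".css": "text/css; charset=utf-8",
    ".js": "text/javascript; charset=utf-8",
}

def guess_content_type(s3_path):
    # Return an explicit Content-Type for known report assets,
    # or None to leave the S3 default in place.
    for extension, content_type in CONTENT_TYPES.items():
        if s3_path.endswith(extension):
            return content_type
    return None

assert guess_content_type("report.html") == "text/html; charset=utf-8"
assert guess_content_type("style.css") == "text/css; charset=utf-8"
assert guess_content_type("script.js") == "text/javascript; charset=utf-8"
assert guess_content_type("runlog.log") is None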
diff --git a/tests/config/install.sh b/tests/config/install.sh index a451c9f3ed1..edf897ca430 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -17,9 +17,7 @@ mkdir -p $DEST_CLIENT_PATH ln -sf $SRC_PATH/config.d/zookeeper.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/listen.xml $DEST_SERVER_PATH/config.d/ -ln -sf $SRC_PATH/config.d/part_log.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/text_log.xml $DEST_SERVER_PATH/config.d/ -ln -sf $SRC_PATH/config.d/metric_log.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/custom_settings_prefixes.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/macros.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/disks.xml $DEST_SERVER_PATH/config.d/ diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index b4f368abb8e..b56264fb570 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -3,264 +3,5 @@ "test_host_ip_change/test.py::test_ip_change_drop_dns_cache", "test_host_ip_change/test.py::test_ip_change_update_dns_cache", "test_host_ip_change/test.py::test_user_access_ip_change[node0]", - "test_host_ip_change/test.py::test_user_access_ip_change[node1]", - "test_atomic_drop_table/test.py::test_atomic_delete_with_stopped_zookeeper", - "test_attach_without_fetching/test.py::test_attach_without_fetching", - "test_broken_part_during_merge/test.py::test_merge_and_part_corruption", - "test_cleanup_dir_after_bad_zk_conn/test.py::test_attach_without_zk", - "test_cleanup_dir_after_bad_zk_conn/test.py::test_cleanup_dir_after_bad_zk_conn", - "test_cleanup_dir_after_bad_zk_conn/test.py::test_cleanup_dir_after_wrong_replica_name", - "test_cleanup_dir_after_bad_zk_conn/test.py::test_cleanup_dir_after_wrong_zk_path", - "test_consistent_parts_after_clone_replica/test.py::test_inconsistent_parts_if_drop_while_replica_not_active", - "test_cross_replication/test.py::test", - "test_ddl_worker_non_leader/test.py::test_non_leader_replica", - "test_delayed_replica_failover/test.py::test", - "test_dictionaries_update_field/test.py::test_update_field[complex_key_hashed_update_field_dictionary-HASHED]", - "test_dictionaries_update_field/test.py::test_update_field[flat_update_field_dictionary-FLAT]", - "test_dictionaries_update_field/test.py::test_update_field[simple_key_hashed_update_field_dictionary-HASHED]", - "test_dictionary_allow_read_expired_keys/test_default_reading.py::test_default_reading", - "test_dictionary_allow_read_expired_keys/test_default_string.py::test_return_real_values", - "test_dictionary_allow_read_expired_keys/test_dict_get_or_default.py::test_simple_dict_get_or_default", - "test_dictionary_allow_read_expired_keys/test_dict_get.py::test_simple_dict_get", - "test_disabled_mysql_server/test.py::test_disabled_mysql_server", - "test_distributed_ddl_on_cross_replication/test.py::test_alter_ddl", - "test_distributed_ddl_on_cross_replication/test.py::test_atomic_database", - "test_distributed_ddl_parallel/test.py::test_all_in_parallel", - "test_distributed_ddl_parallel/test.py::test_slow_dict_load_7", - "test_distributed_ddl_parallel/test.py::test_smoke", - "test_distributed_ddl_parallel/test.py::test_smoke_parallel", - "test_distributed_ddl_parallel/test.py::test_smoke_parallel_dict_reload", - "test_distributed_ddl_parallel/test.py::test_two_in_parallel_two_queued", - "test_distributed_ddl_password/test.py::test_alter", - "test_distributed_ddl_password/test.py::test_truncate", - 
"test_distributed_ddl/test.py::test_allowed_databases[configs]", - "test_distributed_ddl/test.py::test_allowed_databases[configs_secure]", - "test_distributed_ddl/test.py::test_create_as_select[configs]", - "test_distributed_ddl/test.py::test_create_as_select[configs_secure]", - "test_distributed_ddl/test.py::test_create_reserved[configs]", - "test_distributed_ddl/test.py::test_create_reserved[configs_secure]", - "test_distributed_ddl/test.py::test_create_view[configs]", - "test_distributed_ddl/test.py::test_create_view[configs_secure]", - "test_distributed_ddl/test.py::test_default_database[configs]", - "test_distributed_ddl/test.py::test_default_database[configs_secure]", - "test_distributed_ddl/test.py::test_detach_query[configs]", - "test_distributed_ddl/test.py::test_detach_query[configs_secure]", - "test_distributed_ddl/test.py::test_implicit_macros[configs]", - "test_distributed_ddl/test.py::test_implicit_macros[configs_secure]", - "test_distributed_ddl/test.py::test_kill_query[configs]", - "test_distributed_ddl/test.py::test_kill_query[configs_secure]", - "test_distributed_ddl/test.py::test_macro[configs]", - "test_distributed_ddl/test.py::test_macro[configs_secure]", - "test_distributed_ddl/test.py::test_on_connection_loss[configs]", - "test_distributed_ddl/test.py::test_on_connection_loss[configs_secure]", - "test_distributed_ddl/test.py::test_on_server_fail[configs]", - "test_distributed_ddl/test.py::test_on_server_fail[configs_secure]", - "test_distributed_ddl/test.py::test_on_session_expired[configs]", - "test_distributed_ddl/test.py::test_on_session_expired[configs_secure]", - "test_distributed_ddl/test.py::test_optimize_query[configs]", - "test_distributed_ddl/test.py::test_optimize_query[configs_secure]", - "test_distributed_ddl/test.py::test_rename[configs]", - "test_distributed_ddl/test.py::test_rename[configs_secure]", - "test_distributed_ddl/test.py::test_replicated_without_arguments[configs]", - "test_distributed_ddl/test.py::test_replicated_without_arguments[configs_secure]", - "test_distributed_ddl/test.py::test_simple_alters[configs]", - "test_distributed_ddl/test.py::test_simple_alters[configs_secure]", - "test_distributed_ddl/test.py::test_socket_timeout[configs]", - "test_distributed_ddl/test.py::test_socket_timeout[configs_secure]", - "test_distributed_ddl/test_replicated_alter.py::test_replicated_alters[configs]", - "test_distributed_ddl/test_replicated_alter.py::test_replicated_alters[configs_secure]", - "test_distributed_respect_user_timeouts/test.py::test_reconnect[configs-default-node1-distributed]", - "test_distributed_respect_user_timeouts/test.py::test_reconnect[configs-default-node1-remote]", - "test_distributed_respect_user_timeouts/test.py::test_reconnect[configs-default-node2-distributed]", - "test_distributed_respect_user_timeouts/test.py::test_reconnect[configs-default-node2-remote]", - "test_distributed_respect_user_timeouts/test.py::test_reconnect[configs-ready_to_wait-node1-distributed]", - "test_distributed_respect_user_timeouts/test.py::test_reconnect[configs-ready_to_wait-node1-remote]", - "test_distributed_respect_user_timeouts/test.py::test_reconnect[configs-ready_to_wait-node2-distributed]", - "test_distributed_respect_user_timeouts/test.py::test_reconnect[configs-ready_to_wait-node2-remote]", - "test_distributed_respect_user_timeouts/test.py::test_reconnect[configs_secure-default-node1-distributed]", - "test_distributed_respect_user_timeouts/test.py::test_reconnect[configs_secure-default-node1-remote]", - 
"test_distributed_respect_user_timeouts/test.py::test_reconnect[configs_secure-default-node2-distributed]", - "test_distributed_respect_user_timeouts/test.py::test_reconnect[configs_secure-default-node2-remote]", - "test_distributed_respect_user_timeouts/test.py::test_reconnect[configs_secure-ready_to_wait-node1-distributed]", - "test_distributed_respect_user_timeouts/test.py::test_reconnect[configs_secure-ready_to_wait-node1-remote]", - "test_distributed_respect_user_timeouts/test.py::test_reconnect[configs_secure-ready_to_wait-node2-distributed]", - "test_distributed_respect_user_timeouts/test.py::test_reconnect[configs_secure-ready_to_wait-node2-remote]", - "test_drop_replica/test.py::test_drop_replica", - "test_hedged_requests_parallel/test.py::test_combination1", - "test_hedged_requests_parallel/test.py::test_combination2", - "test_hedged_requests_parallel/test.py::test_query_with_no_data_to_sample", - "test_hedged_requests_parallel/test.py::test_send_data", - "test_hedged_requests_parallel/test.py::test_send_table_status_sleep", - "test_hedged_requests/test.py::test_combination1", - "test_hedged_requests/test.py::test_combination2", - "test_hedged_requests/test.py::test_combination3", - "test_hedged_requests/test.py::test_combination4", - "test_hedged_requests/test.py::test_long_query", - "test_hedged_requests/test.py::test_receive_timeout1", - "test_hedged_requests/test.py::test_receive_timeout2", - "test_hedged_requests/test.py::test_send_data", - "test_hedged_requests/test.py::test_send_data2", - "test_hedged_requests/test.py::test_send_table_status_sleep", - "test_hedged_requests/test.py::test_send_table_status_sleep2", - "test_hedged_requests/test.py::test_stuck_replica", - "test_https_replication/test.py::test_both_http", - "test_https_replication/test.py::test_both_https", - "test_https_replication/test.py::test_mixed_protocol", - "test_https_replication/test.py::test_replication_after_partition", - "test_insert_into_distributed_sync_async/test.py::test_async_inserts_into_local_shard", - "test_insert_into_distributed_sync_async/test.py::test_insertion_sync", - "test_insert_into_distributed_sync_async/test.py::test_insertion_sync_fails_with_timeout", - "test_insert_into_distributed_sync_async/test.py::test_insertion_sync_with_disabled_timeout", - "test_insert_into_distributed_sync_async/test.py::test_insertion_without_sync_ignores_timeout", - "test_insert_into_distributed/test.py::test_inserts_batching", - "test_insert_into_distributed/test.py::test_inserts_local", - "test_insert_into_distributed/test.py::test_inserts_low_cardinality", - "test_insert_into_distributed/test.py::test_inserts_single_replica_internal_replication", - "test_insert_into_distributed/test.py::test_inserts_single_replica_local_internal_replication", - "test_insert_into_distributed/test.py::test_inserts_single_replica_no_internal_replication", - "test_insert_into_distributed/test.py::test_prefer_localhost_replica", - "test_insert_into_distributed/test.py::test_reconnect", - "test_insert_into_distributed/test.py::test_table_function", - "test_insert_into_distributed_through_materialized_view/test.py::test_inserts_local", - "test_insert_into_distributed_through_materialized_view/test.py::test_reconnect", - "test_keeper_multinode_blocade_leader/test.py::test_blocade_leader", - "test_keeper_multinode_blocade_leader/test.py::test_blocade_leader_twice", - "test_keeper_multinode_simple/test.py::test_follower_restart", - "test_keeper_multinode_simple/test.py::test_read_write_multinode", - 
"test_keeper_multinode_simple/test.py::test_session_expiration", - "test_keeper_multinode_simple/test.py::test_simple_replicated_table", - "test_keeper_multinode_simple/test.py::test_watch_on_follower", - "test_limited_replicated_fetches/test.py::test_limited_fetches", - "test_materialized_mysql_database/test.py::test_clickhouse_killed_while_insert_5_7[atomic]", - "test_materialized_mysql_database/test.py::test_clickhouse_killed_while_insert_5_7[ordinary]", - "test_materialized_mysql_database/test.py::test_clickhouse_killed_while_insert_8_0[atomic]", - "test_materialized_mysql_database/test.py::test_clickhouse_killed_while_insert_8_0[ordinary]", - "test_materialized_mysql_database/test.py::test_insert_with_modify_binlog_checksum_5_7[atomic]", - "test_materialized_mysql_database/test.py::test_insert_with_modify_binlog_checksum_5_7[ordinary]", - "test_materialized_mysql_database/test.py::test_insert_with_modify_binlog_checksum_8_0[atomic]", - "test_materialized_mysql_database/test.py::test_insert_with_modify_binlog_checksum_8_0[ordinary]", - "test_materialized_mysql_database/test.py::test_materialize_database_ddl_with_empty_transaction_5_7[atomic]", - "test_materialized_mysql_database/test.py::test_materialize_database_ddl_with_empty_transaction_5_7[ordinary]", - "test_materialized_mysql_database/test.py::test_materialize_database_ddl_with_empty_transaction_8_0[atomic]", - "test_materialized_mysql_database/test.py::test_materialize_database_ddl_with_empty_transaction_8_0[ordinary]", - "test_materialized_mysql_database/test.py::test_materialize_database_ddl_with_mysql_5_7[atomic]", - "test_materialized_mysql_database/test.py::test_materialize_database_ddl_with_mysql_5_7[ordinary]", - "test_materialized_mysql_database/test.py::test_materialize_database_ddl_with_mysql_8_0[atomic]", - "test_materialized_mysql_database/test.py::test_materialize_database_ddl_with_mysql_8_0[ordinary]", - "test_materialized_mysql_database/test.py::test_materialize_database_dml_with_mysql_5_7[atomic]", - "test_materialized_mysql_database/test.py::test_materialize_database_dml_with_mysql_5_7[ordinary]", - "test_materialized_mysql_database/test.py::test_materialize_database_dml_with_mysql_8_0[atomic]", - "test_materialized_mysql_database/test.py::test_materialize_database_dml_with_mysql_8_0[ordinary]", - "test_materialized_mysql_database/test.py::test_materialize_database_err_sync_user_privs_5_7[atomic]", - "test_materialized_mysql_database/test.py::test_materialize_database_err_sync_user_privs_5_7[ordinary]", - "test_materialized_mysql_database/test.py::test_materialize_database_err_sync_user_privs_8_0[atomic]", - "test_materialized_mysql_database/test.py::test_materialize_database_err_sync_user_privs_8_0[ordinary]", - "test_materialized_mysql_database/test.py::test_multi_table_update[clickhouse_node0]", - "test_materialized_mysql_database/test.py::test_multi_table_update[clickhouse_node1]", - "test_materialized_mysql_database/test.py::test_mysql_killed_while_insert_5_7[atomic]", - "test_materialized_mysql_database/test.py::test_mysql_killed_while_insert_5_7[ordinary]", - "test_materialized_mysql_database/test.py::test_mysql_killed_while_insert_8_0[atomic]", - "test_materialized_mysql_database/test.py::test_mysql_killed_while_insert_8_0[ordinary]", - "test_materialized_mysql_database/test.py::test_mysql_kill_sync_thread_restore_5_7[atomic]", - "test_materialized_mysql_database/test.py::test_mysql_kill_sync_thread_restore_5_7[ordinary]", - 
"test_materialized_mysql_database/test.py::test_mysql_kill_sync_thread_restore_8_0[atomic]", - "test_materialized_mysql_database/test.py::test_mysql_kill_sync_thread_restore_8_0[ordinary]", - "test_materialized_mysql_database/test.py::test_mysql_settings[clickhouse_node0]", - "test_materialized_mysql_database/test.py::test_mysql_settings[clickhouse_node1]", - "test_materialized_mysql_database/test.py::test_network_partition_5_7[atomic]", - "test_materialized_mysql_database/test.py::test_network_partition_5_7[ordinary]", - "test_materialized_mysql_database/test.py::test_network_partition_8_0[atomic]", - "test_materialized_mysql_database/test.py::test_network_partition_8_0[ordinary]", - "test_materialized_mysql_database/test.py::test_select_without_columns_5_7[atomic]", - "test_materialized_mysql_database/test.py::test_select_without_columns_5_7[ordinary]", - "test_materialized_mysql_database/test.py::test_select_without_columns_8_0[atomic]", - "test_materialized_mysql_database/test.py::test_select_without_columns_8_0[ordinary]", - "test_materialized_mysql_database/test.py::test_system_parts_table[clickhouse_node0]", - "test_materialized_mysql_database/test.py::test_system_parts_table[clickhouse_node1]", - "test_materialized_mysql_database/test.py::test_system_tables_table[clickhouse_node0]", - "test_materialized_mysql_database/test.py::test_system_tables_table[clickhouse_node1]", - "test_materialized_mysql_database/test.py::test_materialize_with_column_comments[clickhouse_node0]", - "test_materialized_mysql_database/test.py::test_materialize_with_column_comments[clickhouse_node1]", - "test_materialized_mysql_database/test.py::test_materialize_with_enum[clickhouse_node0]", - "test_materialized_mysql_database/test.py::test_materialize_with_enum[clickhouse_node1]", - "test_materialized_mysql_database/test.py::test_utf8mb4[clickhouse_node0]", - "test_materialized_mysql_database/test.py::test_utf8mb4[clickhouse_node1]", - "test_parts_delete_zookeeper/test.py::test_merge_doesnt_work_without_zookeeper", - "test_polymorphic_parts/test.py::test_compact_parts_only", - "test_polymorphic_parts/test.py::test_different_part_types_on_replicas[polymorphic_table_compact-Compact]", - "test_polymorphic_parts/test.py::test_different_part_types_on_replicas[polymorphic_table_wide-Wide]", - "test_polymorphic_parts/test.py::test_in_memory", - "test_polymorphic_parts/test.py::test_in_memory_alters", - "test_polymorphic_parts/test.py::test_in_memory_deduplication", - "test_polymorphic_parts/test.py::test_in_memory_wal_rotate", - "test_polymorphic_parts/test.py::test_polymorphic_parts_basics[first_node0-second_node0]", - "test_polymorphic_parts/test.py::test_polymorphic_parts_basics[first_node1-second_node1]", - "test_polymorphic_parts/test.py::test_polymorphic_parts_index", - "test_polymorphic_parts/test.py::test_polymorphic_parts_non_adaptive", - "test_quorum_inserts_parallel/test.py::test_parallel_quorum_actually_parallel", - "test_quorum_inserts_parallel/test.py::test_parallel_quorum_actually_quorum", - "test_random_inserts/test.py::test_insert_multithreaded", - "test_random_inserts/test.py::test_random_inserts", - "test_reload_clusters_config/test.py::test_add_cluster", - "test_reload_clusters_config/test.py::test_delete_cluster", - "test_reload_clusters_config/test.py::test_simple_reload", - "test_reload_clusters_config/test.py::test_update_one_cluster", - "test_replace_partition/test.py::test_drop_failover", - "test_replace_partition/test.py::test_normal_work", - 
"test_replace_partition/test.py::test_replace_after_replace_failover", - "test_replicated_database/test.py::test_alters_from_different_replicas", - "test_replicated_database/test.py::test_create_replicated_table", - "test_replicated_database/test.py::test_recover_staled_replica", - "test_replicated_database/test.py::test_simple_alter_table[MergeTree]", - "test_replicated_database/test.py::test_simple_alter_table[ReplicatedMergeTree]", - "test_replicated_database/test.py::test_startup_without_zk", - "test_replicated_fetches_timeouts/test.py::test_no_stall", - "test_storage_kafka/test.py::test_bad_reschedule", - "test_storage_kafka/test.py::test_commits_of_unprocessed_messages_on_drop", - "test_storage_kafka/test.py::test_exception_from_destructor", - "test_storage_kafka/test.py::test_kafka_commit_on_block_write", - "test_storage_kafka/test.py::test_kafka_consumer_hang", - "test_storage_kafka/test.py::test_kafka_consumer_hang2", - "test_storage_kafka/test.py::test_kafka_csv_with_delimiter", - "test_storage_kafka/test.py::test_kafka_csv_with_thread_per_consumer", - "test_storage_kafka/test.py::test_kafka_duplicates_when_commit_failed", - "test_storage_kafka/test.py::test_kafka_engine_put_errors_to_stream", - "test_storage_kafka/test.py::test_kafka_engine_put_errors_to_stream_with_random_malformed_json", - "test_storage_kafka/test.py::test_kafka_flush_by_block_size", - "test_storage_kafka/test.py::test_kafka_flush_by_time", - "test_storage_kafka/test.py::test_kafka_flush_on_big_message", - "test_storage_kafka/test.py::test_kafka_formats", - "test_storage_kafka/test.py::test_kafka_formats_with_broken_message", - "test_storage_kafka/test.py::test_kafka_insert", - "test_storage_kafka/test.py::test_kafka_issue11308", - "test_storage_kafka/test.py::test_kafka_issue14202", - "test_storage_kafka/test.py::test_kafka_issue4116", - "test_storage_kafka/test.py::test_kafka_json_as_string", - "test_storage_kafka/test.py::test_kafka_json_without_delimiter", - "test_storage_kafka/test.py::test_kafka_lot_of_partitions_partial_commit_of_bulk", - "test_storage_kafka/test.py::test_kafka_many_materialized_views", - "test_storage_kafka/test.py::test_kafka_materialized_view", - "test_storage_kafka/test.py::test_kafka_materialized_view_with_subquery", - "test_storage_kafka/test.py::test_kafka_no_holes_when_write_suffix_failed", - "test_storage_kafka/test.py::test_kafka_produce_consume", - "test_storage_kafka/test.py::test_kafka_produce_key_timestamp", - "test_storage_kafka/test.py::test_kafka_protobuf", - "test_storage_kafka/test.py::test_kafka_protobuf_no_delimiter", - "test_storage_kafka/test.py::test_kafka_rebalance", - "test_storage_kafka/test.py::test_kafka_select_empty", - "test_storage_kafka/test.py::test_kafka_settings_new_syntax", - "test_storage_kafka/test.py::test_kafka_settings_old_syntax", - "test_storage_kafka/test.py::test_kafka_string_field_on_first_position_in_protobuf", - "test_storage_kafka/test.py::test_kafka_tsv_with_delimiter", - "test_storage_kafka/test.py::test_kafka_unavailable", - "test_storage_kafka/test.py::test_kafka_virtual_columns", - "test_storage_kafka/test.py::test_kafka_virtual_columns2", - "test_storage_kafka/test.py::test_kafka_virtual_columns_with_materialized_view", - "test_storage_kafka/test.py::test_librdkafka_compression", - "test_storage_kafka/test.py::test_premature_flush_on_eof", - "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string", - "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string_no_kdc", - 
"test_system_clusters_actual_information/test.py::test", - "test_system_metrics/test.py::test_readonly_metrics", - "test_system_replicated_fetches/test.py::test_system_replicated_fetches" + "test_host_ip_change/test.py::test_user_access_ip_change[node1]" ] diff --git a/tests/integration/test_attach_partition_with_large_destination/__init__.py b/tests/integration/test_attach_partition_with_large_destination/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_attach_partition_with_large_destination/configs/config.xml b/tests/integration/test_attach_partition_with_large_destination/configs/config.xml new file mode 100644 index 00000000000..0500e2ad554 --- /dev/null +++ b/tests/integration/test_attach_partition_with_large_destination/configs/config.xml @@ -0,0 +1,4 @@ + + 1 + 1 + diff --git a/tests/integration/test_attach_partition_with_large_destination/test.py b/tests/integration/test_attach_partition_with_large_destination/test.py new file mode 100644 index 00000000000..50f24f7a01e --- /dev/null +++ b/tests/integration/test_attach_partition_with_large_destination/test.py @@ -0,0 +1,50 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance('node', main_configs=["configs/config.xml"], with_zookeeper=True) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + +def create_force_drop_flag(node): + force_drop_flag_path = "/var/lib/clickhouse/flags/force_drop_table" + node.exec_in_container(["bash", "-c", "touch {} && chmod a=rw {}".format(force_drop_flag_path, force_drop_flag_path)], user="root") + +@pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) +def test_attach_partition_with_large_destination(started_cluster, engine): + # Initialize + node.query("CREATE DATABASE db ENGINE={}".format(engine)) + node.query("CREATE TABLE db.destination (n UInt64) ENGINE=ReplicatedMergeTree('/test/destination', 'r1') ORDER BY n PARTITION BY n % 2") + node.query("CREATE TABLE db.source_1 (n UInt64) ENGINE=ReplicatedMergeTree('/test/source_1', 'r1') ORDER BY n PARTITION BY n % 2") + node.query("INSERT INTO db.source_1 VALUES (1), (2), (3), (4)") + node.query("CREATE TABLE db.source_2 (n UInt64) ENGINE=ReplicatedMergeTree('/test/source_2', 'r1') ORDER BY n PARTITION BY n % 2") + node.query("INSERT INTO db.source_2 VALUES (5), (6), (7), (8)") + + # Attach partition when destination partition is empty + node.query("ALTER TABLE db.destination ATTACH PARTITION 0 FROM db.source_1") + assert node.query("SELECT n FROM db.destination ORDER BY n") == "2\n4\n" + + # REPLACE PARTITION should still respect max_partition_size_to_drop + assert node.query_and_get_error("ALTER TABLE db.destination REPLACE PARTITION 0 FROM db.source_2") + assert node.query("SELECT n FROM db.destination ORDER BY n") == "2\n4\n" + + # Attach partition when destination partition is larger than max_partition_size_to_drop + node.query("ALTER TABLE db.destination ATTACH PARTITION 0 FROM db.source_2") + assert node.query("SELECT n FROM db.destination ORDER BY n") == "2\n4\n6\n8\n" + + # Cleanup + create_force_drop_flag(node) + node.query("DROP TABLE db.source_1 SYNC") + create_force_drop_flag(node) + node.query("DROP TABLE db.source_2 SYNC") + create_force_drop_flag(node) + node.query("DROP TABLE db.destination SYNC") + node.query("DROP DATABASE db") \ No newline at end of file diff --git 
a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index 472ecaf608b..3ba73b3405f 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -6,9 +6,11 @@ cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance') -def create_and_fill_table(): +def create_and_fill_table(engine="MergeTree"): + if engine == "MergeTree": + engine = "MergeTree ORDER BY y PARTITION BY x%10" instance.query("CREATE DATABASE test") - instance.query("CREATE TABLE test.table(x UInt32, y String) ENGINE=MergeTree ORDER BY y PARTITION BY x%10") + instance.query(f"CREATE TABLE test.table(x UInt32, y String) ENGINE={engine}") instance.query("INSERT INTO test.table SELECT number, toString(number) FROM numbers(100)") @@ -36,9 +38,11 @@ def new_backup_name(): return f"test-backup-{backup_id_counter}" -def test_restore_table(): + +@pytest.mark.parametrize("engine", ["MergeTree", "Log", "TinyLog", "StripeLog"]) +def test_restore_table(engine): backup_name = new_backup_name() - create_and_fill_table() + create_and_fill_table(engine=engine) assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" instance.query(f"BACKUP TABLE test.table TO '{backup_name}'") @@ -50,9 +54,10 @@ def test_restore_table(): assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" -def test_restore_table_into_existing_table(): +@pytest.mark.parametrize("engine", ["MergeTree", "Log", "TinyLog", "StripeLog"]) +def test_restore_table_into_existing_table(engine): backup_name = new_backup_name() - create_and_fill_table() + create_and_fill_table(engine=engine) assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" instance.query(f"BACKUP TABLE test.table TO '{backup_name}'") diff --git a/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py b/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py index 8819be527fd..98658ec81d0 100644 --- a/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py +++ b/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py @@ -2,7 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster -cluster = ClickHouseCluster(__file__) +cluster = ClickHouseCluster(__file__, name="aggregate_fixed_key") node1 = cluster.add_instance('node1', with_zookeeper=True, image='yandex/clickhouse-server', tag='21.3', with_installed_binary=True) node2 = cluster.add_instance('node2', with_zookeeper=True, image='yandex/clickhouse-server') node3 = cluster.add_instance('node3', with_zookeeper=True, image='yandex/clickhouse-server') diff --git a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml index 72cb43caf09..8e254d769ea 100644 --- a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml +++ b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml @@ -9,7 +9,7 @@ dict dep_y
- SELECT intDiv(count(), 5) from dict.dep_y + SELECT intDiv(count(), 4) from dict.dep_y diff --git a/tests/integration/test_dictionaries_dependency_xml/test.py b/tests/integration/test_dictionaries_dependency_xml/test.py index 849fdf57980..6b8a5dff133 100644 --- a/tests/integration/test_dictionaries_dependency_xml/test.py +++ b/tests/integration/test_dictionaries_dependency_xml/test.py @@ -59,20 +59,19 @@ def test_get_data(started_cluster): query("INSERT INTO test.elements VALUES (3, 'fire', 30, 8)") # Wait for dictionaries to be reloaded. - assert_eq_with_retry(instance, "SELECT dictHas('dep_y', toUInt64(3))", "1", sleep_time=2, retry_count=10) - assert query("SELECT dictGetString('dep_x', 'a', toUInt64(3))") == "XX\n" - assert query("SELECT dictGetString('dep_y', 'a', toUInt64(3))") == "fire\n" - assert query("SELECT dictGetString('dep_z', 'a', toUInt64(3))") == "ZZ\n" - - # dep_x and dep_z are updated only when there `intDiv(count(), 5)` is changed. - query("INSERT INTO test.elements VALUES (4, 'ether', 404, 0.001)") - assert_eq_with_retry(instance, "SELECT dictHas('dep_x', toUInt64(4))", "1", sleep_time=2, retry_count=10) + assert_eq_with_retry(instance, "SELECT dictHas('dep_x', toUInt64(3))", "1", sleep_time=2, retry_count=10) assert query("SELECT dictGetString('dep_x', 'a', toUInt64(3))") == "fire\n" assert query("SELECT dictGetString('dep_y', 'a', toUInt64(3))") == "fire\n" assert query("SELECT dictGetString('dep_z', 'a', toUInt64(3))") == "fire\n" - assert query("SELECT dictGetString('dep_x', 'a', toUInt64(4))") == "ether\n" + + # dep_z (and hence dep_x) are updated only when there `intDiv(count(), 4)` is changed, now `count()==4`, + # so dep_x and dep_z are not going to be updated after the following INSERT. + query("INSERT INTO test.elements VALUES (4, 'ether', 404, 0.001)") + assert_eq_with_retry(instance, "SELECT dictHas('dep_y', toUInt64(4))", "1", sleep_time=2, retry_count=10) + assert query("SELECT dictGetString('dep_x', 'a', toUInt64(4))") == "XX\n" assert query("SELECT dictGetString('dep_y', 'a', toUInt64(4))") == "ether\n" - assert query("SELECT dictGetString('dep_z', 'a', toUInt64(4))") == "ether\n" + assert query("SELECT dictGetString('dep_z', 'a', toUInt64(4))") == "ZZ\n" + def dependent_tables_assert(): res = instance.query("select database || '.' 
|| name from system.tables") diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index 4d2f70ad08c..001a46e1237 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -554,7 +554,7 @@ def test_concurrent_queries(started_cluster): busy_pool = Pool(5) p = busy_pool.map_async(node_insert, range(5)) p.wait() - assert_eq_with_retry(node1, "SELECT count() FROM test_pg_table", str(5*5*1000)) + assert_eq_with_retry(node1, "SELECT count() FROM test_pg_table", str(5*5*1000), retry_count=100) def node_insert_select(_): for i in range(5): @@ -564,7 +564,7 @@ def test_concurrent_queries(started_cluster): busy_pool = Pool(5) p = busy_pool.map_async(node_insert_select, range(5)) p.wait() - assert_eq_with_retry(node1, "SELECT count() FROM test_pg_table", str(5*5*1000*2)) + assert_eq_with_retry(node1, "SELECT count() FROM test_pg_table", str(5*5*1000*2), retry_count=100) node1.query('DROP TABLE test_pg_table;') cursor.execute('DROP TABLE clickhouse.test_pg_table;') @@ -627,4 +627,3 @@ def test_odbc_long_text(started_cluster): cursor.execute("""insert into clickhouse.test_long_text (flen, field1) values (400000, '{}')""".format(long_text)); result = node1.query("select field1 from test_long_text where flen=400000;") assert(result.strip() == long_text) - diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index f3c83166b46..995f704262e 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -257,6 +257,34 @@ def test_truncate_table(started_cluster): node1.query("drop table test_truncate") +def test_partition_by(started_cluster): + hdfs_api = started_cluster.hdfs_api + + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + file_name = "test_{_partition_id}" + partition_by = "column3" + values = "(1, 2, 3), (3, 2, 1), (1, 3, 2)" + table_function = f"hdfs('hdfs://hdfs1:9000/{file_name}', 'TSV', '{table_format}')" + + node1.query(f"insert into table function {table_function} PARTITION BY {partition_by} values {values}") + result = node1.query(f"select * from hdfs('hdfs://hdfs1:9000/test_1', 'TSV', '{table_format}')") + assert(result.strip() == "3\t2\t1") + result = node1.query(f"select * from hdfs('hdfs://hdfs1:9000/test_2', 'TSV', '{table_format}')") + assert(result.strip() == "1\t3\t2") + result = node1.query(f"select * from hdfs('hdfs://hdfs1:9000/test_3', 'TSV', '{table_format}')") + assert(result.strip() == "1\t2\t3") + + file_name = "test2_{_partition_id}" + node1.query(f"create table p(column1 UInt32, column2 UInt32, column3 UInt32) engine = HDFS('hdfs://hdfs1:9000/{file_name}', 'TSV') partition by column3") + node1.query(f"insert into p values {values}") + result = node1.query(f"select * from hdfs('hdfs://hdfs1:9000/test2_1', 'TSV', '{table_format}')") + assert(result.strip() == "3\t2\t1") + result = node1.query(f"select * from hdfs('hdfs://hdfs1:9000/test2_2', 'TSV', '{table_format}')") + assert(result.strip() == "1\t3\t2") + result = node1.query(f"select * from hdfs('hdfs://hdfs1:9000/test2_3', 'TSV', '{table_format}')") + assert(result.strip() == "1\t2\t3") + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 6106966e5b7..bf9ab4f9e27 100644 --- a/tests/integration/test_storage_kafka/test.py +++ 
b/tests/integration/test_storage_kafka/test.py @@ -8,6 +8,7 @@ import logging import io import string import ast +import math import avro.schema import avro.io @@ -119,17 +120,20 @@ def kafka_produce(kafka_cluster, topic, messages, timestamp=None, retries=15): def kafka_producer_send_heartbeat_msg(max_retries=50): kafka_produce(kafka_cluster, 'test_heartbeat_topic', ['test'], retries=max_retries) -def kafka_consume(kafka_cluster, topic): +def kafka_consume(kafka_cluster, topic, needDecode = True, timestamp = 0): consumer = KafkaConsumer(bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), auto_offset_reset="earliest") consumer.subscribe(topics=(topic)) for toppar, messages in list(consumer.poll(5000).items()): if toppar.topic == topic: for message in messages: - yield message.value.decode() + assert timestamp == 0 or message.timestamp / 1000 == timestamp + if needDecode: + yield message.value.decode() + else: + yield message.value consumer.unsubscribe() consumer.close() - def kafka_produce_protobuf_messages(kafka_cluster, topic, start_index, num_messages): data = b'' for i in range(start_index, start_index + num_messages): @@ -681,6 +685,16 @@ def kafka_check_result(result, check=False, ref_file='test_kafka_json.reference' return TSV(result) == TSV(reference) +def decode_avro(message): + b = io.BytesIO(message) + ret = avro.datafile.DataFileReader(b, avro.io.DatumReader()) + + output = io.StringIO() + for record in ret: + print(record, file=output) + return output.getvalue() + + # https://stackoverflow.com/a/57692111/1555175 def describe_consumer_group(kafka_cluster, name): client = BrokerConnection('localhost', kafka_cluster.kafka_port, socket.AF_INET) @@ -1829,6 +1843,86 @@ def test_kafka_produce_key_timestamp(kafka_cluster): kafka_delete_topic(admin_client, topic_name) +def test_kafka_insert_avro(kafka_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.kafka; + CREATE TABLE test.kafka (key UInt64, value UInt64, _timestamp DateTime('UTC')) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'avro1', + kafka_group_name = 'avro1', + kafka_format = 'Avro'; + ''') + + + instance.query("INSERT INTO test.kafka select number*10 as key, number*100 as value, 1636505534 as _timestamp from numbers(4) SETTINGS output_format_avro_rows_in_file = 2, output_format_avro_codec = 'deflate'") + + messages = [] + while True: + messages.extend(kafka_consume(kafka_cluster, 'avro1', needDecode = False, timestamp = 1636505534)) + if len(messages) == 2: + break + + result = '' + for a_message in messages: + result += decode_avro(a_message) + '\n' + + expected_result = """{'key': 0, 'value': 0, '_timestamp': 1636505534} +{'key': 10, 'value': 100, '_timestamp': 1636505534} + +{'key': 20, 'value': 200, '_timestamp': 1636505534} +{'key': 30, 'value': 300, '_timestamp': 1636505534} + +""" + assert (result == expected_result) + + +def test_kafka_produce_consume_avro(kafka_cluster): + + admin_client = KafkaAdminClient(bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port)) + + topic_name = "insert_avro" + kafka_create_topic(admin_client, topic_name) + + num_rows = 75 + + instance.query(''' + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.kafka; + DROP TABLE IF EXISTS test.kafka_writer; + + CREATE TABLE test.kafka_writer (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'avro', + kafka_group_name = 'avro', + kafka_format = 'Avro'; + + + CREATE TABLE test.kafka (key UInt64, value 
UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'avro', + kafka_group_name = 'avro', + kafka_format = 'Avro'; + + CREATE MATERIALIZED VIEW test.view Engine=Log AS + SELECT key, value FROM test.kafka; + ''') + + instance.query("INSERT INTO test.kafka_writer select number*10 as key, number*100 as value from numbers({num_rows}) SETTINGS output_format_avro_rows_in_file = 7".format(num_rows=num_rows)) + + instance.wait_for_log_line("Committed offset {offset}".format(offset=math.ceil(num_rows/7))) + + expected_num_rows = instance.query("SELECT COUNT(1) FROM test.view", ignore_error=True) + assert (int(expected_num_rows) == num_rows) + + expected_max_key = instance.query("SELECT max(key) FROM test.view", ignore_error=True) + assert (int(expected_max_key) == (num_rows - 1) * 10) + + kafka_delete_topic(admin_client, topic_name) + + def test_kafka_flush_by_time(kafka_cluster): admin_client = KafkaAdminClient(bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port)) topic_name = "flush_by_time" @@ -2850,7 +2944,7 @@ def test_kafka_formats_with_broken_message(kafka_cluster): # broken message b'\x05\x02\x69\x64\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x04\x76\x61\x6c\x31\x04\x76\x61\x6c\x32\x04\x76\x61\x6c\x33\x05\x49\x6e\x74\x36\x34\x06\x53\x74\x72\x69\x6e\x67\x06\x53\x74\x72\x69\x6e\x67\x07\x46\x6c\x6f\x61\x74\x33\x32\x05\x55\x49\x6e\x74\x38\x00\x00\x00\x00\x00\x00\x00\x00\x03\x42\x41\x44\x02\x41\x4d\x00\x00\x00\x3f\x01', ], - 'expected':'{"raw_message":"0502696407626C6F636B4E6F0476616C310476616C320476616C3305496E74363406537472696E6706537472696E6707466C6F617433320555496E743800000000000000000342414402414D0000003F01","error":"Cannot read all data. Bytes read: 9. Bytes expected: 65.: (at row 1)\\n"}', + 'expected':'{"raw_message":"0502696407626C6F636B4E6F0476616C310476616C320476616C3305496E74363406537472696E6706537472696E6707466C6F617433320555496E743800000000000000000342414402414D0000003F01","error":"Type of \'blockNo\' must be UInt16, not String"}', 'printable':False, }, 'ORC': { diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 36d63588386..90793bea428 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -6,6 +6,7 @@ import threading import logging import time from random import randrange +import math import pika import pytest @@ -250,7 +251,7 @@ def test_rabbitmq_macros(rabbitmq_cluster): for i in range(50): message += json.dumps({'key': i, 'value': i}) + '\n' channel.basic_publish(exchange='macro', routing_key='', body=message) - + connection.close() time.sleep(1) @@ -2027,6 +2028,47 @@ def test_rabbitmq_queue_consume(rabbitmq_cluster): instance.query('DROP TABLE test.rabbitmq_queue') +def test_rabbitmq_produce_consume_avro(rabbitmq_cluster): + num_rows = 75 + + instance.query(''' + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.rabbit; + DROP TABLE IF EXISTS test.rabbit_writer; + + CREATE TABLE test.rabbit_writer (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_format = 'Avro', + rabbitmq_exchange_name = 'avro', + rabbitmq_exchange_type = 'direct', + rabbitmq_routing_key_list = 'avro'; + + CREATE TABLE test.rabbit (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_format = 'Avro', + rabbitmq_exchange_name = 'avro', + rabbitmq_exchange_type = 'direct', + rabbitmq_routing_key_list = 'avro'; + + 
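Both Kafka Avro tests rely on output_format_avro_rows_in_file to split a single INSERT into several Avro-encoded messages: test_kafka_insert_avro expects exactly two messages for four rows, and test_kafka_produce_consume_avro waits for ceil(75 / 7) committed offsets. Roughly, reusing the test's test.kafka table:

-- Four rows with two rows per Avro file should produce two Kafka messages.
INSERT INTO test.kafka
SELECT number * 10 AS key, number * 100 AS value, 1636505534 AS _timestamp
FROM numbers(4)
SETTINGS output_format_avro_rows_in_file = 2, output_format_avro_codec = 'deflate';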
CREATE MATERIALIZED VIEW test.view Engine=Log AS + SELECT key, value FROM test.rabbit; + ''') + + instance.query("INSERT INTO test.rabbit_writer select number*10 as key, number*100 as value from numbers({num_rows}) SETTINGS output_format_avro_rows_in_file = 7".format(num_rows=num_rows)) + + + # Ideally we should wait for an event + time.sleep(3) + + expected_num_rows = instance.query("SELECT COUNT(1) FROM test.view", ignore_error=True) + assert (int(expected_num_rows) == num_rows) + + expected_max_key = instance.query("SELECT max(key) FROM test.view", ignore_error=True) + assert (int(expected_max_key) == (num_rows - 1) * 10) + + def test_rabbitmq_bad_args(rabbitmq_cluster): credentials = pika.PlainCredentials('root', 'clickhouse') parameters = pika.ConnectionParameters(rabbitmq_cluster.rabbitmq_ip, rabbitmq_cluster.rabbitmq_port, '/', credentials) @@ -2042,6 +2084,148 @@ def test_rabbitmq_bad_args(rabbitmq_cluster): ''') +def test_rabbitmq_issue_30691(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq_drop (json String) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = '30691', + rabbitmq_row_delimiter = '\\n', -- Works only if adding this setting + rabbitmq_format = 'LineAsString', + rabbitmq_queue_base = '30691'; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters(rabbitmq_cluster.rabbitmq_ip, rabbitmq_cluster.rabbitmq_port, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + channel.basic_publish(exchange='30691', routing_key='', body=json.dumps({"event_type": "purge", "as_src": 1234, "as_dst": 0, "as_path": "", + "local_pref": 100, "med": 0, "peer_as_dst": 0, + "ip_src": "", "ip_dst": "", + "port_src": 443, "port_dst": 41930, "ip_proto": "tcp", + "tos": 0, "stamp_inserted": "2021-10-26 15:20:00", + "stamp_updated": "2021-10-26 15:23:14", "packets": 2, "bytes": 1216, "writer_id": "default_amqp/449206"})) + result = '' + while True: + result = instance.query('SELECT * FROM test.rabbitmq_drop', ignore_error=True) + print(result) + if result != "": + break + assert(result.strip() =="""{"event_type": "purge", "as_src": 1234, "as_dst": 0, "as_path": "", "local_pref": 100, "med": 0, "peer_as_dst": 0, "ip_src": "", "ip_dst": "", "port_src": 443, "port_dst": 41930, "ip_proto": "tcp", "tos": 0, "stamp_inserted": "2021-10-26 15:20:00", "stamp_updated": "2021-10-26 15:23:14", "packets": 2, "bytes": 1216, "writer_id": "default_amqp/449206"}""") + + +def test_rabbitmq_drop_mv(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = 'mv', + rabbitmq_format = 'JSONEachRow', + rabbitmq_queue_base = 'drop_mv'; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.rabbitmq; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters(rabbitmq_cluster.rabbitmq_ip, rabbitmq_cluster.rabbitmq_port, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + messages = [] + for i in range(20): + channel.basic_publish(exchange='mv', routing_key='', body=json.dumps({'key': i, 'value': i})) + + instance.query('DROP VIEW test.consumer') + for i in range(20, 40): + channel.basic_publish(exchange='mv', 
routing_key='', body=json.dumps({'key': i, 'value': i})) + + instance.query(''' + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.rabbitmq; + ''') + for i in range(40, 50): + channel.basic_publish(exchange='mv', routing_key='', body=json.dumps({'key': i, 'value': i})) + + while True: + result = instance.query('SELECT * FROM test.view ORDER BY key') + if (rabbitmq_check_result(result)): + break + + rabbitmq_check_result(result, True) + + instance.query('DROP VIEW test.consumer') + for i in range(50, 60): + channel.basic_publish(exchange='mv', routing_key='', body=json.dumps({'key': i, 'value': i})) + connection.close() + + count = 0 + while True: + count = int(instance.query('SELECT count() FROM test.rabbitmq')) + if (count): + break + + assert(count > 0) + + +def test_rabbitmq_random_detach(rabbitmq_cluster): + NUM_CONSUMERS = 2 + NUM_QUEUES = 2 + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = 'random', + rabbitmq_queue_base = 'random', + rabbitmq_num_queues = 2, + rabbitmq_num_consumers = 2, + rabbitmq_format = 'JSONEachRow'; + CREATE TABLE test.view (key UInt64, value UInt64, channel_id String) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT *, _channel_id AS channel_id FROM test.rabbitmq; + ''') + + i = [0] + messages_num = 10000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters(rabbitmq_cluster.rabbitmq_ip, rabbitmq_cluster.rabbitmq_port, '/', credentials) + + def produce(): + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + messages = [] + for i in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + mes_id = str(i) + channel.basic_publish(exchange='test_sharding', routing_key='', properties=pika.BasicProperties(message_id=mes_id), body=message) + connection.close() + + threads = [] + threads_num = 20 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + time.sleep(5) + kill_rabbitmq(rabbitmq_cluster.rabbitmq_docker_id) + instance.query("detach table test.rabbitmq") + revive_rabbitmq(rabbitmq_cluster.rabbitmq_docker_id) + + for thread in threads: + thread.join() + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index e25535b860f..bd918144935 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -163,6 +163,13 @@ def test_partition_by(started_cluster): assert "3,2,1\n" == get_s3_file_content(started_cluster, bucket, "test_1.csv") assert "78,43,45\n" == get_s3_file_content(started_cluster, bucket, "test_45.csv") + filename = "test2_{_partition_id}.csv" + instance.query(f"create table p ({table_format}) engine=S3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{filename}', 'CSV') partition by column3") + instance.query(f"insert into p values {values}") + assert "1,2,3\n" == get_s3_file_content(started_cluster, bucket, "test2_3.csv") + assert "3,2,1\n" == get_s3_file_content(started_cluster, bucket, "test2_1.csv") + assert "78,43,45\n" == get_s3_file_content(started_cluster, bucket, "test2_45.csv") + def 
test_partition_by_string_column(started_cluster): bucket = started_cluster.minio_bucket diff --git a/tests/integration/test_storage_url/__init__.py b/tests/integration/test_storage_url/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_storage_url/configs/conf.xml b/tests/integration/test_storage_url/configs/conf.xml new file mode 100644 index 00000000000..e3e8627d95e --- /dev/null +++ b/tests/integration/test_storage_url/configs/conf.xml @@ -0,0 +1,11 @@ + + + + + http://nginx:80/test_{_partition_id} + PUT + TSV + column1 UInt32, column2 UInt32, column3 UInt32 + + + diff --git a/tests/integration/test_storage_url/test.py b/tests/integration/test_storage_url/test.py new file mode 100644 index 00000000000..1ced71bc849 --- /dev/null +++ b/tests/integration/test_storage_url/test.py @@ -0,0 +1,29 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +uuids = [] + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance("node1", main_configs=["configs/conf.xml"], with_nginx=True) + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def test_partition_by(cluster): + node1 = cluster.instances["node1"] + + node1.query(f"insert into table function url(url1) partition by column3 values (1, 2, 3), (3, 2, 1), (1, 3, 2)") + result = node1.query(f"select * from url('http://nginx:80/test_1', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')") + assert(result.strip() == "3\t2\t1") + result = node1.query(f"select * from url('http://nginx:80/test_2', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')") + assert(result.strip() == "1\t3\t2") + result = node1.query(f"select * from url('http://nginx:80/test_3', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')") + assert(result.strip() == "1\t2\t3") diff --git a/tests/performance/sum_map.xml b/tests/performance/sum_map.xml index bc9f9be2a18..f55af077023 100644 --- a/tests/performance/sum_map.xml +++ b/tests/performance/sum_map.xml @@ -31,4 +31,6 @@ SELECT {func}(key, val) FROM sum_map_{scale} FORMAT Null SELECT {func}((key, val)) FROM sum_map_{scale} FORMAT Null + + DROP TABLE sum_map_{scale} diff --git a/tests/performance/tsv_csv_nullable_parsing.xml b/tests/performance/tsv_csv_nullable_parsing.xml new file mode 100644 index 00000000000..2d5c5cec275 --- /dev/null +++ b/tests/performance/tsv_csv_nullable_parsing.xml @@ -0,0 +1,15 @@ + + +CREATE TABLE IF NOT EXISTS table_tsv (s Nullable(String)) ENGINE = File('TSV') +CREATE TABLE IF NOT EXISTS table_csv (s Nullable(String)) ENGINE = File('CSV') + +INSERT INTO table_tsv SELECT number % 2 ? 'Some text' : NULL FROM numbers(1000000) FORMAT TSV +INSERT INTO table_csv SELECT number % 2 ? 
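The new test_storage_url test performs the same kind of partitioned write through the url table function, but addresses the endpoint via the predefined connection url1 declared in the test's conf.xml (an nginx container accepting PUT, TSV format, three UInt32 columns). A hedged sketch with the endpoint spelled out explicitly instead of the named connection:

INSERT INTO TABLE FUNCTION
    url('http://nginx:80/test_{_partition_id}', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
PARTITION BY column3
VALUES (1, 2, 3), (3, 2, 1), (1, 3, 2);

-- The row with column3 = 3 was written to /test_3.
SELECT * FROM url('http://nginx:80/test_3', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32');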
'Some text' : NULL FROM numbers(1000000) FORMAT CSV + +SELECT * FROM table_tsv FORMAT Null +SELECT * FROM table_csv FORMAT Null + +DROP TABLE IF EXISTS table_tsv +DROP TABLE IF EXISTS table_csv + + diff --git a/tests/queries/0_stateless/00255_array_concat_string.reference b/tests/queries/0_stateless/00255_array_concat_string.reference index e9fafe93ed1..4ffac8e5de0 100644 --- a/tests/queries/0_stateless/00255_array_concat_string.reference +++ b/tests/queries/0_stateless/00255_array_concat_string.reference @@ -65,7 +65,17 @@ yandex google test 123 hello world goodbye xyz yandex google test 123 hello wo 0 hello;world;xyz;def - +1;23;456 +1;23;456 +127.0.0.1; 1.0.0.1 +127.0.0.1; 1.0.0.1 +2021-10-01; 2021-10-02 +2021-10-01; 2021-10-02 hello;world;xyz;def - +1;23;456 +1;23;456 +127.0.0.1; 1.0.0.1 +127.0.0.1; 1.0.0.1 +2021-10-01; 2021-10-02 +2021-10-01; 2021-10-02 diff --git a/tests/queries/0_stateless/00255_array_concat_string.sql b/tests/queries/0_stateless/00255_array_concat_string.sql index 3bdae0821cd..f4f95956a16 100644 --- a/tests/queries/0_stateless/00255_array_concat_string.sql +++ b/tests/queries/0_stateless/00255_array_concat_string.sql @@ -9,8 +9,18 @@ SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number)), ',') FROM sy SELECT arrayStringConcat(arrayMap(x -> transform(x, [0, 1, 2, 3, 4, 5, 6, 7, 8], ['yandex', 'google', 'test', '123', '', 'hello', 'world', 'goodbye', 'xyz'], ''), arrayMap(x -> x % 9, range(number))), ' ') FROM system.numbers LIMIT 20; SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number % 4))) FROM system.numbers LIMIT 10; SELECT arrayStringConcat([Null, 'hello', Null, 'world', Null, 'xyz', 'def', Null], ';'); -SELECT arrayStringConcat([Null, Null], ';'); SELECT arrayStringConcat([Null::Nullable(String), Null::Nullable(String)], ';'); +SELECT arrayStringConcat(arr, ';') FROM (SELECT [1, 23, 456] AS arr); +SELECT arrayStringConcat(arr, ';') FROM (SELECT [Null, 1, Null, 23, Null, 456, Null] AS arr); +SELECT arrayStringConcat(arr, '; ') FROM (SELECT [toIPv4('127.0.0.1'), toIPv4('1.0.0.1')] AS arr); +SELECT arrayStringConcat(arr, '; ') FROM (SELECT [toIPv4('127.0.0.1'), Null, toIPv4('1.0.0.1')] AS arr); +SELECT arrayStringConcat(arr, '; ') FROM (SELECT [toDate('2021-10-01'), toDate('2021-10-02')] AS arr); +SELECT arrayStringConcat(arr, '; ') FROM (SELECT [toDate('2021-10-01'), Null, toDate('2021-10-02')] AS arr); SELECT arrayStringConcat(materialize([Null, 'hello', Null, 'world', Null, 'xyz', 'def', Null]), ';'); -SELECT arrayStringConcat(materialize([Null, Null]), ';'); SELECT arrayStringConcat(materialize([Null::Nullable(String), Null::Nullable(String)]), ';'); +SELECT arrayStringConcat(arr, ';') FROM (SELECT materialize([1, 23, 456]) AS arr); +SELECT arrayStringConcat(arr, ';') FROM (SELECT materialize([Null, 1, Null, 23, Null, 456, Null]) AS arr); +SELECT arrayStringConcat(arr, '; ') FROM (SELECT materialize([toIPv4('127.0.0.1'), toIPv4('1.0.0.1')]) AS arr); +SELECT arrayStringConcat(arr, '; ') FROM (SELECT materialize([toIPv4('127.0.0.1'), Null, toIPv4('1.0.0.1')]) AS arr); +SELECT arrayStringConcat(arr, '; ') FROM (SELECT materialize([toDate('2021-10-01'), toDate('2021-10-02')]) AS arr); +SELECT arrayStringConcat(arr, '; ') FROM (SELECT materialize([toDate('2021-10-01'), Null, toDate('2021-10-02')]) AS arr); diff --git a/tests/queries/0_stateless/00300_csv.reference b/tests/queries/0_stateless/00300_csv.reference index 9d2fe7233d8..42cd22078c4 100644 --- a/tests/queries/0_stateless/00300_csv.reference +++ 
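The 00255_array_concat_string additions cover arrayStringConcat over non-String arrays (integers, IPv4, dates) and over arrays containing NULLs, which are now skipped instead of rejected. A few examples in the same spirit, mirroring the test's subquery form; expected outputs are taken from the updated reference file:

SELECT arrayStringConcat(arr, ';')  FROM (SELECT [1, 23, 456] AS arr);                   -- 1;23;456
SELECT arrayStringConcat(arr, ';')  FROM (SELECT [NULL, 1, NULL, 23, NULL, 456] AS arr); -- 1;23;456
SELECT arrayStringConcat(arr, '; ') FROM (SELECT [toDate('2021-10-01'), NULL, toDate('2021-10-02')] AS arr);
-- 2021-10-01; 2021-10-02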
b/tests/queries/0_stateless/00300_csv.reference @@ -1,6 +1,10 @@ +"Hello, ""World""",123,"[1,2,3]",456,"['abc','def']","Newline +here" "x","y","z","a","b" "Hello, ""World""",123,"[1,2,3]",456,"['abc','def']","Newline here" +"x","y","z","a","b" +"String","UInt8","Array(UInt8)","Tuple(UInt16, Array(String))","String" "Hello, ""World""",123,"[1,2,3]",456,"['abc','def']","Newline here" 0,"0","[]","2000-01-01","2000-01-01 00:00:00" diff --git a/tests/queries/0_stateless/00300_csv.sql b/tests/queries/0_stateless/00300_csv.sql index 0c761ad0af1..76b1b29df06 100644 --- a/tests/queries/0_stateless/00300_csv.sql +++ b/tests/queries/0_stateless/00300_csv.sql @@ -1,3 +1,4 @@ -SELECT 'Hello, "World"' AS x, 123 AS y, [1, 2, 3] AS z, (456, ['abc', 'def']) AS a, 'Newline\nhere' AS b FORMAT CSVWithNames; SELECT 'Hello, "World"' AS x, 123 AS y, [1, 2, 3] AS z, (456, ['abc', 'def']) AS a, 'Newline\nhere' AS b FORMAT CSV; +SELECT 'Hello, "World"' AS x, 123 AS y, [1, 2, 3] AS z, (456, ['abc', 'def']) AS a, 'Newline\nhere' AS b FORMAT CSVWithNames; +SELECT 'Hello, "World"' AS x, 123 AS y, [1, 2, 3] AS z, (456, ['abc', 'def']) AS a, 'Newline\nhere' AS b FORMAT CSVWithNamesAndTypes; SELECT number, toString(number), range(number), toDate('2000-01-01') + number, toDateTime('2000-01-01 00:00:00') + number FROM system.numbers LIMIT 10 FORMAT CSV; diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index 0aee9abe25c..e10e98a123d 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -13,7 +13,7 @@ Hello "world", 789 ,2016-01-03 "Hello world", 100, 2016-01-04, default,, - default-eof,,' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --query="INSERT INTO csv FORMAT CSV"; + default-eof,,' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --query="INSERT INTO csv FORMAT CSV"; $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY d"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; @@ -33,7 +33,7 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t Nullable(DateTime('Europe/Moscow echo 'NULL, NULL "2016-01-01 01:02:03",NUL -"2016-01-02 01:02:03",Nhello' | $CLICKHOUSE_CLIENT --input_format_csv_unquoted_null_literal_as_null=1 --query="INSERT INTO csv FORMAT CSV"; +"2016-01-02 01:02:03",Nhello' | $CLICKHOUSE_CLIENT --format_csv_null_representation='NULL' --input_format_csv_empty_as_default=1 --query="INSERT INTO csv FORMAT CSV"; $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s NULLS LAST"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; diff --git a/tests/queries/0_stateless/00938_template_input_format.reference b/tests/queries/0_stateless/00938_template_input_format.reference index ce89532886d..e1f77d9a581 100644 --- a/tests/queries/0_stateless/00938_template_input_format.reference +++ b/tests/queries/0_stateless/00938_template_input_format.reference @@ -23,3 +23,11 @@ cv bn m","","as""df'gh","",456,"2016-01-02" "as""df'gh","","zx cv bn m","",789,"2016-01-04" "qwe,rty","","","",9876543210,"2016-01-03" +==== check raw ==== +"qwe,rty","as""df'gh","","zx +cv bn m",123,"2016-01-01" +"as""df\'gh","","zx +cv bn m","qwe,rty",456,"2016-01-02" +"zx\cv\bn m","qwe,rty","as""df'gh","",789,"2016-01-04" +"","zx +cv bn m","qwe,rty","as""df'gh",9876543210,"2016-01-03" diff --git a/tests/queries/0_stateless/00938_template_input_format.sh b/tests/queries/0_stateless/00938_template_input_format.sh index 75616b35af0..9218f4bebca 100755 --- 
a/tests/queries/0_stateless/00938_template_input_format.sh +++ b/tests/queries/0_stateless/00938_template_input_format.sh @@ -1,4 +1,6 @@ #!/usr/bin/env bash +# Tags: no-parallel + # shellcheck disable=SC2016,SC2028 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) @@ -50,6 +52,30 @@ format_template_rows_between_delimiter = ','"; $CLICKHOUSE_CLIENT --query="SELECT * FROM template2 ORDER BY n FORMAT CSV"; +echo "==== check raw ====" + +echo -ne '{prefix} \n${data}\n $$ suffix $$\n' > "$CURDIR"/00938_template_input_format_resultset.tmp +echo -ne 'n:\t${n:Escaped}, s1:\t${0:Raw}\t, s2:\t${1:Quoted}, s3:\t${s3:JSON}, s4:\t${3:CSV}, d:\t${d:Escaped}\t' > "$CURDIR"/00938_template_input_format_row.tmp + + +$CLICKHOUSE_CLIENT --query="TRUNCATE TABLE template1"; + +echo "{prefix}"' '" +n: 123, s1: qwe,rty , s2: 'as\"df\\'gh', s3: \"\", s4: \"zx +cv bn m\", d: 2016-01-01 ; +n: 456, s1: as\"df\\'gh , s2: '', s3: \"zx\\ncv\\tbn m\", s4: \"qwe,rty\", d: 2016-01-02 ; +n: 9876543210, s1: , s2: 'zx\\ncv\\tbn m', s3: \"qwe,rty\", s4: \"as\"\"df'gh\", d: 2016-01-03 ; +n: 789, s1: zx\cv\bn m , s2: 'qwe,rty', s3: \"as\\\"df'gh\", s4: \"\", d: 2016-01-04"$'\t'" + $ suffix $" | $CLICKHOUSE_CLIENT --query="INSERT INTO template1 FORMAT Template SETTINGS \ +format_template_resultset = '$CURDIR/00938_template_input_format_resultset.tmp', \ +format_template_row = '$CURDIR/00938_template_input_format_row.tmp', \ +format_template_rows_between_delimiter = ';\n'"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM template1 ORDER BY n FORMAT CSV"; + + + $CLICKHOUSE_CLIENT --query="DROP TABLE template1"; $CLICKHOUSE_CLIENT --query="DROP TABLE template2"; rm "$CURDIR"/00938_template_input_format_resultset.tmp "$CURDIR"/00938_template_input_format_row.tmp + diff --git a/tests/queries/0_stateless/01034_JSONCompactEachRow.reference b/tests/queries/0_stateless/01034_JSONCompactEachRow.reference index 6ec53e11fc9..bfc99d688d5 100644 --- a/tests/queries/0_stateless/01034_JSONCompactEachRow.reference +++ b/tests/queries/0_stateless/01034_JSONCompactEachRow.reference @@ -12,6 +12,11 @@ [1, "a"] [2, "b"] [3, "c"] +---------- +["value", "name"] +[1, "a"] +[2, "b"] +[3, "c"] 4 ["name", "c"] ["String", "UInt64"] @@ -31,17 +36,33 @@ 8 ["first", 1, 2, 0] ["second", 2, 0, 6] +["first", 1, 2, 0] +["second", 2, 0, 6] 9 ["first", 1, 2, 8] ["second", 2, 32, 6] +["first", 1, 2, 8] +["second", 2, 32, 6] 10 ["first", 1, 16, 8] ["second", 2, 32, 8] +["first", 1, 16, 8] +["second", 2, 32, 8] 11 ["v1", "v2", "v3", "v4"] ["String", "UInt8", "UInt16", "UInt8"] ["", 2, 3, 1] +["", 2, 3, 1] +---------- +["v1", "v2", "v3", "v4"] +["", 2, 3, 1] +["", 2, 3, 1] 12 ["v1", "n.id", "n.name"] ["UInt8", "Array(UInt8)", "Array(String)"] [16, [15,16,0], ["first","second","third"]] +[16, [15,16,0], ["first","second","third"]] +---------- +["v1", "n.id", "n.name"] +[16, [15,16,0], ["first","second","third"]] +[16, [15,16,0], ["first","second","third"]] diff --git a/tests/queries/0_stateless/01034_JSONCompactEachRow.sql b/tests/queries/0_stateless/01034_JSONCompactEachRow.sql index f5442c90a2a..f71597a60e5 100644 --- a/tests/queries/0_stateless/01034_JSONCompactEachRow.sql +++ b/tests/queries/0_stateless/01034_JSONCompactEachRow.sql @@ -10,8 +10,10 @@ SELECT 2; /* Check Totals */ SELECT name, count() AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONCompactEachRow; SELECT 3; -/* Check JSONCompactEachRowWithNamesAndTypes Output */ +/* Check JSONCompactEachRowWithNames and JSONCompactEachRowWithNamesAndTypes Output */ SELECT * FROM test_table 
FORMAT JSONCompactEachRowWithNamesAndTypes; +SELECT '----------'; +SELECT * FROM test_table FORMAT JSONCompactEachRowWithNames; SELECT 4; /* Check Totals */ SELECT name, count() AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONCompactEachRowWithNamesAndTypes; @@ -35,30 +37,39 @@ INSERT INTO test_table_2 FORMAT JSONCompactEachRow [16, [15, 16, null], ["first" SELECT * FROM test_table_2 FORMAT JSONCompactEachRow; TRUNCATE TABLE test_table_2; SELECT 8; -/* Check JSONCompactEachRowWithNamesAndTypes Output */ +/* Check JSONCompactEachRowWithNamesAndTypes and JSONCompactEachRowWithNamesAndTypes Input */ SET input_format_null_as_default = 0; INSERT INTO test_table FORMAT JSONCompactEachRowWithNamesAndTypes ["v1", "v2", "v3", "v4"]["String","UInt8","UInt16","UInt8"]["first", 1, "2", null]["second", 2, null, 6]; +INSERT INTO test_table FORMAT JSONCompactEachRowWithNames ["v1", "v2", "v3", "v4"]["first", 1, "2", null]["second", 2, null, 6]; SELECT * FROM test_table FORMAT JSONCompactEachRow; TRUNCATE TABLE test_table; SELECT 9; /* Check input_format_null_as_default = 1 */ SET input_format_null_as_default = 1; INSERT INTO test_table FORMAT JSONCompactEachRowWithNamesAndTypes ["v1", "v2", "v3", "v4"]["String","UInt8","UInt16","UInt8"]["first", 1, "2", null] ["second", 2, null, 6]; +INSERT INTO test_table FORMAT JSONCompactEachRowWithNames ["v1", "v2", "v3", "v4"]["first", 1, "2", null] ["second", 2, null, 6]; SELECT * FROM test_table FORMAT JSONCompactEachRow; SELECT 10; /* Check Header */ TRUNCATE TABLE test_table; SET input_format_skip_unknown_fields = 1; INSERT INTO test_table FORMAT JSONCompactEachRowWithNamesAndTypes ["v1", "v2", "invalid_column"]["String", "UInt8", "UInt8"]["first", 1, 32]["second", 2, "64"]; +INSERT INTO test_table FORMAT JSONCompactEachRowWithNames ["v1", "v2", "invalid_column"]["first", 1, 32]["second", 2, "64"]; SELECT * FROM test_table FORMAT JSONCompactEachRow; SELECT 11; TRUNCATE TABLE test_table; INSERT INTO test_table FORMAT JSONCompactEachRowWithNamesAndTypes ["v4", "v2", "v3"]["UInt8", "UInt8", "UInt16"][1, 2, 3] +INSERT INTO test_table FORMAT JSONCompactEachRowWithNames ["v4", "v2", "v3"][1, 2, 3] SELECT * FROM test_table FORMAT JSONCompactEachRowWithNamesAndTypes; +SELECT '----------'; +SELECT * FROM test_table FORMAT JSONCompactEachRowWithNames; SELECT 12; /* Check Nested */ INSERT INTO test_table_2 FORMAT JSONCompactEachRowWithNamesAndTypes ["v1", "n.id", "n.name"]["UInt8", "Array(UInt8)", "Array(String)"][16, [15, 16, null], ["first", "second", "third"]]; +INSERT INTO test_table_2 FORMAT JSONCompactEachRowWithNames ["v1", "n.id", "n.name"][16, [15, 16, null], ["first", "second", "third"]]; SELECT * FROM test_table_2 FORMAT JSONCompactEachRowWithNamesAndTypes; +SELECT '----------'; +SELECT * FROM test_table_2 FORMAT JSONCompactEachRowWithNames; DROP TABLE IF EXISTS test_table; DROP TABLE IF EXISTS test_table_2; diff --git a/tests/queries/0_stateless/01195_formats_diagnostic_info.reference b/tests/queries/0_stateless/01195_formats_diagnostic_info.reference index 15fc31538ce..eddbb80198d 100644 --- a/tests/queries/0_stateless/01195_formats_diagnostic_info.reference +++ b/tests/queries/0_stateless/01195_formats_diagnostic_info.reference @@ -1,5 +1,5 @@ CSV -Column 2, name: d, type: Decimal(18, 10), parsed text: "123456789"ERROR +Column 2, name: d, type: Decimal(18, 10), parsed text: " 123456789"ERROR ERROR: garbage after DateTime: "7, Hello" ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format. 
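The 01034_JSONCompactEachRow changes extend the test to the new JSONCompactEachRowWithNames format, which emits (and on input expects) a header row with column names but, unlike the ...WithNamesAndTypes variant, no type row. A small sketch, assuming a table t(value UInt64, name String) shaped like the test table:

SELECT * FROM t FORMAT JSONCompactEachRowWithNames;
-- ["value", "name"]
-- [1, "a"]
-- [2, "b"]

INSERT INTO t FORMAT JSONCompactEachRowWithNames ["value", "name"][3, "c"];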
ERROR: There is no line feed. "1" found instead. @@ -28,3 +28,14 @@ ERROR: There is no delimiter before field 1: expected "", got "7Hello< ERROR: There is no delimiter after last field: expected "", got "1" ERROR: There is no delimiter after last field: expected "", got "Hello" Column 0, name: t, type: DateTime, ERROR: text "" is not like DateTime +JSONCompactEachRow +Column 2, name: d, type: Decimal(18, 10), parsed text: " 123456789"ERROR +Column 0, name: t, type: DateTime, parsed text: "2020-04-21 12:34:56"ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format. +ERROR: garbage after DateTime: "7, Hello" +ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format. +ERROR: There is no closing parenthesis (']') at the end of the row. "," found instead. +Column 1, name: s, type: String, parsed text: ERROR +ERROR: There is no '[' before the row. +ERROR: garbage after Decimal(18, 10): ";" +ERROR: There is no comma. ";" found instead. +ERROR: Closing parenthesis (']') found where comma is expected. It's like your file has less columns than expected. diff --git a/tests/queries/0_stateless/01195_formats_diagnostic_info.sh b/tests/queries/0_stateless/01195_formats_diagnostic_info.sh index 6c64b17f719..dde410d95c4 100755 --- a/tests/queries/0_stateless/01195_formats_diagnostic_info.sh +++ b/tests/queries/0_stateless/01195_formats_diagnostic_info.sh @@ -38,3 +38,19 @@ echo -e '2020-04-21 12:34:567\tHello\t123456789' | "${PARSER[@]}" 2>&1| grep "ER echo -e '2020-04-21 12:34:56\tHello\t12345678\t1' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '2020-04-21 12:34:56\t\t123Hello' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '2020-04-21 12:34:56\tHello\t12345678\n' | "${PARSER[@]}" 2>&1| grep "ERROR" + +PARSER=(${CLICKHOUSE_LOCAL} --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format JSONCompactEachRow) +echo '["2020-04-21 12:34:56", "Hello", 12345678]' | "${PARSER[@]}" 2>&1| grep "ERROR" || echo "JSONCompactEachRow" +echo '["2020-04-21 12:34:56", "Hello", 123456789]' | "${PARSER[@]}" 2>&1| grep "ERROR" +echo '["2020-04-21 12:34:567", "Hello", 123456789]' | "${PARSER[@]}" 2>&1| grep "ERROR" +echo '["2020-04-21 12:34:56"7, "Hello", 123456789]' | "${PARSER[@]}" 2>&1| grep "ERROR" +echo '["2020-04-21 12:34:56", "Hello", 12345678,1]' | "${PARSER[@]}" 2>&1| grep "ERROR" +echo '["2020-04-21 12:34:56",,123Hello]' | "${PARSER[@]}" 2>&1| grep "ERROR" +echo -e '["2020-04-21 12:34:56", "Hello", 12345678\n]' | "${PARSER[@]}" 2>&1| grep "ERROR" +echo -e '"2020-04-21 12:34:56", "Hello", 12345678]' | "${PARSER[@]}" 2>&1| grep "ERROR" +echo -e '["2020-04-21 12:34:56", "Hello", 12345678;' | "${PARSER[@]}" 2>&1| grep "ERROR" +echo -e '["2020-04-21 12:34:56", "Hello", 12345678' | "${PARSER[@]}" 2>&1| grep "ERROR" +echo -e '["2020-04-21 12:34:56", "Hello", 12345678\n' | "${PARSER[@]}" 2>&1| grep "ERROR" +echo -e '["2020-04-21 12:34:56", "Hello"; 12345678\n' | "${PARSER[@]}" 2>&1| grep "ERROR" +echo -e '["2020-04-21 12:34:56", "Hello"\n' | "${PARSER[@]}" 2>&1| grep "ERROR" +echo -e '["2020-04-21 12:34:56", "Hello"]' | "${PARSER[@]}" 2>&1| grep "ERROR" diff --git a/tests/queries/0_stateless/01375_output_format_tsv_csv_with_names.reference b/tests/queries/0_stateless/01375_output_format_tsv_csv_with_names.reference index 6f1974ccd73..ffea4c736dc 100644 --- a/tests/queries/0_stateless/01375_output_format_tsv_csv_with_names.reference +++ 
b/tests/queries/0_stateless/01375_output_format_tsv_csv_with_names.reference @@ -7,7 +7,21 @@ number UInt64 0 1 +TSVRawWithNames +number +0 +1 +TSVRawWithNamesAndTypes +number +UInt64 +0 +1 CSVWithNames "number" 0 1 +CSVWithNamesAndTypes +"number" +"UInt64" +0 +1 diff --git a/tests/queries/0_stateless/01375_output_format_tsv_csv_with_names.sh b/tests/queries/0_stateless/01375_output_format_tsv_csv_with_names.sh index ad9cc2c53a8..69f3ab1c9a8 100755 --- a/tests/queries/0_stateless/01375_output_format_tsv_csv_with_names.sh +++ b/tests/queries/0_stateless/01375_output_format_tsv_csv_with_names.sh @@ -15,5 +15,14 @@ ${CLICKHOUSE_LOCAL} "${opts[@]}" --format TSVWithNames echo 'TSVWithNamesAndTypes' ${CLICKHOUSE_LOCAL} "${opts[@]}" --format TSVWithNamesAndTypes +echo 'TSVRawWithNames' +${CLICKHOUSE_LOCAL} "${opts[@]}" --format TSVWithNames + +echo 'TSVRawWithNamesAndTypes' +${CLICKHOUSE_LOCAL} "${opts[@]}" --format TSVWithNamesAndTypes + echo 'CSVWithNames' ${CLICKHOUSE_LOCAL} "${opts[@]}" --format CSVWithNames + +echo 'CSVWithNamesAndTypes' +${CLICKHOUSE_LOCAL} "${opts[@]}" --format CSVWithNamesAndTypes diff --git a/tests/queries/0_stateless/01375_storage_file_tsv_csv_with_names_write_prefix.reference b/tests/queries/0_stateless/01375_storage_file_tsv_csv_with_names_write_prefix.reference index 4f2a79b9905..78286b89a39 100644 --- a/tests/queries/0_stateless/01375_storage_file_tsv_csv_with_names_write_prefix.reference +++ b/tests/queries/0_stateless/01375_storage_file_tsv_csv_with_names_write_prefix.reference @@ -2,6 +2,11 @@ zero rows TSVWithNames TSVWithNamesAndTypes CSVWithNames +CSVWithNamesAndTypes +JSONCompactEachRowWithNames +JSONCompactEachRowWithNamesAndTypes +JSONCompactStringsEachRow +JSONCompactStringsEachRowWithNamesAndTypes multi clickhouse-local one file TSVWithNames 0 @@ -15,3 +20,23 @@ CSVWithNames 0 0 0 +CSVWithNamesAndTypes +0 +0 +0 +JSONCompactEachRowWithNames +0 +0 +0 +JSONCompactEachRowWithNamesAndTypes +0 +0 +0 +JSONCompactStringsEachRow +0 +0 +0 +JSONCompactStringsEachRowWithNamesAndTypes +0 +0 +0 diff --git a/tests/queries/0_stateless/01375_storage_file_tsv_csv_with_names_write_prefix.sh b/tests/queries/0_stateless/01375_storage_file_tsv_csv_with_names_write_prefix.sh index 469f7e7008b..a634f689dca 100755 --- a/tests/queries/0_stateless/01375_storage_file_tsv_csv_with_names_write_prefix.sh +++ b/tests/queries/0_stateless/01375_storage_file_tsv_csv_with_names_write_prefix.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -6,26 +7,26 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # zero rows echo 'zero rows' -for format in TSVWithNames TSVWithNamesAndTypes CSVWithNames; do +for format in TSVWithNames TSVWithNamesAndTypes CSVWithNames CSVWithNamesAndTypes JSONCompactEachRowWithNames JSONCompactEachRowWithNamesAndTypes JSONCompactStringsEachRow JSONCompactStringsEachRowWithNamesAndTypes; do echo $format ${CLICKHOUSE_LOCAL} --query=" - CREATE TABLE ${format}_01375 ENGINE File($format, '01375_$format.tsv') AS SELECT * FROM numbers(1) WHERE number < 0; + CREATE TABLE ${format}_01375 ENGINE File($format, '01375_$format') AS SELECT * FROM numbers(1) WHERE number < 0; SELECT * FROM ${format}_01375; DROP TABLE ${format}_01375; " - rm 01375_$format.tsv + rm 01375_$format done # run multiple times to the same file echo 'multi clickhouse-local one file' -for format in TSVWithNames TSVWithNamesAndTypes CSVWithNames; do +for format in TSVWithNames TSVWithNamesAndTypes 
CSVWithNames CSVWithNamesAndTypes JSONCompactEachRowWithNames JSONCompactEachRowWithNamesAndTypes JSONCompactStringsEachRow JSONCompactStringsEachRowWithNamesAndTypes; do echo $format for _ in {1..2}; do ${CLICKHOUSE_LOCAL} --query=" - CREATE TABLE ${format}_01375 ENGINE File($format, '01375_$format.tsv') AS SELECT * FROM numbers(1); + CREATE TABLE ${format}_01375 ENGINE File($format, '01375_$format') AS SELECT * FROM numbers(1); SELECT * FROM ${format}_01375; DROP TABLE ${format}_01375; " done - rm 01375_$format.tsv + rm 01375_$format done diff --git a/tests/queries/0_stateless/01448_json_compact_strings_each_row.reference b/tests/queries/0_stateless/01448_json_compact_strings_each_row.reference index fb1a066f272..8a69cf26ffd 100644 --- a/tests/queries/0_stateless/01448_json_compact_strings_each_row.reference +++ b/tests/queries/0_stateless/01448_json_compact_strings_each_row.reference @@ -12,6 +12,11 @@ ["1", "a"] ["2", "b"] ["3", "c"] +---------- +["value", "name"] +["1", "a"] +["2", "b"] +["3", "c"] 4 ["name", "c"] ["String", "UInt64"] @@ -31,17 +36,33 @@ 8 ["first", "1", "2", "0"] ["second", "2", "0", "6"] +["first", "1", "2", "0"] +["second", "2", "0", "6"] 9 ["first", "1", "2", "8"] ["second", "2", "32", "6"] +["first", "1", "2", "8"] +["second", "2", "32", "6"] 10 ["first", "1", "16", "8"] ["second", "2", "32", "8"] +["first", "1", "16", "8"] +["second", "2", "32", "8"] 11 ["v1", "v2", "v3", "v4"] ["String", "UInt8", "UInt16", "UInt8"] ["", "2", "3", "1"] +["", "2", "3", "1"] +--------- +["v1", "v2", "v3", "v4"] +["", "2", "3", "1"] +["", "2", "3", "1"] 12 ["v1", "n.id", "n.name"] ["UInt8", "Array(UInt8)", "Array(String)"] ["16", "[15,16,17]", "['first','second','third']"] +["16", "[15,16,17]", "['first','second','third']"] +--------- +["v1", "n.id", "n.name"] +["16", "[15,16,17]", "['first','second','third']"] +["16", "[15,16,17]", "['first','second','third']"] diff --git a/tests/queries/0_stateless/01448_json_compact_strings_each_row.sql b/tests/queries/0_stateless/01448_json_compact_strings_each_row.sql index 925faa3a17f..869041193cf 100644 --- a/tests/queries/0_stateless/01448_json_compact_strings_each_row.sql +++ b/tests/queries/0_stateless/01448_json_compact_strings_each_row.sql @@ -12,8 +12,10 @@ SELECT 2; /* Check Totals */ SELECT name, count() AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONCompactStringsEachRow; SELECT 3; -/* Check JSONCompactStringsEachRowWithNamesAndTypes Output */ +/* Check JSONCompactStringsEachRowWithNames and JSONCompactStringsEachRowWithNamesAndTypes Output */ SELECT * FROM test_table FORMAT JSONCompactStringsEachRowWithNamesAndTypes; +SELECT '----------'; +SELECT * FROM test_table FORMAT JSONCompactStringsEachRowWithNames; SELECT 4; /* Check Totals */ SELECT name, count() AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONCompactStringsEachRowWithNamesAndTypes; @@ -37,30 +39,39 @@ INSERT INTO test_table_2 FORMAT JSONCompactStringsEachRow ["16", "[15, 16, 17]", SELECT * FROM test_table_2 FORMAT JSONCompactStringsEachRow; TRUNCATE TABLE test_table_2; SELECT 8; -/* Check JSONCompactStringsEachRowWithNamesAndTypes Output */ +/* Check JSONCompactStringsEachRowWithNames and JSONCompactStringsEachRowWithNamesAndTypes Input */ SET input_format_null_as_default = 0; INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNamesAndTypes ["v1", "v2", "v3", "v4"]["String","UInt8","UInt16","UInt8"]["first", "1", "2", "null"]["second", "2", "null", "6"]; +INSERT INTO test_table FORMAT 
JSONCompactStringsEachRowWithNames ["v1", "v2", "v3", "v4"]["first", "1", "2", "null"]["second", "2", "null", "6"]; SELECT * FROM test_table FORMAT JSONCompactStringsEachRow; TRUNCATE TABLE test_table; SELECT 9; /* Check input_format_null_as_default = 1 */ SET input_format_null_as_default = 1; INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNamesAndTypes ["v1", "v2", "v3", "v4"]["String","UInt8","UInt16","UInt8"]["first", "1", "2", "null"] ["second", "2", "null", "6"]; +INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNames ["v1", "v2", "v3", "v4"]["first", "1", "2", "null"] ["second", "2", "null", "6"]; SELECT * FROM test_table FORMAT JSONCompactStringsEachRow; SELECT 10; /* Check Header */ TRUNCATE TABLE test_table; SET input_format_skip_unknown_fields = 1; INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNamesAndTypes ["v1", "v2", "invalid_column"]["String", "UInt8", "UInt8"]["first", "1", "32"]["second", "2", "64"]; +INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNames ["v1", "v2", "invalid_column"]["first", "1", "32"]["second", "2", "64"]; SELECT * FROM test_table FORMAT JSONCompactStringsEachRow; SELECT 11; TRUNCATE TABLE test_table; INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNamesAndTypes ["v4", "v2", "v3"]["UInt8", "UInt8", "UInt16"]["1", "2", "3"] +INSERT INTO test_table FORMAT JSONCompactStringsEachRowWithNames ["v4", "v2", "v3"]["1", "2", "3"] SELECT * FROM test_table FORMAT JSONCompactStringsEachRowWithNamesAndTypes; +SELECT '---------'; +SELECT * FROM test_table FORMAT JSONCompactStringsEachRowWithNames; SELECT 12; /* Check Nested */ INSERT INTO test_table_2 FORMAT JSONCompactStringsEachRowWithNamesAndTypes ["v1", "n.id", "n.name"]["UInt8", "Array(UInt8)", "Array(String)"]["16", "[15, 16, 17]", "['first', 'second', 'third']"]; +INSERT INTO test_table_2 FORMAT JSONCompactStringsEachRowWithNames ["v1", "n.id", "n.name"]["16", "[15, 16, 17]", "['first', 'second', 'third']"]; SELECT * FROM test_table_2 FORMAT JSONCompactStringsEachRowWithNamesAndTypes; +SELECT '---------'; +SELECT * FROM test_table_2 FORMAT JSONCompactStringsEachRowWithNames; DROP TABLE IF EXISTS test_table; DROP TABLE IF EXISTS test_table_2; diff --git a/tests/queries/0_stateless/01474_custom_null_tsv.sh b/tests/queries/0_stateless/01474_custom_null_tsv.sh index 9dc1c4b7777..fb5939faf5e 100755 --- a/tests/queries/0_stateless/01474_custom_null_tsv.sh +++ b/tests/queries/0_stateless/01474_custom_null_tsv.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE tsv_custom_null (id Nullable(UInt32)) E $CLICKHOUSE_CLIENT --query="INSERT INTO tsv_custom_null VALUES (NULL)"; -$CLICKHOUSE_CLIENT --output_format_tsv_null_representation='MyNull' --query="SELECT * FROM tsv_custom_null FORMAT TSV"; +$CLICKHOUSE_CLIENT --format_tsv_null_representation='MyNull' --query="SELECT * FROM tsv_custom_null FORMAT TSV"; $CLICKHOUSE_CLIENT --query="DROP TABLE tsv_custom_null"; diff --git a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql b/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql index 4648889ca27..5b6ed440ba4 100644 --- a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql +++ b/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql @@ -3,6 +3,8 @@ CREATE TABLE fuse_tbl(a Int8, b Int8) Engine = Log; INSERT INTO fuse_tbl SELECT number, number + 1 FROM numbers(1, 20); SET optimize_syntax_fuse_functions = 1; +SET optimize_fuse_sum_count_avg = 1; + SELECT sum(a), sum(b), count(b) from fuse_tbl; EXPLAIN SYNTAX SELECT sum(a), sum(b), 
count(b) from fuse_tbl; SELECT '---------NOT trigger fuse--------'; diff --git a/tests/queries/0_stateless/01852_map_combinator.reference b/tests/queries/0_stateless/01852_map_combinator.reference new file mode 100644 index 00000000000..4ad4ea8071e --- /dev/null +++ b/tests/queries/0_stateless/01852_map_combinator.reference @@ -0,0 +1,34 @@ +1 {1:10,2:10,3:10} +1 {3:10,4:10,5:10} +2 {4:10,5:10,6:10} +2 {6:10,7:10,8:10} +3 {1:10,2:10,3:10} +4 {3:10,4:10,5:10} +5 {4:10,5:10,6:10} +5 {6:10,7:10,8:10} +Map(UInt16, UInt64) {1:20,2:20,3:40,4:40,5:40,6:40,7:20,8:20} +Map(UInt16, UInt32) {1:20,2:20,3:40,4:40,5:40,6:40,7:20,8:20} +Map(UInt16, UInt64) {1:20,2:20,3:40,4:40,5:40,6:40,7:20,8:20} +{1:10,2:10,3:10,4:10,5:10,6:10,7:10,8:10} +{1:10,2:10,3:10,4:10,5:10,6:10,7:10,8:10} +Map(UInt16, Float64) {1:10,2:10,3:10,4:10,5:10,6:10,7:10,8:10} +{1:2,2:2,3:4,4:4,5:4,6:4,7:2,8:2} +1 {1:10,2:10,3:20,4:10,5:10} +2 {4:10,5:10,6:20,7:10,8:10} +3 {1:10,2:10,3:10} +4 {3:10,4:10,5:10} +5 {4:10,5:10,6:20,7:10,8:10} +{'01234567-89ab-cdef-0123-456789abcdef':1} +{'1':'1'} +{'1':'1'} +{1:1} +{'1970-01-02':1} +{'1970-01-01 03:00:01':1} +{'a':1} +{'1':'2'} +{1:1} +{1:1} +{1:1} +{1:1} +{1:1,2:2,3:6,4:8,5:10,6:12,7:7,8:8} +{1:1,2:2,3:6,4:8,5:10,6:12,7:7,8:8} diff --git a/tests/queries/0_stateless/01852_map_combinator.sql b/tests/queries/0_stateless/01852_map_combinator.sql new file mode 100644 index 00000000000..20923460eb6 --- /dev/null +++ b/tests/queries/0_stateless/01852_map_combinator.sql @@ -0,0 +1,57 @@ +SET send_logs_level = 'fatal'; +SET allow_experimental_map_type = 1; + +DROP TABLE IF EXISTS map_comb; +CREATE TABLE map_comb(a int, statusMap Map(UInt16, UInt32)) ENGINE = Log; + +INSERT INTO map_comb VALUES (1, map(1, 10, 2, 10, 3, 10)),(1, map(3, 10, 4, 10, 5, 10)),(2, map(4, 10, 5, 10, 6, 10)),(2, map(6, 10, 7, 10, 8, 10)),(3, map(1, 10, 2, 10, 3, 10)),(4, map(3, 10, 4, 10, 5, 10)),(5, map(4, 10, 5, 10, 6, 10)),(5, map(6, 10, 7, 10, 8, 10)); + +SELECT * FROM map_comb ORDER BY a; +SELECT toTypeName(res), sumMap(statusMap) as res FROM map_comb; +SELECT toTypeName(res), sumWithOverflowMap(statusMap) as res FROM map_comb; +SELECT toTypeName(res), sumMapMerge(s) as res FROM (SELECT sumMapState(statusMap) AS s FROM map_comb); +SELECT minMap(statusMap) FROM map_comb; +SELECT maxMap(statusMap) FROM map_comb; +SELECT toTypeName(res), avgMap(statusMap) as res FROM map_comb; +SELECT countMap(statusMap) FROM map_comb; +SELECT a, sumMap(statusMap) FROM map_comb GROUP BY a ORDER BY a; + +DROP TABLE map_comb; + +-- check different types +select minMap(val) from values ('val Map(UUID, Int32)', + (map('01234567-89ab-cdef-0123-456789abcdef', 1)), + (map('01234567-89ab-cdef-0123-456789abcdef', 2))); +select minMap(val) from values ('val Map(String, String)', (map('1', '1')), (map('1', '2'))); +select minMap(val) from values ('val Map(FixedString(1), FixedString(1))', (map('1', '1')), (map('1', '2'))); +select minMap(val) from values ('val Map(UInt64, UInt64)', (map(1, 1)), (map(1, 2))); +select minMap(val) from values ('val Map(Date, Int16)', (map(1, 1)), (map(1, 2))); +select minMap(val) from values ('val Map(DateTime(\'Europe/Moscow\'), Int32)', (map(1, 1)), (map(1, 2))); +select minMap(val) from values ('val Map(Enum16(\'a\'=1), Int16)', (map('a', 1)), (map('a', 2))); +select maxMap(val) from values ('val Map(String, String)', (map('1', '1')), (map('1', '2'))); +select minMap(val) from values ('val Map(Int128, Int128)', (map(1, 1)), (map(1, 2))); +select minMap(val) from values ('val Map(Int256, Int256)', (map(1, 1)), (map(1, 
2))); +select minMap(val) from values ('val Map(UInt128, UInt128)', (map(1, 1)), (map(1, 2))); +select minMap(val) from values ('val Map(UInt256, UInt256)', (map(1, 1)), (map(1, 2))); + +select sumMap(map(1,2), 1, 2); -- { serverError 42 } +select sumMap(map(1,2), map(1,3)); -- { serverError 42 } + +-- array and tuple arguments +select avgMap([1,1,1], [2,2,2]); -- { serverError 43 } +select minMap((1,1)); -- { serverError 43 } +select minMap(([1,1,1],1)); -- { serverError 43 } +select minMap([1,1,1],1); -- { serverError 43 } +select minMap([1,1,1]); -- { serverError 43 } +select minMap(([1,1,1])); -- { serverError 43 } + +DROP TABLE IF EXISTS sum_map_decimal; + +CREATE TABLE sum_map_decimal(statusMap Map(UInt16,Decimal32(5))) ENGINE = Log; + +INSERT INTO sum_map_decimal VALUES (map(1,'1.0',2,'2.0',3,'3.0')), (map(3,'3.0',4,'4.0',5,'5.0')), (map(4,'4.0',5,'5.0',6,'6.0')), (map(6,'6.0',7,'7.0',8,'8.0')); + +SELECT sumMap(statusMap) FROM sum_map_decimal; +SELECT sumWithOverflowMap(statusMap) FROM sum_map_decimal; + +DROP TABLE sum_map_decimal; diff --git a/tests/queries/0_stateless/01889_sql_json_functions.reference b/tests/queries/0_stateless/01889_sql_json_functions.reference index 593f2fb2d20..fd8989611a8 100644 --- a/tests/queries/0_stateless/01889_sql_json_functions.reference +++ b/tests/queries/0_stateless/01889_sql_json_functions.reference @@ -9,6 +9,7 @@ null +"bar" --JSON_QUERY-- [{"hello":1}] [1] diff --git a/tests/queries/0_stateless/01889_sql_json_functions.sql b/tests/queries/0_stateless/01889_sql_json_functions.sql index 087f029e635..f68fe63ecab 100644 --- a/tests/queries/0_stateless/01889_sql_json_functions.sql +++ b/tests/queries/0_stateless/01889_sql_json_functions.sql @@ -11,6 +11,7 @@ SELECT JSON_VALUE('{"hello":["world","world2"]}', '$.hello'); SELECT JSON_VALUE('{"hello":{"world":"!"}}', '$.hello'); SELECT JSON_VALUE('{hello:world}', '$.hello'); -- invalid json => default value (empty string) SELECT JSON_VALUE('', '$.hello'); +SELECT JSON_VALUE('{"foo foo":"bar"}', '$."foo foo"'); SELECT '--JSON_QUERY--'; SELECT JSON_QUERY('{"hello":1}', '$'); diff --git a/tests/queries/0_stateless/01944_insert_partition_by.sql b/tests/queries/0_stateless/01944_insert_partition_by.sql index 5396ca4daf6..ac38fcee490 100644 --- a/tests/queries/0_stateless/01944_insert_partition_by.sql +++ b/tests/queries/0_stateless/01944_insert_partition_by.sql @@ -1,7 +1,6 @@ -- Tags: no-fasttest -- Tag no-fasttest: needs s3 -INSERT INTO TABLE FUNCTION file('foo.csv', 'CSV', 'id Int32, val Int32') PARTITION BY val VALUES (1, 1), (2, 2); -- { serverError NOT_IMPLEMENTED } INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/test_{_partition_id}.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, '\r\n'); -- { serverError CANNOT_PARSE_TEXT } INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/test_{_partition_id}.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, 'abc\x00abc'); -- { serverError CANNOT_PARSE_TEXT } INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/test_{_partition_id}.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, 'abc\xc3\x28abc'); -- { serverError CANNOT_PARSE_TEXT } diff --git a/tests/queries/0_stateless/02000_join_on_const.reference b/tests/queries/0_stateless/02000_join_on_const.reference new file mode 100644 index 00000000000..b9494e4689c --- /dev/null +++ b/tests/queries/0_stateless/02000_join_on_const.reference @@ -0,0 +1,31 @@ +1 +1 +1 +1 +1 +1 +- ON NULL - +- inner - 
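The new 01852_map_combinator test applies the -Map aggregate-function combinator (sumMap, minMap, maxMap, avgMap, countMap) directly to Map-typed columns and values. A reduced sketch, assuming the test's map_comb(a int, statusMap Map(UInt16, UInt32)) table; expected results come from the new reference file:

SET allow_experimental_map_type = 1;

SELECT sumMap(statusMap) FROM map_comb;
-- {1:20,2:20,3:40,4:40,5:40,6:40,7:20,8:20}

SELECT a, sumMap(statusMap) FROM map_comb GROUP BY a ORDER BY a;

-- The combinator also works for min/max and on ad-hoc Map values:
SELECT minMap(val) FROM values('val Map(String, String)', (map('1', '1')), (map('1', '2')));
-- {'1':'1'}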
+- left - +1 0 +2 0 +- right - +0 2 +0 3 +- full - +0 2 +0 3 +1 0 +2 0 +- inner - +- left - +1 \N +2 \N +- right - +\N 2 +\N 3 +- full - +\N 2 +\N 3 +1 \N +2 \N diff --git a/tests/queries/0_stateless/02000_join_on_const.sql b/tests/queries/0_stateless/02000_join_on_const.sql new file mode 100644 index 00000000000..f6d686cf9bc --- /dev/null +++ b/tests/queries/0_stateless/02000_join_on_const.sql @@ -0,0 +1,55 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (id Int) ENGINE = Memory; +CREATE TABLE t2 (id Int) ENGINE = Memory; + +INSERT INTO t1 VALUES (1), (2); +INSERT INTO t2 VALUES (2), (3); + +SELECT 70 = 10 * sum(t1.id) + sum(t2.id) AND count() == 4 FROM t1 JOIN t2 ON 1 = 1; +SELECT 70 = 10 * sum(t1.id) + sum(t2.id) AND count() == 4 FROM t1 JOIN t2 ON 1; +SELECT 70 = 10 * sum(t1.id) + sum(t2.id) AND count() == 4 FROM t1 JOIN t2 ON 2 = 2 AND 3 = 3; +SELECT 70 = 10 * sum(t1.id) + sum(t2.id) AND count() == 4 FROM t1 INNER ANY JOIN t2 ON toNullable(1); +SELECT 70 = 10 * sum(t1.id) + sum(t2.id) AND count() == 4 FROM t1 INNER ANY JOIN t2 ON toLowCardinality(1); +SELECT 70 = 10 * sum(t1.id) + sum(t2.id) AND count() == 4 FROM t1 INNER ANY JOIN t2 ON toLowCardinality(toNullable(1)); + +SELECT * FROM t1 INNER ANY JOIN t2 ON toNullable(toLowCardinality(1)); -- { serverError 403 } +SELECT * FROM t1 INNER ANY JOIN t2 ON toUInt16(1); -- { serverError 403 } +SELECT * FROM t1 INNER ANY JOIN t2 ON toInt8(1); -- { serverError 403 } +SELECT * FROM t1 INNER ANY JOIN t2 ON 256; -- { serverError 403 } +SELECT * FROM t1 INNER ANY JOIN t2 ON -1; -- { serverError 403 } +SELECT * FROM t1 INNER ANY JOIN t2 ON toString(1); -- { serverError 403 } + +SELECT '- ON NULL -'; + +SELECT '- inner -'; +SELECT * FROM t1 INNER ANY JOIN t2 ON NULL; +SELECT * FROM t1 INNER ANY JOIN t2 ON 0; +SELECT * FROM t1 INNER ANY JOIN t2 ON 1 = 2; +SELECT '- left -'; +SELECT * FROM t1 LEFT JOIN t2 ON NULL ORDER BY t1.id, t2.id; +SELECT '- right -'; +SELECT * FROM t1 RIGHT JOIN t2 ON NULL ORDER BY t1.id, t2.id; +SELECT '- full -'; +SELECT * FROM t1 FULL JOIN t2 ON NULL ORDER BY t1.id, t2.id; + +SELECT '- inner -'; +SELECT * FROM t1 INNER ANY JOIN t2 ON NULL ORDER BY t1.id NULLS FIRST, t2.id SETTINGS join_use_nulls = 1; +SELECT '- left -'; +SELECT * FROM t1 LEFT JOIN t2 ON NULL ORDER BY t1.id NULLS FIRST, t2.id SETTINGS join_use_nulls = 1; +SELECT '- right -'; +SELECT * FROM t1 RIGHT JOIN t2 ON NULL ORDER BY t1.id NULLS FIRST, t2.id SETTINGS join_use_nulls = 1; +SELECT '- full -'; +SELECT * FROM t1 FULL JOIN t2 ON NULL ORDER BY t1.id NULLS FIRST, t2.id SETTINGS join_use_nulls = 1; + +SELECT * FROM t1 JOIN t2 ON 1 = 1 SETTINGS join_algorithm = 'partial_merge'; -- { serverError 48 } +SELECT * FROM t1 JOIN t2 ON 1 = 1 SETTINGS join_algorithm = 'auto'; -- { serverError 48 } +SELECT * FROM t1 JOIN t2 ON NULL SETTINGS join_algorithm = 'partial_merge'; -- { serverError 48 } +SELECT * FROM t1 LEFT JOIN t2 ON NULL SETTINGS join_algorithm = 'partial_merge'; -- { serverError 48 } +SELECT * FROM t1 RIGHT JOIN t2 ON NULL SETTINGS join_algorithm = 'auto'; -- { serverError 48 } +SELECT * FROM t1 FULL JOIN t2 ON NULL SETTINGS join_algorithm = 'partial_merge'; -- { serverError 48 } + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + diff --git a/tests/queries/0_stateless/02001_join_on_const_bs_long.reference b/tests/queries/0_stateless/02001_join_on_const_bs_long.reference new file mode 100644 index 00000000000..b45724ee906 --- /dev/null +++ b/tests/queries/0_stateless/02001_join_on_const_bs_long.reference @@ -0,0 +1,41 @@ +1 +1 
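02000_join_on_const.sql checks joins whose ON clause is a constant: a truthy constant behaves like a cross join (every pair of rows matches), while 0, 1 = 2 or NULL matches nothing, so only LEFT/RIGHT/FULL joins still produce rows, all of them non-matched. A hedged sketch over the test's tables t1(id) = {1, 2} and t2(id) = {2, 3}:

SELECT count() FROM t1 JOIN t2 ON 1 = 1;                       -- 4: every pair of rows matches
SELECT * FROM t1 LEFT JOIN t2 ON NULL ORDER BY t1.id, t2.id;   -- 1 0 / 2 0 (right side defaulted)
SELECT * FROM t1 RIGHT JOIN t2 ON NULL ORDER BY t1.id, t2.id;  -- 0 2 / 0 3 (left side defaulted)

-- Only the hash join supports constant ON conditions; other algorithms raise error 48 (NOT_IMPLEMENTED):
SELECT * FROM t1 JOIN t2 ON 1 = 1 SETTINGS join_algorithm = 'partial_merge'; -- { serverError 48 }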
+1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02001_join_on_const_bs_long.sql.j2 b/tests/queries/0_stateless/02001_join_on_const_bs_long.sql.j2 new file mode 100644 index 00000000000..1726bcb7062 --- /dev/null +++ b/tests/queries/0_stateless/02001_join_on_const_bs_long.sql.j2 @@ -0,0 +1,27 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (id Int) ENGINE = MergeTree ORDER BY id; +CREATE TABLE t2 (id Int) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t1 VALUES (1), (2); +INSERT INTO t2 SELECT number + 5 AS x FROM (SELECT * FROM system.numbers LIMIT 1111); + +SET max_block_size = 100; + +SELECT count() == 2222 FROM t1 JOIN t2 ON 1 = 1; + +{% for bs in [90, 95, 99, 100, 101, 110, 111, 128] -%} + +SET max_block_size = {{ bs }}; + +SELECT count() == 0 FROM t1 JOIN t2 ON 1 = 2; +SELECT count() == 2 FROM t1 LEFT JOIN t2 ON 1 = 2; +SELECT count() == 1111 FROM t1 RIGHT JOIN t2 ON 1 = 2; +SELECT count() == 1113 FROM t1 FULL JOIN t2 ON 1 = 2; +SELECT max(blockSize()) <= {{ bs }} FROM t1 FULL JOIN t2 ON 1 = 2; + +{% endfor %} + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02026_storage_filelog_largefile.sh b/tests/queries/0_stateless/02026_storage_filelog_largefile.sh index 6babcc1e4f1..acd1c464334 100755 --- a/tests/queries/0_stateless/02026_storage_filelog_largefile.sh +++ b/tests/queries/0_stateless/02026_storage_filelog_largefile.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long +# Tags: long, no-parallel set -eu diff --git a/tests/queries/0_stateless/02029_output_csv_null_representation.reference b/tests/queries/0_stateless/02029_output_csv_null_representation.reference index a5174f4424f..eda4b09e312 100644 --- a/tests/queries/0_stateless/02029_output_csv_null_representation.reference +++ b/tests/queries/0_stateless/02029_output_csv_null_representation.reference @@ -1,4 +1,4 @@ -# output_format_csv_null_representation should initially be \\N +# format_csv_null_representation should initially be \\N "val1",\N,"val3" -# Changing output_format_csv_null_representation +# Changing format_csv_null_representation "val1",∅,"val3" diff --git a/tests/queries/0_stateless/02029_output_csv_null_representation.sql b/tests/queries/0_stateless/02029_output_csv_null_representation.sql index 772c6c89144..a27c552ee60 100644 --- a/tests/queries/0_stateless/02029_output_csv_null_representation.sql +++ b/tests/queries/0_stateless/02029_output_csv_null_representation.sql @@ -7,10 +7,10 @@ CREATE TABLE test_data ( INSERT INTO test_data VALUES ('val1', NULL, 'val3'); -SELECT '# output_format_csv_null_representation should initially be \\N'; +SELECT '# format_csv_null_representation should initially be \\N'; SELECT * FROM test_data FORMAT CSV; -SELECT '# Changing output_format_csv_null_representation'; -SET output_format_csv_null_representation = '∅'; +SELECT '# Changing format_csv_null_representation'; +SET format_csv_null_representation = '∅'; SELECT * FROM test_data FORMAT CSV; -SET output_format_csv_null_representation = '\\N'; +SET format_csv_null_representation = '\\N'; diff --git a/tests/queries/0_stateless/02048_parallel_reading_from_infile.reference b/tests/queries/0_stateless/02048_parallel_reading_from_infile.reference new file mode 100644 index 00000000000..98c00d52990 --- /dev/null +++ b/tests/queries/0_stateless/02048_parallel_reading_from_infile.reference @@ -0,0 +1,5 @@ +1 +2 +Correct +1 +2 diff --git 
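Several of the remaining edits (01474_custom_null_tsv, 02029_output_csv_null_representation) follow a settings rename: output_format_tsv_null_representation and output_format_csv_null_representation become format_tsv_null_representation and format_csv_null_representation, the output_ prefix being dropped because the setting is no longer output-only (the 00301_csv.sh change above uses it when parsing an INSERT as well). A short sketch of the renamed CSV setting, assuming the test's test_data table with one row ('val1', NULL, 'val3'):

SET format_csv_null_representation = '∅';
SELECT * FROM test_data FORMAT CSV;        -- "val1",∅,"val3"
SET format_csv_null_representation = '\\N';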
a/tests/queries/0_stateless/02048_parallel_reading_from_infile.sh b/tests/queries/0_stateless/02048_parallel_reading_from_infile.sh new file mode 100755 index 00000000000..d53fe8dd305 --- /dev/null +++ b/tests/queries/0_stateless/02048_parallel_reading_from_infile.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +[ -e "${CLICKHOUSE_TMP}"/test_infile_parallel.gz ] && rm "${CLICKHOUSE_TMP}"/test_infile_parallel.gz +[ -e "${CLICKHOUSE_TMP}"/test_infile_parallel ] && rm "${CLICKHOUSE_TMP}"/test_infile_parallel +[ -e "${CLICKHOUSE_TMP}"/test_infile_parallel ] && rm "${CLICKHOUSE_TMP}"/test_infile_parallel_1 +[ -e "${CLICKHOUSE_TMP}"/test_infile_parallel ] && rm "${CLICKHOUSE_TMP}"/test_infile_parallel_2 +[ -e "${CLICKHOUSE_TMP}"/test_infile_parallel ] && rm "${CLICKHOUSE_TMP}"/test_infile_parallel_3 + +echo -e "102\t2" > "${CLICKHOUSE_TMP}"/test_infile_parallel +echo -e "102\tsecond" > "${CLICKHOUSE_TMP}"/test_infile_parallel_1 +echo -e "103\tfirst" > "${CLICKHOUSE_TMP}"/test_infile_parallel_2 +echo -e "103" > "${CLICKHOUSE_TMP}"/test_infile_parallel_3 + +gzip "${CLICKHOUSE_TMP}"/test_infile_parallel + +${CLICKHOUSE_CLIENT} --multiquery <&1 | grep -q "36" && echo "Correct" || echo 'Fail' + +${CLICKHOUSE_LOCAL} --multiquery <&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +echo -e "y\tz\tx\nString\tDate\tUInt32\ntext\t2020-01-01\t1" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT TSVWithNamesAndTypes" && echo 'OK' || echo 'FAIL' +echo -e "x\tz\ty\nUInt32\tString\tDate\n1\ttext\t2020-01-01" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT TSVWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' + + +echo "CSVWithNamesAndTypes" +echo -e "'x','y','z'\n'String','Date','UInt32'\n'text','2020-01-01',1" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT CSVWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +echo -e "'y','z','x'\n'String','Date','UInt32'\n'text','2020-01-01',1" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT CSVWithNamesAndTypes" && echo 'OK' || echo 'FAIL' +echo -e "'x','z','y'\n'UInt32','String',Date'\n1,'text','2020-01-01'" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT CSVWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' + + +echo "JSONCompactEachRowWithNamesAndTypes" +echo -e '["x","y","z"]\n["String","Date","UInt32"]\n["text","2020-01-01",1]' | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT JSONCompactEachRowWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +echo -e '["y","z","x"]\n["String","Date","UInt32"]\n["text","2020-01-01",1]' | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT JSONCompactEachRowWithNamesAndTypes" && echo 'OK' || echo 'FAIL' +echo -e '["x","z","y"]\n["UInt32", "String", "Date"]\n[1, "text","2020-01-01"]' | $CLICKHOUSE_CLIENT 
--input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT JSONCompactEachRowWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' + +echo "JSONCompactStringsEachRowWithNamesAndTypes" +echo -e '["x","y","z"]\n["String","Date","UInt32"]\n["text","2020-01-01","1"]' | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT JSONCompactStringsEachRowWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +echo -e '["y","z","x"]\n["String","Date","UInt32"]\n["text","2020-01-01","1"]' | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT JSONCompactStringsEachRowWithNamesAndTypes" && echo 'OK' || echo 'FAIL' +echo -e '["x","z","y"]\n["UInt32", "String", "Date"]\n["1", "text","2020-01-01"]' | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT JSONCompactStringsEachRowWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' + +$CLICKHOUSE_CLIENT -q "DROP TABLE test_02098" diff --git a/tests/queries/0_stateless/02099_tsv_raw_format.reference b/tests/queries/0_stateless/02099_tsv_raw_format.reference new file mode 100644 index 00000000000..de46cf8dff7 --- /dev/null +++ b/tests/queries/0_stateless/02099_tsv_raw_format.reference @@ -0,0 +1,113 @@ +TSVRaw +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +TSVRawWithNames +number string date +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +TSVRawWithNamesAndTypes +number string date +UInt64 String Date +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +TabSeparatedRaw +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +TabSeparatedRawWithNames +number string date +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +TabSeparatedRawWithNamesAndTypes +number string date +UInt64 String Date +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +0 +\N +2 +\N +0 +\N +2 +\N +0 +\N +2 +\N +0 +\N +2 +\N +0 +\N +2 +\N +0 +\N +2 +\N +0 +\N +2 +\N +0 +\N +2 +\N +0 +\N +2 +\N +0 +\N +2 +\N +0 +\N +2 +\N +nSome text +b1cad4eb4be08a40387c9de70d02fcc2 - +b1cad4eb4be08a40387c9de70d02fcc2 - diff --git a/tests/queries/0_stateless/02099_tsv_raw_format.sh b/tests/queries/0_stateless/02099_tsv_raw_format.sh new file mode 100755 index 00000000000..16b695e4037 --- /dev/null +++ b/tests/queries/0_stateless/02099_tsv_raw_format.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_02099" +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_02099 (number UInt64, string String, date Date) ENGINE=Memory()" + +FORMATS=('TSVRaw' 'TSVRawWithNames' 'TSVRawWithNamesAndTypes' 'TabSeparatedRaw' 'TabSeparatedRawWithNames' 'TabSeparatedRawWithNamesAndTypes') + +for format in "${FORMATS[@]}" +do + echo $format + $CLICKHOUSE_CLIENT -q "INSERT INTO test_02099 SELECT number, toString(number), toDate(number) FROM numbers(3)" + $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02099 FORMAT $format" + + $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02099 FORMAT $format" | $CLICKHOUSE_CLIENT -q "INSERT INTO test_02099 FORMAT $format" + $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02099" + + $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02099" +done + +$CLICKHOUSE_CLIENT -q "DROP TABLE test_02099" + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_nullable_02099" +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_nullable_02099 ENGINE=Memory() AS SELECT number % 2 ? NULL : number from numbers(4)"; + +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_nullable_02099 FORMAT TSVRaw" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_nullable_02099 FORMAT TSVRaw" | $CLICKHOUSE_CLIENT -q "INSERT INTO test_nullable_02099 FORMAT TSVRaw" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_nullable_02099" + + +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_nullable_02099 FORMAT TSV" | $CLICKHOUSE_CLIENT -q "INSERT INTO test_nullable_02099 FORMAT TSVRaw" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_nullable_02099 FORMAT TSVRaw" | $CLICKHOUSE_CLIENT -q "INSERT INTO test_nullable_02099 FORMAT TSV" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_nullable_02099" + +$CLICKHOUSE_CLIENT -q "DROP TABLE test_nullable_02099" + + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_nullable_string_02099" +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_nullable_string_02099 (s Nullable(String)) ENGINE=Memory()"; + +echo 'nSome text' | $CLICKHOUSE_CLIENT -q "INSERT INTO test_nullable_string_02099 FORMAT TSVRaw" + +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_nullable_string_02099" +$CLICKHOUSE_CLIENT -q "DROP TABLE test_nullable_string_02099" + + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_parallel_parsing_02099" +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_parallel_parsing_02099 (x UInt64, a Array(UInt64), s String) ENGINE=Memory()"; +$CLICKHOUSE_CLIENT -q "SELECT number AS x, range(number % 50) AS a, toString(a) AS s FROM numbers(1000000) FORMAT TSVRaw" | $CLICKHOUSE_CLIENT --input_format_parallel_parsing=0 -q "INSERT INTO test_parallel_parsing_02099 FORMAT TSVRaw" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_parallel_parsing_02099 ORDER BY x" | md5sum + +$CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_parallel_parsing_02099" + +$CLICKHOUSE_CLIENT -q "SELECT number AS x, range(number % 50) AS a, toString(a) AS s FROM numbers(1000000) FORMAT TSVRaw" | $CLICKHOUSE_CLIENT --input_format_parallel_parsing=1 -q "INSERT INTO test_parallel_parsing_02099 FORMAT TSVRaw" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_parallel_parsing_02099 ORDER BY x" | md5sum + +$CLICKHOUSE_CLIENT -q "DROP TABLE test_parallel_parsing_02099" + diff --git a/tests/queries/0_stateless/02100_low_cardinality_nullable_null_default.reference b/tests/queries/0_stateless/02100_low_cardinality_nullable_null_default.reference new file mode 100644 index 00000000000..12b4d6ad854 --- /dev/null +++ b/tests/queries/0_stateless/02100_low_cardinality_nullable_null_default.reference @@ -0,0 +1,14 @@ +CSV +\N +TSV +\N +TSVRaw +\N +TSKV +\N +JSONCompactEachRow +\N 
+JSONEachRow +\N +Values +\N diff --git a/tests/queries/0_stateless/02100_low_cardinality_nullable_null_default.sh b/tests/queries/0_stateless/02100_low_cardinality_nullable_null_default.sh new file mode 100755 index 00000000000..2fd1f130b7a --- /dev/null +++ b/tests/queries/0_stateless/02100_low_cardinality_nullable_null_default.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_02100" +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_02100 (x LowCardinality(Nullable(String)) DEFAULT 'default') ENGINE=Memory()" + +FORMATS=('CSV' 'TSV' 'TSVRaw' 'TSKV' 'JSONCompactEachRow' 'JSONEachRow' 'Values') + +for format in "${FORMATS[@]}" +do + echo $format + $CLICKHOUSE_CLIENT -q "SELECT NULL as x FORMAT $format" | $CLICKHOUSE_CLIENT -q "INSERT INTO test_02100 FORMAT $format" + + $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02100" + + $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02100" +done + +$CLICKHOUSE_CLIENT -q "DROP TABLE test_02100" + diff --git a/tests/queries/0_stateless/02101_empty_as_default_and_omitted_fields.reference b/tests/queries/0_stateless/02101_empty_as_default_and_omitted_fields.reference new file mode 100644 index 00000000000..61444c7a238 --- /dev/null +++ b/tests/queries/0_stateless/02101_empty_as_default_and_omitted_fields.reference @@ -0,0 +1,16 @@ +TSV +1 42 +2 0 +3 42 +4 0 +CSV +1 42 +2 0 +3 42 +4 0 +JSONEachRow +1 42 +2 0 +JSONCompactEachRow +1 42 +2 0 diff --git a/tests/queries/0_stateless/02101_empty_as_default_and_omitted_fields.sh b/tests/queries/0_stateless/02101_empty_as_default_and_omitted_fields.sh new file mode 100755 index 00000000000..4f03c72cac3 --- /dev/null +++ b/tests/queries/0_stateless/02101_empty_as_default_and_omitted_fields.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_02101" +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_02101 (x UInt64, y UInt64 DEFAULT 42) ENGINE=Memory()" + +echo 'TSV' +echo -e 'x\ty\n1\t' | $CLICKHOUSE_CLIENT --input_format_tsv_empty_as_default=1 --input_format_defaults_for_omitted_fields=1 -q "INSERT INTO test_02101 FORMAT TSVWithNames" +echo -e 'x\ty\n2\t' | $CLICKHOUSE_CLIENT --input_format_tsv_empty_as_default=1 --input_format_defaults_for_omitted_fields=0 -q "INSERT INTO test_02101 FORMAT TSVWithNames" +echo -e 'x\tz\n3\t123' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_skip_unknown_fields=1 -q "INSERT INTO test_02101 FORMAT TSVWithNames" +echo -e 'x\tz\n4\t123' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=0 --input_format_skip_unknown_fields=1 -q "INSERT INTO test_02101 FORMAT TSVWithNames" + +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02101 ORDER BY x" +$CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02101" + +echo 'CSV' +echo -e '"x","y"\n1,' | $CLICKHOUSE_CLIENT --input_format_csv_empty_as_default=1 --input_format_defaults_for_omitted_fields=1 -q "INSERT INTO test_02101 FORMAT CSVWithNames" +echo -e '"x","y"\n2,' | $CLICKHOUSE_CLIENT --input_format_csv_empty_as_default=1 --input_format_defaults_for_omitted_fields=0 -q "INSERT INTO test_02101 FORMAT CSVWithNames" +echo -e '"x","z"\n3,123' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_skip_unknown_fields=1 -q "INSERT INTO test_02101 FORMAT CSVWithNames" +echo -e '"x","z"\n4,123' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=0 --input_format_skip_unknown_fields=1 -q "INSERT INTO test_02101 FORMAT CSVWithNames" + +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02101 ORDER BY x" +$CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02101" + +echo 'JSONEachRow' +echo -e '{"x" : 1, "z" : 123}' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_skip_unknown_fields=1 -q "INSERT INTO test_02101 FORMAT JSONEachRow" +echo -e '{"x" : 2, "z" : 123}' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=0 --input_format_skip_unknown_fields=1 -q "INSERT INTO test_02101 FORMAT JSONEachRow" + +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02101 ORDER BY x" +$CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02101" + +echo 'JSONCompactEachRow' +echo -e '["x", "z"], [1, 123]' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_skip_unknown_fields=1 -q "INSERT INTO test_02101 FORMAT JSONCompactEachRowWithNames" +echo -e '["x", "z"], [2, 123]' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=0 --input_format_skip_unknown_fields=1 -q "INSERT INTO test_02101 FORMAT JSONCompactEachRowWithNames" + +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02101 ORDER BY x" +$CLICKHOUSE_CLIENT -q "DROP TABLE test_02101" + diff --git a/tests/queries/0_stateless/02102_row_binary_with_names_and_types.reference b/tests/queries/0_stateless/02102_row_binary_with_names_and_types.reference new file mode 100644 index 00000000000..9011f20cd6a --- /dev/null +++ b/tests/queries/0_stateless/02102_row_binary_with_names_and_types.reference @@ -0,0 +1,14 @@ +1 text 2020-01-01 +1 text 2020-01-01 +1 text 2020-01-01 +1 text 2020-01-01 +1 text 2020-01-01 +1 text 2020-01-01 +1 default 1970-01-01 +1 default 1970-01-01 +1 1970-01-01 +1 1970-01-01 +OK +1 default 1970-01-01 +OK +OK diff --git a/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh 
b/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh new file mode 100755 index 00000000000..e7307ad3ad5 --- /dev/null +++ b/tests/queries/0_stateless/02102_row_binary_with_names_and_types.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_02102" +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_02102 (x UInt32, y String DEFAULT 'default', z Date) engine=Memory()" + + + +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' AS y, toDate('2020-01-01') AS z FORMAT RowBinaryWithNames" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNames" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" +$CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" + +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' AS y, toDate('2020-01-01') AS z FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" +$CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" + + +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' AS y, toDate('2020-01-01') AS z FORMAT RowBinaryWithNames" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=0 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNames" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" +$CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" + +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' AS y, toDate('2020-01-01') AS z FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=0 --input_format_with_types_use_header=0 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" +$CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" + + +$CLICKHOUSE_CLIENT -q "SELECT 'text' AS y, toDate('2020-01-01') AS z, toUInt32(1) AS x FORMAT RowBinaryWithNames" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNames" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" +$CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" + +$CLICKHOUSE_CLIENT -q "SELECT 'text' AS y, toDate('2020-01-01') AS z, toUInt32(1) AS x FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" +$CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" + + +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x FORMAT RowBinaryWithNames" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNames" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" +$CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" + +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" +$CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" + + +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x FORMAT RowBinaryWithNames" | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=0 --input_format_with_names_use_header=1 -q "INSERT INTO 
test_02102 FORMAT RowBinaryWithNames" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" +$CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" + +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=0 --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" +$CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" + + +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT RowBinaryWithNames" | $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNames" 2>&1 | grep -F -q "CANNOT_SKIP_UNKNOWN_FIELD" && echo 'OK' || echo 'FAIL' + + +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02102" +$CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02102" + + +$CLICKHOUSE_CLIENT -q "SELECT 'text' AS x, toDate('2020-01-01') AS y, toUInt32(1) AS z FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' + +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' as z, toDate('2020-01-01') AS y FORMAT RowBinaryWithNamesAndTypes" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02102 FORMAT RowBinaryWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' + +$CLICKHOUSE_CLIENT -q "DROP TABLE test_02102" + diff --git a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.reference b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.reference new file mode 100644 index 00000000000..a89bc46acfb --- /dev/null +++ b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.reference @@ -0,0 +1,76 @@ +TSV +\N +\N +Some text +\N +Some text +\N +Some more text +\N +\N +Some more text +1 Some text 1 +1 \N 1 +CustomNullSome text +CustomNullSome text +\N +Some more text +\N +\N +Some more text +1 \N 1 +1 \N 1 +CSV +\N +\N +\\NSome text +\N +\\NSome text +\N +Some more text +\N +\N +Some more text +1 \\NSome text 1 +1 \N 1 +CustomNullSome text +CustomNullSome text +\N +Some more text +\N +\N +Some more text +1 \N 1 +1 \N 1 +Corner cases +TSV +Some text \N +Some text CustomNull Some text +OK +OK +CSV +Some text \N +Some text CustomNull Some text +OK +OK +Large custom NULL +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0000000000Custom NULL representation0000000000 +0000000000Custom NULL representation0000000000 +0000000000Custom NULL representation0000000000 +0000000000Custom NULL representation0000000000 +0000000000Custom NULL representation0000000000 +0000000000Custom NULL representation0000000000 +0000000000Custom NULL representation0000000000 +0000000000Custom NULL representation0000000000 +0000000000Custom NULL representation0000000000 +0000000000Custom NULL representation0000000000 diff --git a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh 
b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh new file mode 100755 index 00000000000..4162e046ca4 --- /dev/null +++ b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh @@ -0,0 +1,133 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +DATA_FILE=$USER_FILES_PATH/test_02103_null.data + +echo "TSV" + +echo 'Custom NULL representation' > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='Custom NULL representation'" + +echo -e 'N\tU\tL\tL' > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='N\tU\tL\tL'" + +echo -e "\\NSome text" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)')" + +echo -e "\\N" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)')" + +echo -e "\\NSome text\n\\N\nSome more text" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)')" + +echo -e "\\N\n\\N\nSome more text" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)')" + +echo -e "1\t\\NSome text\t1" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 'x Int32, s Nullable(String), y Int32')" + +echo -e "1\t\\N\t1" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 'x Int32, s Nullable(String), y Int32')" + +echo -e "CustomNullSome text" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='CustomNull'" + +echo -e "CustomNullSome text\nCustomNull\nSome more text" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='CustomNull'" + +echo -e "CustomNull\nCustomNull\nSome more text" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS format_tsv_null_representation='CustomNull'" + +echo -e "1\tCustomNull\t1" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_tsv_null_representation='CustomNull'" + +echo -e "1\tCustomNull\t1" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_tsv_null_representation='CustomNull'" + + +echo "CSV" + +echo 'Custom NULL representation' > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='Custom NULL representation'" + +echo -e 'N,U,L,L' > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='N,U,L,L'" + +echo -e "\\NSome text" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)')" + +echo -e 
"\\N" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)')" + +echo -e "\\NSome text\n\\N\nSome more text" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)')" + +echo -e "\\N\n\\N\nSome more text" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)')" + +echo -e "1,\\NSome text,1" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 'x Int32, s Nullable(String), y Int32')" + +echo -e "1,\\N,1" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 'x Int32, s Nullable(String), y Int32')" + +echo -e "CustomNullSome text" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='CustomNull'" + +echo -e "CustomNullSome text\nCustomNull\nSome more text" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='CustomNull'" + +echo -e "CustomNull\nCustomNull\nSome more text" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's Nullable(String)') SETTINGS format_csv_null_representation='CustomNull'" + +echo -e "1,CustomNull,1" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_csv_null_representation='CustomNull'" + +echo -e "1,CustomNull,1" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 'x Int32, s Nullable(String), y Int32') SETTINGS format_csv_null_representation='CustomNull'" + + +echo 'Corner cases' +echo 'TSV' + +echo -e "Some text\tCustomNull" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" + +echo -e "Some text\tCustomNull Some text" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" + +echo -e "Some text\t123NNN" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(Int32)') settings max_read_buffer_size=14, format_tsv_null_representation='123NN', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' + +echo -e "Some text\tNU\tLL" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings max_read_buffer_size=13, format_tsv_null_representation='NU\tL', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' + +echo 'CSV' + +echo -e "Some text,CustomNull" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0" + +echo -e "Some text,CustomNull Some text" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings max_read_buffer_size=15, format_csv_null_representation='CustomNull', 
input_format_parallel_parsing=0" + +echo -e "Some text,123NNN" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(Int32)') settings max_read_buffer_size=14, format_csv_null_representation='123NN', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' + +echo -e "Some text,NU,LL" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings max_read_buffer_size=13, format_csv_null_representation='NU,L', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' + + +echo 'Large custom NULL' + +$CLICKHOUSE_CLIENT -q "select '0000000000Custom NULL representation0000000000' FROM numbers(10)" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation0000000000'" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation000000000'" + +rm $DATA_FILE + diff --git a/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.reference b/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.reference new file mode 100644 index 00000000000..962b233e5e7 --- /dev/null +++ b/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.reference @@ -0,0 +1,80 @@ +0 [0,1,2,3,4,5,6,7,8,9] 0 +1 [0,1,2,3,4,5,6,7,8,9,10] 1 +2 [0,1,2,3,4,5,6,7,8,9,10,11] 2 +3 [0,1,2,3,4,5,6,7,8,9,10,11,12] 3 +4 [0,1,2,3,4,5,6,7,8,9,10,11,12,13] 4 +5 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 5 +6 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 6 +7 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16] 7 +8 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] 8 +9 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18] 9 +0 [0,1,2,3,4,5,6,7,8,9] 0 +1 [0,1,2,3,4,5,6,7,8,9,10] 1 +2 [0,1,2,3,4,5,6,7,8,9,10,11] 2 +3 [0,1,2,3,4,5,6,7,8,9,10,11,12] 3 +4 [0,1,2,3,4,5,6,7,8,9,10,11,12,13] 4 +5 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 5 +6 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 6 +7 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16] 7 +8 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] 8 +9 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18] 9 +0 [0,1,2,3,4,5,6,7,8,9] 0 +1 [0,1,2,3,4,5,6,7,8,9,10] 1 +2 [0,1,2,3,4,5,6,7,8,9,10,11] 2 +3 [0,1,2,3,4,5,6,7,8,9,10,11,12] 3 +4 [0,1,2,3,4,5,6,7,8,9,10,11,12,13] 4 +5 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 5 +6 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 6 +7 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16] 7 +8 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] 8 +9 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18] 9 +0 [0,1,2,3,4,5,6,7,8,9] 0 +1 [0,1,2,3,4,5,6,7,8,9,10] 1 +2 [0,1,2,3,4,5,6,7,8,9,10,11] 2 +3 [0,1,2,3,4,5,6,7,8,9,10,11,12] 3 +4 [0,1,2,3,4,5,6,7,8,9,10,11,12,13] 4 +5 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 5 +6 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 6 +7 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16] 7 +8 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] 8 +9 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18] 9 +0 [0,1,2,3,4,5,6,7,8,9] 0 +1 [0,1,2,3,4,5,6,7,8,9,10] 1 +2 [0,1,2,3,4,5,6,7,8,9,10,11] 2 +3 [0,1,2,3,4,5,6,7,8,9,10,11,12] 3 +4 [0,1,2,3,4,5,6,7,8,9,10,11,12,13] 4 +5 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 5 +6 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 6 +7 
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16] 7 +8 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] 8 +9 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18] 9 +0 [0,1,2,3,4,5,6,7,8,9] 0 +1 [0,1,2,3,4,5,6,7,8,9,10] 1 +2 [0,1,2,3,4,5,6,7,8,9,10,11] 2 +3 [0,1,2,3,4,5,6,7,8,9,10,11,12] 3 +4 [0,1,2,3,4,5,6,7,8,9,10,11,12,13] 4 +5 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 5 +6 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 6 +7 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16] 7 +8 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] 8 +9 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18] 9 +0 [0,1,2,3,4,5,6,7,8,9] 0 +1 [0,1,2,3,4,5,6,7,8,9,10] 1 +2 [0,1,2,3,4,5,6,7,8,9,10,11] 2 +3 [0,1,2,3,4,5,6,7,8,9,10,11,12] 3 +4 [0,1,2,3,4,5,6,7,8,9,10,11,12,13] 4 +5 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 5 +6 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 6 +7 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16] 7 +8 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] 8 +9 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18] 9 +0 [0,1,2,3,4,5,6,7,8,9] 0 +1 [0,1,2,3,4,5,6,7,8,9,10] 1 +2 [0,1,2,3,4,5,6,7,8,9,10,11] 2 +3 [0,1,2,3,4,5,6,7,8,9,10,11,12] 3 +4 [0,1,2,3,4,5,6,7,8,9,10,11,12,13] 4 +5 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] 5 +6 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 6 +7 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16] 7 +8 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] 8 +9 [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18] 9 diff --git a/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh b/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh new file mode 100755 index 00000000000..487282099e2 --- /dev/null +++ b/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +DATA_FILE=$USER_FILES_PATH/test_02103.data + +FORMATS=('TSVWithNames' 'TSVWithNamesAndTypes' 'TSVRawWithNames' 'TSVRawWithNamesAndTypes' 'CSVWithNames' 'CSVWithNamesAndTypes' 'JSONCompactEachRowWithNames' 'JSONCompactEachRowWithNamesAndTypes') + +for format in "${FORMATS[@]}" +do + $CLICKHOUSE_CLIENT -q "SELECT number, range(number + 10) AS array, toString(number) AS string FROM numbers(10) FORMAT $format" > $DATA_FILE + $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103.data', '$format', 'number UInt64, array Array(UInt64), string String') SETTINGS input_format_parallel_parsing=1, min_chunk_bytes_for_parallel_parsing=40" +done + +rm $DATA_FILE + diff --git a/tests/queries/0_stateless/02104_json_strings_nullable_string.reference b/tests/queries/0_stateless/02104_json_strings_nullable_string.reference new file mode 100644 index 00000000000..a2b5b4ad2ec --- /dev/null +++ b/tests/queries/0_stateless/02104_json_strings_nullable_string.reference @@ -0,0 +1,2 @@ +NULLSome string +NULLSome string diff --git a/tests/queries/0_stateless/02104_json_strings_nullable_string.sh b/tests/queries/0_stateless/02104_json_strings_nullable_string.sh new file mode 100755 index 00000000000..6a5d369e7b6 --- /dev/null +++ b/tests/queries/0_stateless/02104_json_strings_nullable_string.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +DATA_FILE=$USER_FILES_PATH/test_02104_null.data + +echo -e '{"s" : "NULLSome string"}' > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02104_null.data', 'JSONStringsEachRow', 's Nullable(String)')" + +echo -e '["NULLSome string"]' > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02104_null.data', 'JSONCompactStringsEachRow', 's Nullable(String)')" + +rm $DATA_FILE + diff --git a/tests/queries/0_stateless/02105_table_function_file_partiotion_by.reference b/tests/queries/0_stateless/02105_table_function_file_partiotion_by.reference new file mode 100644 index 00000000000..e6d8f69d9eb --- /dev/null +++ b/tests/queries/0_stateless/02105_table_function_file_partiotion_by.reference @@ -0,0 +1,6 @@ +part 1 +3 2 1 +part 2 +1 3 2 +part 3 +1 2 3 diff --git a/tests/queries/0_stateless/02105_table_function_file_partiotion_by.sh b/tests/queries/0_stateless/02105_table_function_file_partiotion_by.sh new file mode 100755 index 00000000000..2e7e0fede94 --- /dev/null +++ b/tests/queries/0_stateless/02105_table_function_file_partiotion_by.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# See 01658_read_file_to_string_column.sh +user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +mkdir -p "${user_files_path}/" +chmod 777 ${user_files_path} + +FILE_PATH="${user_files_path}/test_table_function_file" + +function cleanup() +{ + rm -r ${FILE_PATH} +} +trap cleanup EXIT + +values="(1, 2, 3), (3, 2, 1), (1, 3, 2)" +${CLICKHOUSE_CLIENT} --query="insert into table function file('${FILE_PATH}/test_{_partition_id}', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32') PARTITION BY column3 values ${values}"; +echo 'part 1' +${CLICKHOUSE_CLIENT} --query="select * from file('${FILE_PATH}/test_1', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')"; +echo 'part 2' +${CLICKHOUSE_CLIENT} --query="select * from file('${FILE_PATH}/test_2', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')"; +echo 'part 3' +${CLICKHOUSE_CLIENT} --query="select * from file('${FILE_PATH}/test_3', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')"; + diff --git a/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect b/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect new file mode 100755 index 00000000000..b676c221c65 --- /dev/null +++ b/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect @@ -0,0 +1,27 @@ +#!/usr/bin/expect -f +# Tags: no-parallel, no-fasttest + +log_user 0 +set timeout 20 +match_max 100000 + +# A default timeout action is to fail +expect_after { + timeout { + exit 1 + } +} + + +spawn bash -c "\$CLICKHOUSE_TESTS_DIR/helpers/02112_prepare.sh" + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT --disable_suggestion --interactive --queries-file \$CURDIR/file_02112" +expect ":) " + +send -- "select * from t format TSV\r" +expect "1" +expect ":) " + +spawn bash -c "\$CLICKHOUSE_TESTS_DIR/helpers/02112_clean.sh" + diff --git 
a/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.reference b/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect b/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect new file mode 100755 index 00000000000..6bb9140ee35 --- /dev/null +++ b/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect @@ -0,0 +1,24 @@ +#!/usr/bin/expect -f +# Tags: no-unbundled, no-fasttest + +log_user 0 +set timeout 20 +match_max 100000 + +# A default timeout action is to fail +expect_after { + timeout { + exit 1 + } +} + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion --interactive --query 'create table t(i Int32) engine=Memory; insert into t select 1'" +expect ":) " + +send -- "select * from t format TSV\r" +expect "1" +expect ":) " + +send -- "exit\r" +expect eof diff --git a/tests/queries/0_stateless/02112_delayed_clickhouse_local.reference b/tests/queries/0_stateless/02112_delayed_clickhouse_local.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect b/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect new file mode 100755 index 00000000000..f0aef1550c3 --- /dev/null +++ b/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect @@ -0,0 +1,27 @@ +#!/usr/bin/expect -f +# Tags: no-parallel, no-fasttest + +log_user 0 +set timeout 20 +match_max 100000 + +# A default timeout action is to fail +expect_after { + timeout { + exit 1 + } +} + + +spawn bash -c "\$CLICKHOUSE_TESTS_DIR/helpers/02112_prepare.sh" + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion --interactive --queries-file \$CURDIR/file_02112" +expect ":) " + +send -- "select * from t format TSV\r" +expect "1" +expect ":) " + +spawn bash -c "\$CLICKHOUSE_TESTS_DIR/helpers/02112_clean.sh" + diff --git a/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.reference b/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02112_with_fill_interval.reference b/tests/queries/0_stateless/02112_with_fill_interval.reference new file mode 100644 index 00000000000..fc6f9378bfa --- /dev/null +++ b/tests/queries/0_stateless/02112_with_fill_interval.reference @@ -0,0 +1,109 @@ +1 DAY +2020-02-05 1 +2020-02-06 0 +2020-02-07 0 +2020-02-08 0 +2020-02-09 0 +1 WEEK +2020-02-02 1 +2020-02-09 0 +2020-02-16 1 +2020-02-23 0 +2020-03-01 1 +1 MONTH +2020-02-01 2 +2020-03-01 1 +2020-04-01 0 +2020-05-01 0 +2020-06-01 1 +3 MONTH +2020-01-01 0 +2020-02-01 2 +2020-03-01 1 +2020-04-01 0 +2020-06-01 1 +2020-07-01 0 +2020-10-01 0 +1 DAY +2020-02-05 1 +2020-02-06 0 +2020-02-07 0 +2020-02-08 0 +2020-02-09 0 +1 WEEK +2020-02-02 1 +2020-02-09 0 +2020-02-16 1 +2020-02-23 0 +2020-03-01 1 +1 MONTH +2020-02-01 2 +2020-03-01 1 +2020-04-01 0 +2020-05-01 0 +2020-06-01 1 +3 MONTH +2020-01-01 0 +2020-02-01 2 +2020-03-01 1 +2020-04-01 0 +2020-06-01 1 +2020-07-01 0 +2020-10-01 0 +15 MINUTE +2020-02-05 10:20:00 1 +2020-02-05 10:35:00 0 +2020-02-05 10:50:00 0 +2020-02-05 11:05:00 0 +2020-02-05 11:20:00 0 +6 HOUR +2020-02-05 10:00:00 1 
+2020-02-05 16:00:00 0 +2020-02-05 22:00:00 0 +2020-02-06 04:00:00 0 +2020-02-06 10:00:00 0 +10 DAY +2020-02-05 00:00:00 1 +2020-02-15 00:00:00 0 +2020-02-25 00:00:00 0 +2020-03-06 00:00:00 0 +2020-03-08 00:00:00 1 +15 MINUTE +2020-02-05 10:20:00.000 1 +2020-02-05 10:35:00.000 0 +2020-02-05 10:50:00.000 0 +2020-02-05 11:05:00.000 0 +2020-02-05 11:20:00.000 0 +6 HOUR +2020-02-05 10:00:00 1 +2020-02-05 16:00:00 0 +2020-02-05 22:00:00 0 +2020-02-06 04:00:00 0 +2020-02-06 10:00:00 0 +10 DAY +2020-02-05 00:00:00 1 +2020-02-15 00:00:00 0 +2020-02-25 00:00:00 0 +2020-03-06 00:00:00 0 +2020-03-08 00:00:00 1 +1 MONTH +2020-01-01 1 0 +2020-01-01 2 0 +2020-01-01 3 0 +2020-01-01 4 0 +2020-02-01 1 1 +2020-02-01 2 0 +2020-02-01 3 1 +2020-02-01 4 0 +2020-03-01 1 0 +2020-03-01 2 1 +2020-03-01 3 1 +2020-03-01 4 0 +2020-04-01 1 0 +2020-04-01 2 0 +2020-04-01 3 0 +2020-04-01 4 0 +2020-05-01 1 0 +2020-05-01 2 0 +2020-05-01 3 0 +2020-05-01 4 0 diff --git a/tests/queries/0_stateless/02112_with_fill_interval.sql b/tests/queries/0_stateless/02112_with_fill_interval.sql new file mode 100644 index 00000000000..e93a7664de0 --- /dev/null +++ b/tests/queries/0_stateless/02112_with_fill_interval.sql @@ -0,0 +1,81 @@ +DROP TABLE IF EXISTS with_fill_date; +CREATE TABLE with_fill_date (d Date, d32 Date32) ENGINE = Memory; + +INSERT INTO with_fill_date VALUES (toDate('2020-02-05'), toDate32('2020-02-05')); +INSERT INTO with_fill_date VALUES (toDate('2020-02-16'), toDate32('2020-02-16')); +INSERT INTO with_fill_date VALUES (toDate('2020-03-03'), toDate32('2020-03-03')); +INSERT INTO with_fill_date VALUES (toDate('2020-06-10'), toDate32('2020-06-10')); + +SELECT '1 DAY'; +SELECT d, count() FROM with_fill_date GROUP BY d ORDER BY d WITH FILL STEP INTERVAL 1 DAY LIMIT 5; +SELECT '1 WEEK'; +SELECT toStartOfWeek(d) as d, count() FROM with_fill_date GROUP BY d ORDER BY d WITH FILL STEP INTERVAL 1 WEEK LIMIT 5; +SELECT '1 MONTH'; +SELECT toStartOfMonth(d) as d, count() FROM with_fill_date GROUP BY d ORDER BY d WITH FILL STEP INTERVAL 1 MONTH LIMIT 5; +SELECT '3 MONTH'; +SELECT toStartOfMonth(d) as d, count() FROM with_fill_date GROUP BY d ORDER BY d WITH FILL + FROM toDate('2020-01-01') + TO toDate('2021-01-01') + STEP INTERVAL 3 MONTH; + +SELECT d, count() FROM with_fill_date GROUP BY d ORDER BY d WITH FILL STEP INTERVAL 1 HOUR LIMIT 5; -- { serverError 475 } + +SELECT '1 DAY'; +SELECT d32, count() FROM with_fill_date GROUP BY d32 ORDER BY d32 WITH FILL STEP INTERVAL 1 DAY LIMIT 5; +SELECT '1 WEEK'; +SELECT toStartOfWeek(d32) as d32, count() FROM with_fill_date GROUP BY d32 ORDER BY d32 WITH FILL STEP INTERVAL 1 WEEK LIMIT 5; +SELECT '1 MONTH'; +SELECT toStartOfMonth(d32) as d32, count() FROM with_fill_date GROUP BY d32 ORDER BY d32 WITH FILL STEP INTERVAL 1 MONTH LIMIT 5; +SELECT '3 MONTH'; +SELECT toStartOfMonth(d32) as d32, count() FROM with_fill_date GROUP BY d32 ORDER BY d32 WITH FILL + FROM toDate('2020-01-01') + TO toDate('2021-01-01') + STEP INTERVAL 3 MONTH; + +SELECT d, count() FROM with_fill_date GROUP BY d ORDER BY d WITH FILL STEP INTERVAL 1 HOUR LIMIT 5; -- { serverError 475 } + +DROP TABLE with_fill_date; + +DROP TABLE IF EXISTS with_fill_date; +CREATE TABLE with_fill_date (d DateTime, d64 DateTime64) ENGINE = Memory; + +INSERT INTO with_fill_date VALUES (toDateTime('2020-02-05 10:20:00'), toDateTime64('2020-02-05 10:20:00', 3)); +INSERT INTO with_fill_date VALUES (toDateTime('2020-03-08 11:01:00'), toDateTime64('2020-03-08 11:01:00', 3)); + +SELECT '15 MINUTE'; +SELECT d, count() FROM with_fill_date GROUP BY d 
ORDER BY d WITH FILL STEP INTERVAL 15 MINUTE LIMIT 5; +SELECT '6 HOUR'; +SELECT toStartOfHour(d) as d, count() FROM with_fill_date GROUP BY d ORDER BY d WITH FILL STEP INTERVAL 6 HOUR LIMIT 5; +SELECT '10 DAY'; +SELECT toStartOfDay(d) as d, count() FROM with_fill_date GROUP BY d ORDER BY d WITH FILL STEP INTERVAL 10 DAY LIMIT 5; + +SELECT '15 MINUTE'; +SELECT d64, count() FROM with_fill_date GROUP BY d64 ORDER BY d64 WITH FILL STEP INTERVAL 15 MINUTE LIMIT 5; +SELECT '6 HOUR'; +SELECT toStartOfHour(d64) as d64, count() FROM with_fill_date GROUP BY d64 ORDER BY d64 WITH FILL STEP INTERVAL 6 HOUR LIMIT 5; +SELECT '10 DAY'; +SELECT toStartOfDay(d64) as d64, count() FROM with_fill_date GROUP BY d64 ORDER BY d64 WITH FILL STEP INTERVAL 10 DAY LIMIT 5; + +DROP TABLE with_fill_date; + +SELECT number FROM numbers(100) ORDER BY number WITH FILL STEP INTERVAL 1 HOUR; -- { serverError 475 } + +CREATE TABLE with_fill_date (d Date, id UInt32) ENGINE = Memory; + +INSERT INTO with_fill_date VALUES (toDate('2020-02-05'), 1); +INSERT INTO with_fill_date VALUES (toDate('2020-02-16'), 3); +INSERT INTO with_fill_date VALUES (toDate('2020-03-10'), 2); +INSERT INTO with_fill_date VALUES (toDate('2020-03-03'), 3); + +SELECT '1 MONTH'; + +SELECT toStartOfMonth(d) as d, id, count() FROM with_fill_date +GROUP BY d, id +ORDER BY +d WITH FILL + FROM toDate('2020-01-01') + TO toDate('2020-05-01') + STEP INTERVAL 1 MONTH, +id WITH FILL FROM 1 TO 5; + +DROP TABLE with_fill_date; diff --git a/tests/queries/0_stateless/02113_format_row_bug.reference b/tests/queries/0_stateless/02113_format_row_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02113_format_row_bug.sql b/tests/queries/0_stateless/02113_format_row_bug.sql new file mode 100644 index 00000000000..c2144ca1537 --- /dev/null +++ b/tests/queries/0_stateless/02113_format_row_bug.sql @@ -0,0 +1,6 @@ +-- Tags: no-fasttest + +select formatRow('ORC', number, toDate(number)) from numbers(5); -- { serverError 36 } +select formatRow('Parquet', number, toDate(number)) from numbers(5); -- { serverError 36 } +select formatRow('Arrow', number, toDate(number)) from numbers(5); -- { serverError 36 } +select formatRow('Native', number, toDate(number)) from numbers(5); -- { serverError 36 } diff --git a/tests/queries/0_stateless/helpers/02112_clean.sh b/tests/queries/0_stateless/helpers/02112_clean.sh new file mode 100755 index 00000000000..910c0709955 --- /dev/null +++ b/tests/queries/0_stateless/helpers/02112_clean.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +FILE=${CURDIR}/file_02112 +if [ -f $FILE ]; then + rm $FILE +fi diff --git a/tests/queries/0_stateless/helpers/02112_prepare.sh b/tests/queries/0_stateless/helpers/02112_prepare.sh new file mode 100755 index 00000000000..1f371789f86 --- /dev/null +++ b/tests/queries/0_stateless/helpers/02112_prepare.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +FILE=${CURDIR}/file_02112 +if [ -f $FILE ]; then + rm $FILE +fi +echo "drop table if exists t;create table t(i Int32) engine=Memory; insert into t select 1" >> $FILE diff --git a/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.reference b/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.reference index 6d663c33057..7ad5359a30e 100644 --- a/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.reference +++ b/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.reference @@ -1,12 +1,28 @@ JSONEachRow, false -7251839681e559f5a92db107571bb357 - 
+e0a3c9978a92a277f2fff4664f3c1749 - JSONEachRow, true -7251839681e559f5a92db107571bb357 - +e0a3c9978a92a277f2fff4664f3c1749 - JSONCompactEachRow, false -ba1081a754a06ef6563840b2d8d4d327 - +0c1efbbc25a5bd90a2ecea559d283667 - JSONCompactEachRow, true -ba1081a754a06ef6563840b2d8d4d327 - +0c1efbbc25a5bd90a2ecea559d283667 - +JSONCompactStringsEachRow, false +0c1efbbc25a5bd90a2ecea559d283667 - +JSONCompactStringsEachRow, true +0c1efbbc25a5bd90a2ecea559d283667 - +JSONCompactEachRowWithNames, false +b9e4f8ecadbb650245d1762f4187ee0a - +JSONCompactEachRowWithNames, true +b9e4f8ecadbb650245d1762f4187ee0a - +JSONCompactStringsEachRowWithNames, false +b9e4f8ecadbb650245d1762f4187ee0a - +JSONCompactStringsEachRowWithNames, true +b9e4f8ecadbb650245d1762f4187ee0a - +JSONCompactEachRowWithNamesAndTypes, false +8b41f7375999b53d4c9607398456fe5b - +JSONCompactEachRowWithNamesAndTypes, true +8b41f7375999b53d4c9607398456fe5b - JSONCompactStringsEachRowWithNamesAndTypes, false -31ded3cd9971b124450fb5a44a8bce63 - +8b41f7375999b53d4c9607398456fe5b - JSONCompactStringsEachRowWithNamesAndTypes, true -31ded3cd9971b124450fb5a44a8bce63 - +8b41f7375999b53d4c9607398456fe5b - diff --git a/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.sh b/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.sh index 5d54328e45d..f6c87eabfde 100755 --- a/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.sh +++ b/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.sh @@ -6,15 +6,15 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -FORMATS=('JSONEachRow' 'JSONCompactEachRow' 'JSONCompactStringsEachRowWithNamesAndTypes') +FORMATS=('JSONEachRow' 'JSONCompactEachRow' 'JSONCompactStringsEachRow' 'JSONCompactEachRowWithNames' 'JSONCompactStringsEachRowWithNames' 'JSONCompactEachRowWithNamesAndTypes' 'JSONCompactStringsEachRowWithNamesAndTypes') for format in "${FORMATS[@]}" do echo "$format, false"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ - "SELECT ClientEventTime::DateTime('Europe/Moscow') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum + "SELECT ClientEventTime::DateTime('Europe/Moscow') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c LIMIT 3000000 Format $format" | md5sum echo "$format, true"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=true -q \ - "SELECT ClientEventTime::DateTime('Europe/Moscow') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum + "SELECT ClientEventTime::DateTime('Europe/Moscow') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c LIMIT 3000000 Format $format" | md5sum done diff --git a/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.reference b/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.reference new file mode 100644 index 00000000000..0c0367694b2 --- /dev/null +++ b/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.reference @@ -0,0 +1,20 @@ +TSVWithNamesAndTypes, false +7c1feeaae418e502d66fcc8e31946f2e - +TSVWithNamesAndTypes, true +7c1feeaae418e502d66fcc8e31946f2e - +CSVWithNamesAndTypes, false +7c1feeaae418e502d66fcc8e31946f2e - +CSVWithNamesAndTypes, true +7c1feeaae418e502d66fcc8e31946f2e - +JSONStringsEachRow, false +7c1feeaae418e502d66fcc8e31946f2e - +JSONStringsEachRow, true +7c1feeaae418e502d66fcc8e31946f2e - +JSONCompactEachRowWithNamesAndTypes, false 
+7c1feeaae418e502d66fcc8e31946f2e - +JSONCompactEachRowWithNamesAndTypes, true +7c1feeaae418e502d66fcc8e31946f2e - +JSONCompactStringsEachRowWithNamesAndTypes, false +7c1feeaae418e502d66fcc8e31946f2e - +JSONCompactStringsEachRowWithNamesAndTypes, true +7c1feeaae418e502d66fcc8e31946f2e - diff --git a/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.sh b/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.sh new file mode 100755 index 00000000000..9fdca20d097 --- /dev/null +++ b/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +FORMATS=('TSVWithNamesAndTypes' 'CSVWithNamesAndTypes' 'JSONStringsEachRow' 'JSONCompactEachRowWithNamesAndTypes' 'JSONCompactStringsEachRowWithNamesAndTypes') +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" + +for format in "${FORMATS[@]}" +do + # Columns are permuted + $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Europe/Moscow'), b String) ENGINE=Memory()" + + echo "$format, false"; + $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ + "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Europe/Moscow') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 5000 Format $format" | \ + $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=false -q "INSERT INTO parsing_with_names FORMAT $format SETTINGS input_format_null_as_default=0" + + $CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum + $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" + + + $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Europe/Moscow'), b String) ENGINE=Memory()" + echo "$format, true"; + $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ + "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Europe/Moscow') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 5000 Format $format" | \ + $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=true -q "INSERT INTO parsing_with_names FORMAT $format SETTINGS input_format_null_as_default=0" + + $CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum + $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" +done diff --git a/utils/changelog/README.md b/utils/changelog/README.md index 69a190fdedc..28135d088ac 100644 --- a/utils/changelog/README.md +++ b/utils/changelog/README.md @@ -1,4 +1,4 @@ -## Generate changelog +## How To Generate Changelog Generate github token: * https://github.com/settings/tokens - keep all checkboxes unchecked, no scopes need to be enabled. 
@@ -8,6 +8,10 @@ Dependencies: apt-get install git curl jq python3 python3-fuzzywuzzy ``` +Update information about tags: +``` +git fetch --tags +``` Usage example: diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index ed6a7aea972..485f8c09faf 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -4,7 +4,7 @@ #include #include #include -#include // Y_IGNORE +#include #include #include #include diff --git a/website/benchmark/dbms/index.html b/website/benchmark/dbms/index.html index 453b6fff9cd..b4e29098ead 100644 --- a/website/benchmark/dbms/index.html +++ b/website/benchmark/dbms/index.html @@ -35,7 +35,7 @@

Full results
diff --git a/website/benchmark/hardware/index.html b/website/benchmark/hardware/index.html index d7f8c80e1cf..6dc12890ef4 100644 --- a/website/benchmark/hardware/index.html +++ b/website/benchmark/hardware/index.html @@ -35,7 +35,7 @@

Full results
diff --git a/website/css/main.css b/website/css/main.css index 70a73dda521..73ff758625f 100644 --- a/website/css/main.css +++ b/website/css/main.css @@ -1 +1 @@ -@media screen and (max-width:978.98px){.btn{padding:8px 16px}}@media screen and (max-width:978.98px){.btn-lg{padding:12px 24px}}.btn-primary,.btn-primary:active,.btn-primary:hover{color:#212529}.btn-outline-primary{background:#fffaf0;border-color:#fc0;color:#212529}.btn-outline-primary:active,.btn-outline-primary:hover{background:#fc0;border-color:#fc0;color:#212529}.btn-secondary{border-color:#212529;color:#fff}.btn-outline-secondary,.btn-secondary:active,.btn-secondary:hover{background:#fff;border-color:#212529;color:#212529}.btn-outline-secondary:active,.btn-outline-secondary:hover{background:#212529;border-color:#212529;color:#fff}.btn-tertiary{border-color:#257af4;color:#fff}.btn-tertiary:active,.btn-tertiary:hover{background:#257af4;border-color:#257af4;color:#fff}.btn-outline-tertiary{background:#e3f1fe;color:#257af4}.btn-outline-tertiary:active,.btn-outline-tertiary:hover{background:#257af4;color:#fff}.btns{align-items:center;display:flex;justify-content:center}.btns .btn+.btn{margin-left:24px}.btns .btn-lg+.btn-lg{margin-left:40px}.card{box-shadow:0 8px 20px rgba(108,117,125,.2);overflow:hidden;transition:box-shadow .2s,transform .2s;width:100%}.card,.card-body{position:relative}.card-body{z-index:10}.card.is-large .card-body{padding:40px}.card.bg-primary-light{border-color:#fc0}.card.has-dark-border{border-color:#6c757d}.card.has-pattern:after,.card.has-pattern:before{background-repeat:no-repeat;background-size:auto 100%;bottom:0;content:"";display:block;position:absolute;top:0;width:72px}.card.has-pattern:before{background-image:url(../images/backgrounds/bg-card-pattern-blue-1.png);background-position:0 0;left:0}.card.has-pattern:after{background-image:url(../images/backgrounds/bg-card-pattern-blue-2.png);background-position:100% 0;right:0}.card.has-hover:active,.card.has-hover:hover,a.card:active,a.card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2);transform:translateY(-8px)}.card.has-highlight:after,.card.has-hover:after,a.card:after{content:"";display:block;height:8px;margin-top:auto;transition:background .2s;width:100%}.card.has-highlight:after,.card.has-hover:active:after,.card.has-hover:hover:after,a.card:active:after,a.card:hover:after{background:#e3f1fe}.case-study-cards{-moz-column-gap:40px;column-gap:40px;display:grid;grid-template-columns:1fr;row-gap:40px;padding-bottom:40px;position:relative}.case-study-cards>div{align-items:stretch;display:flex}.case-study-cards:before{background:#d6dbdf;bottom:0;content:"";display:block;left:20px;position:absolute;top:40px;width:100vw}@media screen and (min-width:980px){.case-study-cards{grid-template-columns:repeat(2,minmax(0,1fr));row-gap:80px;padding-bottom:120px}.case-study-cards:before{left:-40px;top:120px}}.case-study-card{align-items:stretch;flex-direction:row;flex-shrink:0;left:0;transition:box-shadow .2s,left .4s,width .4s,z-index 0s;transition-delay:0s,.6s,.6s,0s;width:100%;z-index:2}@media screen and (max-width:979.98px){.case-study-card .row{min-height:0!important}}@media screen and (min-width:980px){.case-study-card:active,.case-study-card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2)}.case-study-card:not(.is-open){cursor:pointer}.case-study-card.is-open{transform:none!important;transition-delay:0s,0s,0s,0s;width:calc(200% + 40px);z-index:10}.case-study-card.is-closing{z-index:10}.case-study-card.open-left.is-open{left:calc(-100% - 
40px)}.case-study-card:before{background:no-repeat url(../images/backgrounds/bg-card-pattern-red.png);background-position:100%;background-size:contain;content:"";display:block;height:calc(100% - 80px);max-height:224px;max-width:234px;position:absolute;right:0;top:40px;transform:translateX(30%);transition:transform .4s;transition-delay:.6s;width:100%;z-index:1}}@media screen and (min-width:980px)and (min-width:1240px){.case-study-card:before{transform:translateX(10%)}}@media screen and (min-width:980px){.case-study-card.is-open:before{transform:translateX(60%);transition-delay:0s}}@media screen and (min-width:980px){.case-study-card-wrap{align-items:stretch;display:flex;flex-shrink:0;min-height:304px;position:relative;transition:width .4s;transition-delay:.6s;width:calc(200% + 42px);z-index:2}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-wrap{transition-delay:0s;width:100%}}@media screen and (min-width:980px){.case-study-card-body{display:flex;flex-direction:column;padding-right:80px!important}.case-study-card-body>.row{align-self:stretch}}@media screen and (min-width:980px){.case-study-card-toggle{background:#fff;box-shadow:0 8px 20px rgba(108,117,125,.2);border-radius:100%;cursor:pointer;height:56px;position:relative;width:56px}.case-study-card-toggle:after,.case-study-card-toggle:before{background:#257af4;content:"";display:block;height:4px;left:calc(50% - 15px);position:absolute;top:calc(50% - 2px);transition:opacity .2s,transform .2s;width:30px}.case-study-card-toggle:after{transform:rotate(90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:before{opacity:0;transform:rotate(-90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:after{transform:rotate(0)}}@media screen and (min-width:980px){.case-study-card .col-lg-3,.case-study-card .col-lg-auto{opacity:0;transform:translateX(24px);transition:opacity .4s,transform .4s}}@media screen and (min-width:980px){.case-study-card .col-lg-3{transition-delay:0s}}@media screen and (min-width:980px){.case-study-card .col-lg-auto{transition-delay:.2s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-auto{max-width:605px;width:calc(100% - 319px)}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3,.case-study-card.is-open .col-lg-auto{opacity:1;transform:none}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3{transition-delay:.4s}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-auto{transition-delay:.2s}}.footer-copy{white-space:nowrap}form .form-control{border:1px solid #6c757d;border-radius:6px;height:auto;line-height:20px;min-height:44px;padding:12px 16px;width:100%}form .form-control,form .form-control:focus{box-shadow:0 8px 20px rgba(108,117,125,.2);color:#212529}form .form-control:focus{border-color:#212529}form .form-control::-moz-placeholder{color:#6c757d}form .form-control:-ms-input-placeholder{color:#6c757d}form .form-control::placeholder{color:#6c757d}form select.form-control{-webkit-appearance:none;-moz-appearance:none;appearance:none}form select.form-control:not([data-chosen]){color:#6c757d}form .btn-secondary:active,form 
.btn-secondary:hover{color:#212529;background:#fc0;border-color:#fc0}.hero{overflow:visible;position:relative}.hero,.hero-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.hero-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.hero>.container{position:relative;z-index:2}.hero.has-offset{margin-bottom:-160px;padding-bottom:160px}.base-hero{height:22.5vw;max-height:324px;min-height:280px}.index-hero{background-image:url(/images/backgrounds/bg-hero-home.svg);height:68vw;max-height:980px}.index-hero,.other-hero{max-width:2448px;width:170vw}.other-hero{background-image:url(/images/backgrounds/bg-hero.svg)}.bg-footer-cta{background-image:url(/images/backgrounds/bg-footer-cta.svg);width:2448px}.quickstart-bg{background-image:url(/images/backgrounds/bg-quick-start.svg);height:40vw;top:220px;width:170vw}hr{background:#f1f6f9;border:0;display:block;height:4px;margin:0;width:100%}hr.is-small{height:2px}hr.is-large{height:8px}hr.is-medium{background:#d6dbdf}hr.is-dark{background:#495057}hr.is-yellow{background:linear-gradient(90deg,#ff8c00,#ff8c00 8px,#fc0 16px,rgba(255,204,0,0));-webkit-clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);height:8px}.icon{display:block;height:48px;margin-bottom:24px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center}@media screen and (min-width:576px){.icon{height:64px}}@media screen and (min-width:980px){.icon{height:80px}}img{max-width:100%}.kicker{color:#6c757d;font-family:Hind Siliguri,sans-serif;font-size:.875rem;font-weight:600;letter-spacing:1px;margin:0}@media screen and (max-width:978.98px){.lead{font-size:1.125rem}}.logo{display:block;height:36px;max-width:220px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;width:100%}.navbar-clickhouse{border-bottom:4px solid #f1f6f9;height:142px}.navbar-clickhouse>.container{flex-wrap:wrap}.navbar-super{flex-shrink:0;width:100%}.navbar-super ul{list-style:none}.navbar-super li:not(:last-child){margin-bottom:0;margin-right:24px}.navbar-super a{align-items:center;color:#212529;display:flex;font-size:.875rem}.navbar-super a:active,.navbar-super a:hover{color:#257af4;text-decoration:none}.navbar-super img{flex-shrink:0;margin-right:4px}.navbar-brand-clickhouse{background:no-repeat url(../images/logo-clickhouse.svg);background-size:contain;flex-shrink:0;height:28px;margin-right:48px;padding:0;width:180px}.navbar-nav{align-items:center;height:46px}.navbar .nav-item:not(:last-child){margin-bottom:0;margin-right:24px}.navbar .nav-link{color:#212529}.navbar .nav-link:active,.navbar .nav-link:hover{color:#257af4}.navbar .navbar-nav{flex-direction:row}@media screen and (max-width:978.98px){.navbar>.container{padding-left:20px;padding-right:20px}.navbar .navbar-toggler{height:24px;padding:0;width:24px}.navbar .navbar-toggler:focus{outline:none}.navbar .navbar-toggler-icon{background:no-repeat url(../images/icons/icon-menu.svg);background-position:50%;background-size:contain;height:24px;width:24px}.navbar .navbar-collapse{background:#fff;border-bottom:4px solid #f1f6f9;height:56px;left:0;padding:0 20px 16px;position:absolute;right:0;top:100%}.navbar .nav-link{font-size:.875rem;white-space:nowrap}}@media screen and (max-width:615.98px){.navbar .navbar-collapse{height:auto}.navbar .navbar-nav{flex-direction:column;height:auto}.navbar 
.nav-item:not(:last-child){margin-bottom:16px;margin-right:0}}@media screen and (max-width:399.98px){.navbar{height:80px}}.page,.photo-frame{overflow:hidden;width:100%}.photo-frame{background:hsla(0,0%,100%,.6);border-radius:100%;box-shadow:0 8px 20px rgba(108,117,125,.2);display:block;margin-bottom:24px;max-width:160px;position:relative}.photo-frame:before{content:"";display:block;padding-bottom:100%;width:100%}.photo-frame img{display:block;height:100%;left:0;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;position:absolute;top:0;width:100%}.pullquote{position:relative;width:70%}.pullquote:before{background:no-repeat url(../images/backgrounds/bg-quotes.svg);background-position:50%;background-size:100%;content:"";mix-blend-mode:multiply;right:56px;width:calc(100% - 16px);z-index:2}.pullquote-bg,.pullquote:before{bottom:0;display:block;position:absolute;top:0}.pullquote-bg{right:0;width:calc(50vw + 28.57143%);z-index:1}.pullquote-body{padding:64px 40px 64px 0;position:relative;z-index:3}.pullquote-quote{font-family:Hind Siliguri,sans-serif;font-size:32px;font-weight:700}.pullquote-citation{font-size:1.125rem}.section{overflow:visible;position:relative}.section,.section-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.section-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.section>.container{position:relative;z-index:2}.social-icons{align-items:center;display:flex}.social-icons>a{aspect-ratio:24/24;background:#6c757d;display:block;height:24px;width:24px;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background .2s}.social-icons>a:active,.social-icons>a:hover{background:#212529}.social-icons>a+a{margin-left:32px}.social-icons-facebook{-webkit-mask-image:url(/images/icons/icon-facebook-gray.svg);mask-image:url(/images/icons/icon-facebook-gray.svg)}.social-icons-twitter{-webkit-mask-image:url(/images/icons/icon-twitter-gray.svg);mask-image:url(/images/icons/icon-twitter-gray.svg);width:31px}.social-icons-linkedin{-webkit-mask-image:url(/images/icons/icon-linkedin-gray.svg);mask-image:url(/images/icons/icon-linkedin-gray.svg)}.social-icons-linkedin-alt{-webkit-mask-image:url(/images/icons/icon-linkedin-alt-gray.svg);mask-image:url(/images/icons/icon-linkedin-alt-gray.svg)}.social-icons.size-small>a{height:20px;width:20px}.social-icons.size-small>a:active,.social-icons.size-small>a:hover{background:#212529}.social-icons.size-small>a+a{margin-left:16px}.tabs{position:relative}.tabs:before{background:#fff;border-radius:7px 7px 0 0;content:"";display:block;height:8px;left:1px;position:absolute;right:1px;top:68px;z-index:10}@media screen and (min-width:1240px){.tabs:before{top:76px}}.tabs-body{background:#fff;border-radius:8px;border:1px solid #6c757d;box-shadow:0 8px 20px rgba(108,117,125,.2);padding:24px}@media screen and (min-width:980px){.tabs-body{padding:32px}}@media screen and (min-width:1240px){.tabs-body{padding:40px}}.tabs .nav-tabs{border-bottom:0;flex-wrap:nowrap;height:76px;margin:-20px -20px -9px;-webkit-mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);overflow:scroll;overflow-x:scroll;overflow-y:visible;padding:20px 20px 0;position:relative}@media screen and (min-width:940px){.tabs .nav-tabs{overflow:visible}}@media screen 
and (min-width:1240px){.tabs .nav-tabs{height:84px}}.tabs .nav-link{align-items:center;border-bottom:0;color:#6c757d;display:flex;font-size:.875rem;flex-shrink:0;height:56px;justify-content:center;padding:0 12px 8px;text-align:center;white-space:nowrap}@media screen and (min-width:1240px){.tabs .nav-link{height:64px;padding:0 16px 8px}}.tabs .nav-link.active{background:#fff;box-shadow:0 -4px 8px rgba(108,117,125,.1);font-weight:700;padding:0 16px 8px}@media screen and (min-width:980px){.tabs .nav-link.active{padding:0 24px 8px}}@media screen and (min-width:1240px){.tabs .nav-link.active{padding:0 32px 8px}}.tab-pane pre{background:#212529;border-radius:16px;color:#fff;padding:24px 16px}@media screen and (min-width:1240px){.tab-pane pre{padding:32px 24px}}.trailing-link{align-items:center;color:#212529;display:flex;font-size:.875rem;font-weight:700}.trailing-link:after{background:no-repeat url(../images/icons/icon-arrow.svg);background-position:100%;background-size:contain;content:"";display:block;height:12px;transition:transform .2s;width:20px}.trailing-link:active,.trailing-link:hover{color:#212529;text-decoration:none}.trailing-link:active:after,.trailing-link:hover:after{transform:translateX(8px)}.trailing-link.span-full:after{margin-left:auto}ul{color:#495057;list-style-type:square;padding-left:1.25em}ul li:not(:last-child){margin-bottom:16px}ul li::marker{color:#ff3939}ul.has-separators{list-style:none;padding:0}ul.has-separators li:not(:last-child){border-bottom:4px solid #f1f6f9;margin-bottom:24px;padding-bottom:24px}.bg-gradient-secondary{background-image:linear-gradient(58deg,#ff6443 3%,#fe561d 24%,#e32f0d 93%)}.bg-gradient-light-orange{background-image:linear-gradient(90deg,rgba(255,203,128,0),#ffcb80)}.bg-offset-right{bottom:0;left:-24px;position:absolute;top:0;width:calc(100vw + 24px);z-index:-1}@media screen and (min-width:1240px){.bg-offset-right{left:-96px;width:calc(100vw + 96px)}}.bg-inset-right{bottom:0;left:40px;position:absolute;top:0;width:calc(100vw - 40px);z-index:-1}@media screen and (min-width:980px){.bg-inset-right{left:96px;width:calc(100vw - 96px)}}.has-border-left{border-left:8px solid #f1f6f9;padding-left:16px}.font-xl{font-size:1.25rem}.font-lg{font-size:1.125rem}.font-sm{font-size:.875rem}.font-xs{font-size:.625rem}.font-weight-semibold{font-weight:600}.display-5{color:#212529;font-size:20px;font-weight:500}.display-6{color:#212529;font-size:14px;font-weight:700}.text-decoration-underline{text-decoration:underline}.text-upper{text-transform:uppercase} \ No newline at end of file +@media screen and (max-width:978.98px){.btn{padding:8px 16px}}@media screen and (max-width:978.98px){.btn-lg{padding:12px 
24px}}.btn-primary,.btn-primary:active,.btn-primary:hover{color:#212529}.btn-outline-primary{background:#fffaf0;border-color:#fc0;color:#212529}.btn-outline-primary:active,.btn-outline-primary:hover{background:#fc0;border-color:#fc0;color:#212529}.btn-secondary{border-color:#212529;color:#fff}.btn-outline-secondary,.btn-secondary:active,.btn-secondary:hover{background:#fff;border-color:#212529;color:#212529}.btn-outline-secondary:active,.btn-outline-secondary:hover{background:#212529;border-color:#212529;color:#fff}.btn-tertiary{border-color:#257af4;color:#fff}.btn-tertiary:active,.btn-tertiary:hover{background:#257af4;border-color:#257af4;color:#fff}.btn-outline-tertiary{background:#e3f1fe;color:#257af4}.btn-outline-tertiary:active,.btn-outline-tertiary:hover{background:#257af4;color:#fff}.btns{align-items:center;display:grid;-moz-column-gap:24px;column-gap:24px;row-gap:16px;grid-auto-flow:column;justify-content:center}@media screen and (max-width:767.98px){.btns{grid-auto-flow:row}}.btns.btns-lg{-moz-column-gap:40px;column-gap:40px}.btns.is-2{grid-template-columns:1fr 1fr}@media screen and (max-width:767.98px){.btns.is-2{grid-template-columns:1fr}}.btns.is-3{grid-template-columns:1fr 1fr 1fr}@media screen and (max-width:767.98px){.btns.is-3{grid-template-columns:1fr}}.card{box-shadow:0 8px 20px rgba(108,117,125,.2);overflow:hidden;transition:box-shadow .2s,transform .2s;width:100%}.card,.card-body{position:relative}.card-body{z-index:10}.card.is-large .card-body{padding:40px}.card.bg-primary-light{border-color:#fc0}.card.has-dark-border{border-color:#6c757d}.card.has-pattern:after,.card.has-pattern:before{background-repeat:no-repeat;background-size:auto 100%;bottom:0;content:"";display:block;position:absolute;top:0;width:72px}.card.has-pattern:before{background-image:url(../images/backgrounds/bg-card-pattern-blue-1.png);background-position:0 0;left:0}.card.has-pattern:after{background-image:url(../images/backgrounds/bg-card-pattern-blue-2.png);background-position:100% 0;right:0}.card.has-hover:active,.card.has-hover:hover,a.card:active,a.card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2);transform:translateY(-8px)}.card.has-highlight:after,.card.has-hover:after,a.card:after{content:"";display:block;height:8px;margin-top:auto;transition:background .2s;width:100%}.card.has-highlight:after,.card.has-hover:active:after,.card.has-hover:hover:after,a.card:active:after,a.card:hover:after{background:#e3f1fe}.case-study-cards{-moz-column-gap:40px;column-gap:40px;display:grid;grid-template-columns:1fr;row-gap:40px;padding-bottom:40px;position:relative}.case-study-cards>div{align-items:stretch;display:flex}.case-study-cards:before{background:#d6dbdf;bottom:0;content:"";display:block;left:20px;position:absolute;top:40px;width:100vw}@media screen and (min-width:980px){.case-study-cards{grid-template-columns:repeat(2,minmax(0,1fr));row-gap:80px;padding-bottom:120px}.case-study-cards:before{left:-40px;top:120px}}.case-study-card{align-items:stretch;flex-direction:row;flex-shrink:0;left:0;transition:box-shadow .2s,left .4s,width .4s,z-index 0s;transition-delay:0s,.6s,.6s,0s;width:100%;z-index:2}@media screen and (max-width:979.98px){.case-study-card .row{min-height:0!important}}@media screen and (min-width:980px){.case-study-card:active,.case-study-card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2)}.case-study-card:not(.is-open){cursor:pointer}.case-study-card.is-open{transform:none!important;transition-delay:0s,0s,0s,0s;width:calc(200% + 
40px);z-index:10}.case-study-card.is-closing{z-index:10}.case-study-card.open-left.is-open{left:calc(-100% - 40px)}.case-study-card:before{background:no-repeat url(../images/backgrounds/bg-card-pattern-red.png);background-position:100%;background-size:contain;content:"";display:block;height:calc(100% - 80px);max-height:224px;max-width:234px;position:absolute;right:0;top:40px;transform:translateX(30%);transition:transform .4s;transition-delay:.6s;width:100%;z-index:1}}@media screen and (min-width:980px)and (min-width:1240px){.case-study-card:before{transform:translateX(50%)}}@media screen and (min-width:980px){.case-study-card.is-open:before{transform:translateX(70%);transition-delay:0s}}@media screen and (min-width:980px){.case-study-card-wrap{align-items:stretch;display:flex;flex-shrink:0;min-height:304px;position:relative;transition:width .4s;transition-delay:.6s;width:calc(200% + 42px);z-index:2}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-wrap{transition-delay:0s;width:100%}}@media screen and (min-width:980px){.case-study-card-body{display:flex;flex-direction:column;padding-right:80px!important}.case-study-card-body>.row{align-self:stretch}}@media screen and (min-width:980px){.case-study-card-toggle{background:#fff;box-shadow:0 8px 20px rgba(108,117,125,.2);border-radius:100%;cursor:pointer;height:56px;position:relative;width:56px}.case-study-card-toggle:after,.case-study-card-toggle:before{background:#257af4;content:"";display:block;height:4px;left:calc(50% - 15px);position:absolute;top:calc(50% - 2px);transition:opacity .2s,transform .2s;width:30px}.case-study-card-toggle:after{transform:rotate(90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:before{opacity:0;transform:rotate(-90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:after{transform:rotate(0)}}@media screen and (min-width:980px){.case-study-card .col-lg-3{left:-60%;position:relative;transition:left .4s;transition-delay:.6s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-3{flex:0 0 250px;max-width:250px;width:250px}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3{left:0;transition-delay:0s}}@media screen and (min-width:980px){.case-study-card .col-lg-auto{opacity:0;transform:translateX(24px);transition:opacity .4s,transform .4s;transition-delay:.2s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-auto{max-width:605px;width:calc(100% - 319px)}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-auto{opacity:1;transform:none;transition-delay:.2s}}.footer-copy{white-space:nowrap}form .form-control{border:1px solid #6c757d;border-radius:6px;height:auto;line-height:20px;min-height:44px;padding:12px 16px;width:100%}form .form-control,form .form-control:focus{box-shadow:0 8px 20px rgba(108,117,125,.2);color:#212529}form .form-control:focus{border-color:#212529}form .form-control::-moz-placeholder{color:#6c757d}form .form-control:-ms-input-placeholder{color:#6c757d}form .form-control::placeholder{color:#6c757d}form select.form-control{-webkit-appearance:none;-moz-appearance:none;appearance:none}form select.form-control:not([data-chosen]){color:#6c757d}form .btn-secondary:active,form 
.btn-secondary:hover{color:#212529;background:#fc0;border-color:#fc0}.hero{overflow:visible;position:relative}.hero,.hero-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.hero-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.hero>.container{position:relative;z-index:2}.hero.has-offset{margin-bottom:-160px;padding-bottom:160px}.base-hero{height:22.5vw;max-height:324px;min-height:280px}.index-hero{background-image:url(/images/backgrounds/bg-hero-home.svg);height:68vw;max-height:980px}.index-hero,.other-hero{max-width:2448px;width:170vw}.other-hero{background-image:url(/images/backgrounds/bg-hero.svg)}.bg-footer-cta{background-image:url(/images/backgrounds/bg-footer-cta.svg);width:2448px}.quickstart-bg{background-image:url(/images/backgrounds/bg-quick-start.svg);height:40vw;top:220px;width:170vw}hr{background:#f1f6f9;border:0;display:block;height:4px;margin:0;width:100%}hr.is-small{height:2px}hr.is-large{height:8px}hr.is-medium{background:#d6dbdf}hr.is-dark{background:#495057}hr.is-yellow{background:linear-gradient(90deg,#ff8c00,#ff8c00 8px,#fc0 16px,rgba(255,204,0,0));-webkit-clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);height:8px}.icon{display:block;height:48px;margin-bottom:24px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center}@media screen and (min-width:576px){.icon{height:64px}}@media screen and (min-width:980px){.icon{height:80px}}img{max-width:100%}.kicker{color:#6c757d;font-family:Hind Siliguri,sans-serif;font-size:.875rem;font-weight:600;letter-spacing:1px;margin:0}@media screen and (max-width:978.98px){.lead{font-size:1.125rem}}.logo{display:block;height:36px;max-width:220px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;width:100%}.navbar-clickhouse{border-bottom:4px solid #f1f6f9;height:142px}.navbar-clickhouse>.container{flex-wrap:wrap}.navbar-super{flex-shrink:0;width:100%}.navbar-super ul{list-style:none}.navbar-super li:not(:last-child){margin-bottom:0;margin-right:24px}.navbar-super a{align-items:center;color:#212529;display:flex;font-size:.875rem}.navbar-super a:active,.navbar-super a:hover{color:#257af4;text-decoration:none}.navbar-super img{flex-shrink:0;margin-right:4px}.navbar-brand-clickhouse{background:no-repeat url(../images/logo-clickhouse.svg);background-size:contain;flex-shrink:0;height:28px;margin-right:48px;padding:0;width:180px}.navbar-nav{align-items:center;height:46px}.navbar .nav-item:not(:last-child){margin-bottom:0;margin-right:24px}.navbar .nav-link{color:#212529}.navbar .nav-link:active,.navbar .nav-link:hover{color:#257af4}.navbar .navbar-nav{flex-direction:row}@media screen and (max-width:978.98px){.navbar>.container{padding-left:20px;padding-right:20px}.navbar .navbar-toggler{height:24px;padding:0;width:24px}.navbar .navbar-toggler:focus{outline:none}.navbar .navbar-toggler-icon{background:no-repeat url(../images/icons/icon-menu.svg);background-position:50%;background-size:contain;height:24px;width:24px}.navbar .navbar-collapse{background:#fff;border-bottom:4px solid #f1f6f9;height:56px;left:0;padding:0 20px 16px;position:absolute;right:0;top:100%}.navbar .nav-link{font-size:.875rem;white-space:nowrap}}@media screen and (max-width:615.98px){.navbar .navbar-collapse{height:auto}.navbar .navbar-nav{flex-direction:column;height:auto}.navbar 
.nav-item:not(:last-child){margin-bottom:16px;margin-right:0}}@media screen and (max-width:399.98px){.navbar{height:80px}}.page,.photo-frame{overflow:hidden;width:100%}.photo-frame{background:hsla(0,0%,100%,.6);border-radius:100%;box-shadow:0 8px 20px rgba(108,117,125,.2);display:block;margin-bottom:24px;max-width:160px;position:relative}.photo-frame:before{content:"";display:block;padding-bottom:100%;width:100%}.photo-frame img{display:block;height:100%;left:0;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;position:absolute;top:0;width:100%}.pullquote{position:relative;width:70%}.pullquote:before{background:no-repeat url(../images/backgrounds/bg-quotes.svg);background-position:50%;background-size:100%;content:"";mix-blend-mode:multiply;right:56px;width:calc(100% - 16px);z-index:2}.pullquote-bg,.pullquote:before{bottom:0;display:block;position:absolute;top:0}.pullquote-bg{right:0;width:calc(50vw + 28.57143%);z-index:1}.pullquote-body{padding:64px 40px 64px 0;position:relative;z-index:3}.pullquote-quote{font-family:Hind Siliguri,sans-serif;font-size:32px;font-weight:700}.pullquote-citation{font-size:1.125rem}.section{overflow:visible;position:relative}.section,.section-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.section-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.section>.container{position:relative;z-index:2}.social-icons{align-items:center;display:flex}.social-icons>a{aspect-ratio:24/24;background:#6c757d;display:block;height:24px;width:24px;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background .2s}.social-icons>a:active,.social-icons>a:hover{background:#212529}.social-icons>a+a{margin-left:32px}.social-icons-facebook{-webkit-mask-image:url(/images/icons/icon-facebook-gray.svg);mask-image:url(/images/icons/icon-facebook-gray.svg)}.social-icons-twitter{-webkit-mask-image:url(/images/icons/icon-twitter-gray.svg);mask-image:url(/images/icons/icon-twitter-gray.svg);width:31px}.social-icons-linkedin{-webkit-mask-image:url(/images/icons/icon-linkedin-gray.svg);mask-image:url(/images/icons/icon-linkedin-gray.svg)}.social-icons-linkedin-alt{-webkit-mask-image:url(/images/icons/icon-linkedin-alt-gray.svg);mask-image:url(/images/icons/icon-linkedin-alt-gray.svg)}.social-icons.size-small>a{height:20px;width:20px}.social-icons.size-small>a:active,.social-icons.size-small>a:hover{background:#212529}.social-icons.size-small>a+a{margin-left:16px}.tabs{position:relative}.tabs:before{background:#fff;border-radius:7px 7px 0 0;content:"";display:block;height:8px;left:1px;position:absolute;right:1px;top:68px;z-index:10}@media screen and (min-width:1240px){.tabs:before{top:76px}}.tabs-body{background:#fff;border-radius:8px;border:1px solid #6c757d;box-shadow:0 8px 20px rgba(108,117,125,.2);padding:24px}@media screen and (min-width:980px){.tabs-body{padding:32px}}@media screen and (min-width:1240px){.tabs-body{padding:40px}}.tabs .nav-tabs{border-bottom:0;flex-wrap:nowrap;height:76px;margin:-20px -20px -9px;-webkit-mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);overflow:scroll;overflow-x:scroll;overflow-y:visible;padding:20px 20px 0;position:relative}@media screen and (min-width:940px){.tabs .nav-tabs{overflow:visible}}@media screen 
and (min-width:1240px){.tabs .nav-tabs{height:84px}}.tabs .nav-link{align-items:center;border-bottom:0;color:#6c757d;display:flex;font-size:.875rem;flex-shrink:0;height:56px;justify-content:center;padding:0 12px 8px;text-align:center;white-space:nowrap}@media screen and (min-width:1240px){.tabs .nav-link{height:64px;padding:0 16px 8px}}.tabs .nav-link.active{background:#fff;box-shadow:0 -4px 8px rgba(108,117,125,.1);font-weight:700;padding:0 16px 8px}@media screen and (min-width:980px){.tabs .nav-link.active{padding:0 24px 8px}}@media screen and (min-width:1240px){.tabs .nav-link.active{padding:0 32px 8px}}.tab-pane pre{background:#212529;border-radius:16px;color:#fff;padding:24px 16px}@media screen and (min-width:1240px){.tab-pane pre{padding:32px 24px}}.trailing-link{align-items:center;color:#212529;display:flex;font-size:.875rem;font-weight:700}.trailing-link:after{background:no-repeat url(../images/icons/icon-arrow.svg);background-position:100%;background-size:contain;content:"";display:block;height:12px;transition:transform .2s;width:20px}.trailing-link:active,.trailing-link:hover{color:#212529;text-decoration:none}.trailing-link:active:after,.trailing-link:hover:after{transform:translateX(8px)}.trailing-link.span-full:after{margin-left:auto}ul{color:#495057;list-style-type:square;padding-left:1.25em}ul li:not(:last-child){margin-bottom:16px}ul li::marker{color:#ff3939}ul.has-separators{list-style:none;padding:0}ul.has-separators li:not(:last-child){border-bottom:4px solid #f1f6f9;margin-bottom:24px;padding-bottom:24px}.bg-gradient-secondary{background-image:linear-gradient(58deg,#ff6443 3%,#fe561d 24%,#e32f0d 93%)}.bg-gradient-light-orange{background-image:linear-gradient(90deg,rgba(255,203,128,0),#ffcb80)}.bg-offset-right{bottom:0;left:-24px;position:absolute;top:0;width:calc(100vw + 24px);z-index:-1}@media screen and (min-width:1240px){.bg-offset-right{left:-96px;width:calc(100vw + 96px)}}.bg-inset-right{bottom:0;left:40px;position:absolute;top:0;width:calc(100vw - 40px);z-index:-1}@media screen and (min-width:980px){.bg-inset-right{left:96px;width:calc(100vw - 96px)}}.has-border-left{border-left:8px solid #f1f6f9;padding-left:16px}.font-xl{font-size:1.25rem}.font-lg{font-size:1.125rem}.font-sm{font-size:.875rem}.font-xs{font-size:.625rem}.font-weight-semibold{font-weight:600}.display-5{color:#212529;font-size:20px;font-weight:500}.display-6{color:#212529;font-size:14px;font-weight:700}.overflow-auto{overflow:auto}.text-decoration-underline{text-decoration:underline}.text-upper{text-transform:uppercase} \ No newline at end of file diff --git a/website/images/photos/elissa-weve.jpg b/website/images/photos/elissa-weve.jpg new file mode 100644 index 00000000000..fb65c43c504 Binary files /dev/null and b/website/images/photos/elissa-weve.jpg differ diff --git a/website/images/photos/niek-lok.jpg b/website/images/photos/niek-lok.jpg new file mode 100644 index 00000000000..bfa2d889565 Binary files /dev/null and b/website/images/photos/niek-lok.jpg differ diff --git a/website/images/photos/pascal-van-den-nieuwendijk.jpg b/website/images/photos/pascal-van-den-nieuwendijk.jpg new file mode 100644 index 00000000000..a058206d68e Binary files /dev/null and b/website/images/photos/pascal-van-den-nieuwendijk.jpg differ diff --git a/website/images/photos/roopa-tangirala.jpg b/website/images/photos/roopa-tangirala.jpg new file mode 100644 index 00000000000..2583990a98d Binary files /dev/null and b/website/images/photos/roopa-tangirala.jpg differ diff --git a/website/src/scss/components/_btns.scss 
b/website/src/scss/components/_btns.scss index da2d7535e62..bed4e794023 100644 --- a/website/src/scss/components/_btns.scss +++ b/website/src/scss/components/_btns.scss @@ -1,13 +1,32 @@
.btns {
align-items: center;
- display: flex;
+ display: grid;
+ column-gap: $spacer * 3;
+ row-gap: $spacer * 2;
+ grid-auto-flow: column;
justify-content: center;

- .btn + .btn {
- margin-left: $spacer * 3;
- }
-
- .btn-lg + .btn-lg {
- margin-left: $spacer * 5;
+ @media screen and (max-width: 767.98px) {
+ grid-auto-flow: row;
+ }
+}
+
+.btns.btns-lg {
+ column-gap: $spacer * 5;
+}
+
+.btns.is-2 {
+ grid-template-columns: 1fr 1fr;
+
+ @media screen and (max-width: 767.98px) {
+ grid-template-columns: 1fr;
+ }
+}
+
+.btns.is-3 {
+ grid-template-columns: 1fr 1fr 1fr;
+
+ @media screen and (max-width: 767.98px) {
+ grid-template-columns: 1fr;
}
}
diff --git a/website/src/scss/components/_case-study-card.scss b/website/src/scss/components/_case-study-card.scss index bb2eb53abd1..d809e770215 100644 --- a/website/src/scss/components/_case-study-card.scss +++ b/website/src/scss/components/_case-study-card.scss @@ -95,12 +95,12 @@
z-index: 1;

@media screen and (min-width: 1240px) {
- transform: translateX(10%);
+ transform: translateX(50%);
}
}

&.is-open:before {
- transform: translateX(60%);
+ transform: translateX(70%);
transition-delay: 0s;
}

@@ -167,18 +167,28 @@
transform: rotate(0);
}

- .col-lg-3,
- .col-lg-auto {
- opacity: 0;
- transform: translateX(24px);
- transition: 0.4s opacity, 0.4s transform;
+ .col-lg-3 {
+ left: -60%;
+ position: relative;
+ transition: 0.4s left;
+ transition-delay: 0.6s;
+
+ @media screen and (min-width: 980px) {
+ flex: 0 0 250px;
+ max-width: 250px;
+ width: 250px;
+ }
}

- .col-lg-3 {
+ &.is-open .col-lg-3 {
+ left: 0;
transition-delay: 0s;
}

.col-lg-auto {
+ opacity: 0;
+ transform: translateX(24px);
+ transition: 0.4s opacity, 0.4s transform;
transition-delay: 0.2s;

@media screen and (min-width: 980px) {
@@ -187,17 +197,9 @@
}
}

- &.is-open .col-lg-3,
&.is-open .col-lg-auto {
opacity: 1;
transform: none;
- }
-
- &.is-open .col-lg-3 {
- transition-delay: 0.4s;
- }
-
- &.is-open .col-lg-auto {
transition-delay: 0.2s;
}
}
diff --git a/website/src/scss/utilities/_overflow.scss b/website/src/scss/utilities/_overflow.scss new file mode 100644 index 00000000000..d4c8444371a --- /dev/null +++ b/website/src/scss/utilities/_overflow.scss @@ -0,0 +1,3 @@
+.overflow-auto {
+ overflow: auto;
+}
diff --git a/website/templates/careers/greenhouse.html b/website/templates/careers/greenhouse.html index e4a4b3aba4f..52132f5ef51 100644 --- a/website/templates/careers/greenhouse.html +++ b/website/templates/careers/greenhouse.html @@ -1,4 +1,4 @@
-
+
diff --git a/website/templates/company/team.html b/website/templates/company/team.html index fb68be6af08..28efdcfde06 100644 --- a/website/templates/company/team.html +++ b/website/templates/company/team.html @@ -162,6 +162,19 @@ {{ _('Software Engineer') }}

+
+
+ + + + +

+ {{ _('Niek Lok') }} +

+

+ {{ _('Account Executive') }} +

+
@@ -292,6 +305,19 @@ {{ _('VP, Operations') }}

+
+
+ + + + +

+ {{ _('Roopa Tangirala') }} +

+

+ {{ _('Senior Director, Engineering') }} +

+
@@ -305,6 +331,19 @@ {{ _('Software Engineer') }}

+
+
+ + + + +

+ {{ _('Pascal Van den Nieuwendijk') }} +

+

+ {{ _('Account Executive') }} +

+
@@ -318,6 +357,19 @@ {{ _('VP, EMEA') }}

+
+
+ + + + +

+ {{ _('Elissa Weve') }} +

+

+ {{ _('Customer / Partner Alliance Manager') }} +

+
diff --git a/website/templates/contact-thank-you/hero.html b/website/templates/contact-thank-you/hero.html index 0e8cef54ce2..a1841eb433f 100644 --- a/website/templates/contact-thank-you/hero.html +++ b/website/templates/contact-thank-you/hero.html @@ -7,7 +7,7 @@

- {{ _('A salesperson will be in contact with you shortly.') }}
+ {{ _('Someone will be in contact with you shortly.') }}

diff --git a/website/templates/index/hero.html b/website/templates/index/hero.html index b01e79703df..b101e6f9831 100644 --- a/website/templates/index/hero.html +++ b/website/templates/index/hero.html @@ -10,9 +10,9 @@ {{ _('ClickHouse® is an open-source, high performance columnar OLAP database management system for real-time analytics using SQL.') }}

-
- Learn More - Release Webinar +

@@ -32,10 +32,10 @@

Raising the Company’s Valuation to $2B

- diff --git a/website/templates/index/success.html b/website/templates/index/success.html index 37b31fb2b1e..e09274c3a6f 100644 --- a/website/templates/index/success.html +++ b/website/templates/index/success.html @@ -2,7 +2,7 @@

- What users are saying
+ ClickHouse Users