diff --git a/base/common/DateLUTImpl.h b/base/common/DateLUTImpl.h index 2ccad4be348..202eb88a361 100644 --- a/base/common/DateLUTImpl.h +++ b/base/common/DateLUTImpl.h @@ -18,6 +18,8 @@ #define DATE_LUT_MAX (0xFFFFFFFFU - 86400) #define DATE_LUT_MAX_DAY_NUM 0xFFFF +/// Max int value of Date32, DATE LUT cache size minus daynum_offset_epoch +#define DATE_LUT_MAX_EXTEND_DAY_NUM (DATE_LUT_SIZE - 16436) /// A constant to add to time_t so every supported time point becomes non-negative and still has the same remainder of division by 3600. /// If we treat "remainder of division" operation in the sense of modular arithmetic (not like in C++). @@ -270,6 +272,8 @@ public: auto getOffsetAtStartOfEpoch() const { return offset_at_start_of_epoch; } auto getTimeOffsetAtStartOfLUT() const { return offset_at_start_of_lut; } + auto getDayNumOffsetEpoch() const { return daynum_offset_epoch; } + /// All functions below are thread-safe; arguments are not checked. inline ExtendedDayNum toDayNum(ExtendedDayNum d) const @@ -926,15 +930,17 @@ public: { if (unlikely(year < DATE_LUT_MIN_YEAR || year > DATE_LUT_MAX_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31)) return LUTIndex(0); - - return LUTIndex{years_months_lut[(year - DATE_LUT_MIN_YEAR) * 12 + month - 1] + day_of_month - 1}; + auto year_lut_index = (year - DATE_LUT_MIN_YEAR) * 12 + month - 1; + UInt32 index = years_months_lut[year_lut_index].toUnderType() + day_of_month - 1; + /// When date is out of range, default value is DATE_LUT_SIZE - 1 (2283-11-11) + return LUTIndex{std::min(index, static_cast(DATE_LUT_SIZE - 1))}; } /// Create DayNum from year, month, day of month. - inline ExtendedDayNum makeDayNum(Int16 year, UInt8 month, UInt8 day_of_month) const + inline ExtendedDayNum makeDayNum(Int16 year, UInt8 month, UInt8 day_of_month, Int32 default_error_day_num = 0) const { if (unlikely(year < DATE_LUT_MIN_YEAR || year > DATE_LUT_MAX_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31)) - return ExtendedDayNum(0); + return ExtendedDayNum(default_error_day_num); return toDayNum(makeLUTIndex(year, month, day_of_month)); } @@ -1091,9 +1097,9 @@ public: return lut[new_index].date + time; } - inline NO_SANITIZE_UNDEFINED Time addWeeks(Time t, Int64 delta) const + inline NO_SANITIZE_UNDEFINED Time addWeeks(Time t, Int32 delta) const { - return addDays(t, delta * 7); + return addDays(t, static_cast(delta) * 7); } inline UInt8 saturateDayOfMonth(Int16 year, UInt8 month, UInt8 day_of_month) const @@ -1158,14 +1164,14 @@ public: return toDayNum(addMonthsIndex(d, delta)); } - inline Time NO_SANITIZE_UNDEFINED addQuarters(Time t, Int64 delta) const + inline Time NO_SANITIZE_UNDEFINED addQuarters(Time t, Int32 delta) const { - return addMonths(t, delta * 3); + return addMonths(t, static_cast(delta) * 3); } - inline ExtendedDayNum addQuarters(ExtendedDayNum d, Int64 delta) const + inline ExtendedDayNum addQuarters(ExtendedDayNum d, Int32 delta) const { - return addMonths(d, delta * 3); + return addMonths(d, static_cast(delta) * 3); } template diff --git a/base/common/LocalDate.h b/base/common/LocalDate.h index b1e6eeb907c..484847e1ff4 100644 --- a/base/common/LocalDate.h +++ b/base/common/LocalDate.h @@ -70,6 +70,14 @@ public: m_day = values.day_of_month; } + explicit LocalDate(ExtendedDayNum day_num) + { + const auto & values = DateLUT::instance().getValues(day_num); + m_year = values.year; + m_month = values.month; + m_day = values.day_of_month; + } + LocalDate(unsigned short year_, unsigned char month_, unsigned char 
day_) : m_year(year_), m_month(month_), m_day(day_) { @@ -98,6 +106,12 @@ public: return DayNum(lut.makeDayNum(m_year, m_month, m_day).toUnderType()); } + ExtendedDayNum getExtenedDayNum() const + { + const auto & lut = DateLUT::instance(); + return ExtendedDayNum (lut.makeDayNum(m_year, m_month, m_day).toUnderType()); + } + operator DayNum() const { return getDayNum(); diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 49cf30d2556..18072566d04 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54453) +SET(VERSION_REVISION 54454) SET(VERSION_MAJOR 21) -SET(VERSION_MINOR 8) +SET(VERSION_MINOR 9) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH fb895056568e26200629c7d19626e92d2dedc70d) -SET(VERSION_DESCRIBE v21.8.1.1-prestable) -SET(VERSION_STRING 21.8.1.1) +SET(VERSION_GITHASH f48c5af90c2ad51955d1ee3b6b05d006b03e4238) +SET(VERSION_DESCRIBE v21.9.1.1-prestable) +SET(VERSION_STRING 21.9.1.1) # end of autochange diff --git a/contrib/h3 b/contrib/h3 index e209086ae1b..c7f46cfd71f 160000 --- a/contrib/h3 +++ b/contrib/h3 @@ -1 +1 @@ -Subproject commit e209086ae1b5477307f545a0f6111780edc59940 +Subproject commit c7f46cfd71fb60e2fefc90e28abe81657deff735 diff --git a/contrib/h3-cmake/CMakeLists.txt b/contrib/h3-cmake/CMakeLists.txt index 6b184a175b0..f4c70dc476f 100644 --- a/contrib/h3-cmake/CMakeLists.txt +++ b/contrib/h3-cmake/CMakeLists.txt @@ -3,21 +3,22 @@ set(H3_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/h3/src/h3lib") set(SRCS "${H3_SOURCE_DIR}/lib/algos.c" -"${H3_SOURCE_DIR}/lib/baseCells.c" -"${H3_SOURCE_DIR}/lib/bbox.c" "${H3_SOURCE_DIR}/lib/coordijk.c" -"${H3_SOURCE_DIR}/lib/faceijk.c" -"${H3_SOURCE_DIR}/lib/geoCoord.c" -"${H3_SOURCE_DIR}/lib/h3Index.c" -"${H3_SOURCE_DIR}/lib/h3UniEdge.c" -"${H3_SOURCE_DIR}/lib/linkedGeo.c" -"${H3_SOURCE_DIR}/lib/localij.c" -"${H3_SOURCE_DIR}/lib/mathExtensions.c" +"${H3_SOURCE_DIR}/lib/bbox.c" "${H3_SOURCE_DIR}/lib/polygon.c" +"${H3_SOURCE_DIR}/lib/h3Index.c" "${H3_SOURCE_DIR}/lib/vec2d.c" "${H3_SOURCE_DIR}/lib/vec3d.c" "${H3_SOURCE_DIR}/lib/vertex.c" +"${H3_SOURCE_DIR}/lib/linkedGeo.c" +"${H3_SOURCE_DIR}/lib/localij.c" +"${H3_SOURCE_DIR}/lib/latLng.c" +"${H3_SOURCE_DIR}/lib/directedEdge.c" +"${H3_SOURCE_DIR}/lib/mathExtensions.c" +"${H3_SOURCE_DIR}/lib/iterators.c" "${H3_SOURCE_DIR}/lib/vertexGraph.c" +"${H3_SOURCE_DIR}/lib/faceijk.c" +"${H3_SOURCE_DIR}/lib/baseCells.c" ) configure_file("${H3_SOURCE_DIR}/include/h3api.h.in" "${H3_BINARY_DIR}/include/h3api.h") diff --git a/debian/changelog b/debian/changelog index 36c29fce1d0..38f740ae062 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (21.8.1.1) unstable; urgency=low +clickhouse (21.9.1.1) unstable; urgency=low * Modified source code - -- clickhouse-release Mon, 28 Jun 2021 00:50:15 +0300 + -- clickhouse-release Sat, 10 Jul 2021 08:22:49 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 19cadccb926..f17fa8ade16 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.8.1.* +ARG version=21.9.1.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 65d90bf52ce..5da9e703f4d 100644 --- 
a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.8.1.* +ARG version=21.9.1.* ARG gosu_ver=1.10 # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 687393025f0..5768753cd7c 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.8.1.* +ARG version=21.9.1.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/docs/en/development/adding_test_queries.md b/docs/en/development/adding_test_queries.md index 95dfd076a12..547d8b0fa37 100644 --- a/docs/en/development/adding_test_queries.md +++ b/docs/en/development/adding_test_queries.md @@ -105,11 +105,11 @@ clickhouse-client -nmT < tests/queries/0_stateless/01521_dummy_test.sql | tee te 5) ensure everything is correct, if the test output is incorrect (due to some bug for example), adjust the reference file using text editor. -#### How to create good test +#### How to create a good test -- test should be +- A test should be - minimal - create only tables related to tested functionality, remove unrelated columns and parts of query - - fast - should not take longer than few seconds (better subseconds) + - fast - should not take longer than a few seconds (better subseconds) - correct - fails then feature is not working - deterministic - isolated / stateless @@ -126,6 +126,16 @@ clickhouse-client -nmT < tests/queries/0_stateless/01521_dummy_test.sql | tee te - use other SQL files in the `0_stateless` folder as an example - ensure the feature / feature combination you want to test is not yet covered with existing tests +#### Test naming rules + +It's important to name tests correctly, so one could turn some tests subset off in clickhouse-test invocation. + +| Tester flag| What should be in test name | When flag should be added | +|---|---|---|---| +| `--[no-]zookeeper`| "zookeeper" or "replica" | Test uses tables from ReplicatedMergeTree family | +| `--[no-]shard` | "shard" or "distributed" or "global"| Test using connections to 127.0.0.2 or similar | +| `--[no-]long` | "long" or "deadlock" or "race" | Test runs longer than 60 seconds | + #### Commit / push / create PR. 1) commit & push your changes diff --git a/docs/en/development/build.md b/docs/en/development/build.md index 8ef12221e8d..97b477d55a5 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -134,10 +134,10 @@ $ ./release ## Faster builds for development -Normally all tools of the ClickHouse bundle, such as `clickhouse-server`, `clickhouse-client` etc., are linked into a single static executable, `clickhouse`. This executable must be re-linked on every change, which might be slow. Two common ways to improve linking time are to use `lld` linker, and use the 'split' build configuration, which builds a separate binary for every tool, and further splits the code into several shared libraries. To enable these tweaks, pass the following flags to `cmake`: +Normally all tools of the ClickHouse bundle, such as `clickhouse-server`, `clickhouse-client` etc., are linked into a single static executable, `clickhouse`. This executable must be re-linked on every change, which might be slow. 
One common way to improve build time is to use the 'split' build configuration, which builds a separate binary for every tool, and further splits the code into several shared libraries. To enable this tweak, pass the following flags to `cmake`: ``` --DCMAKE_C_FLAGS="--ld-path=lld" -DCMAKE_CXX_FLAGS="--ld-path=lld" -DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1 +-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1 ``` ## You Don’t Have to Build ClickHouse {#you-dont-have-to-build-clickhouse} diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md new file mode 100644 index 00000000000..6af12eb9b01 --- /dev/null +++ b/docs/en/operations/clickhouse-keeper.md @@ -0,0 +1,114 @@ +--- +toc_priority: 66 +toc_title: ClickHouse Keeper +--- + +# [pre-production] clickhouse-keeper + +The ClickHouse server uses the [ZooKeeper](https://zookeeper.apache.org/) coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) query execution. ClickHouse Keeper is an alternative coordination system compatible with ZooKeeper. + +!!! warning "Warning" + This feature is currently in the pre-production stage. We test it in our CI and on small internal installations. + +## Implementation details + +ZooKeeper is one of the first well-known open-source coordination systems. It is implemented in Java and has quite a simple and powerful data model. ZooKeeper's coordination algorithm, ZAB (ZooKeeper Atomic Broadcast), doesn't provide linearizability guarantees for reads, because each ZooKeeper node serves reads locally. Unlike ZooKeeper, `clickhouse-keeper` is written in C++ and uses the [RAFT algorithm](https://raft.github.io/) [implementation](https://github.com/eBay/NuRaft). This algorithm provides linearizability for both reads and writes and has several open-source implementations in different languages. + +By default, `clickhouse-keeper` provides the same guarantees as ZooKeeper (linearizable writes, non-linearizable reads). It has a compatible client-server protocol, so any standard ZooKeeper client can be used to interact with `clickhouse-keeper`. Snapshots and logs have a format incompatible with ZooKeeper, but the `clickhouse-keeper-converter` tool allows converting ZooKeeper data to a `clickhouse-keeper` snapshot. The interserver protocol in `clickhouse-keeper` is also incompatible with ZooKeeper, so a mixed ZooKeeper/clickhouse-keeper cluster is impossible. + +## Configuration + +`clickhouse-keeper` can be used as a standalone replacement for ZooKeeper or as an internal part of the `clickhouse-server`, but in both cases the configuration is almost the same `.xml` file. The main `clickhouse-keeper` configuration tag is `<keeper_server>`. Keeper configuration has the following parameters: + +- `tcp_port` — the port for a client to connect (default for ZooKeeper is `2181`) +- `tcp_port_secure` — the secure port for a client to connect +- `server_id` — unique server id; each participant of the clickhouse-keeper cluster must have a unique number (1, 2, 3, and so on) +- `log_storage_path` — path to coordination logs; it is better to store logs on a non-busy device (the same applies to ZooKeeper) +- `snapshot_storage_path` — path to coordination snapshots + +Other common parameters are inherited from the clickhouse-server config (`listen_host`, `logger`, and so on).
+ + Internal coordination settings are located in the `<keeper_server>.<coordination_settings>` section: + + - `operation_timeout_ms` — timeout for a single client operation + - `session_timeout_ms` — timeout for a client session + - `dead_session_check_period_ms` — how often clickhouse-keeper checks dead sessions and removes them + - `heart_beat_interval_ms` — how often a clickhouse-keeper leader will send heartbeats to followers + - `election_timeout_lower_bound_ms` — if a follower didn't receive heartbeats from the leader in this interval, it can initiate a leader election + - `election_timeout_upper_bound_ms` — if a follower didn't receive heartbeats from the leader in this interval, it must initiate a leader election + - `rotate_log_storage_interval` — how many logs to store in a single file + - `reserved_log_items` — how many coordination logs to store before compaction + - `snapshot_distance` — how often clickhouse-keeper will create new snapshots (in the number of logs) + - `snapshots_to_keep` — how many snapshots to keep + - `stale_log_gap` — the threshold at which the leader considers a follower stale and sends a snapshot to it instead of logs + - `force_sync` — call `fsync` on each write to the coordination log + - `raft_logs_level` — text logging level for coordination (trace, debug, and so on) + - `shutdown_timeout` — how long to wait for internal connections to finish on shutdown + - `startup_timeout` — if the server doesn't connect to other quorum participants within the specified timeout, it will terminate + + Quorum configuration is located in the `<keeper_server>.<raft_configuration>` section and contains a description of the servers. The only parameter for the whole quorum is `secure`, which enables encrypted connections for communication between quorum participants. The main parameters for each `<server>` are: + + - `id` — server_id in the quorum + - `hostname` — hostname where this server is placed + - `port` — port where this server listens for connections + + + Examples of configuration for a quorum with three nodes can be found in [integration tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/integration) with the `test_keeper_` prefix. Example configuration for server #1: + +```xml + <keeper_server> + <tcp_port>2181</tcp_port> + <server_id>1</server_id> + <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path> + <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path> + + <coordination_settings> + <operation_timeout_ms>10000</operation_timeout_ms> + <session_timeout_ms>30000</session_timeout_ms> + <raft_logs_level>trace</raft_logs_level> + </coordination_settings> + + <raft_configuration> + <server> + <id>1</id> + <hostname>zoo1</hostname> + <port>9444</port> + </server> + <server> + <id>2</id> + <hostname>zoo2</hostname> + <port>9444</port> + </server> + <server> + <id>3</id> + <hostname>zoo3</hostname> + <port>9444</port> + </server> + </raft_configuration> + </keeper_server> +``` + +## How to run + +`clickhouse-keeper` is bundled into the `clickhouse-server` package; just add the `<keeper_server>` configuration and start clickhouse-server as always. If you want to run a standalone `clickhouse-keeper`, you can start it in a similar way with: + +```bash +clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon +``` + +## [experimental] Migration from ZooKeeper + +Seamless migration from ZooKeeper to `clickhouse-keeper` is impossible: you have to stop your ZooKeeper cluster, convert the data, and start `clickhouse-keeper`. The `clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to a `clickhouse-keeper` snapshot. It works only with ZooKeeper > 3.4. Steps for migration: + +1. Stop all ZooKeeper nodes. + +2. [optional, but recommended] Find the ZooKeeper leader node, then start and stop it again. It will force ZooKeeper to create a consistent snapshot. + +3. Run `clickhouse-keeper-converter` on the leader, for example: + +```bash +clickhouse-keeper-converter --zookeeper-logs-dir /var/lib/zookeeper/version-2 --zookeeper-snapshots-dir /var/lib/zookeeper/version-2 --output-dir /path/to/clickhouse/keeper/snapshots +``` + +4.
Copy snapshot to `clickhouse-server` nodes with configured `keeper` or start `clickhouse-keeper` instead of ZooKeeper. Snapshot must persist only on leader node, leader will sync it automatically to other nodes. + diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 96009c75af1..5c942efc77f 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -22,6 +22,23 @@ Some settings specified in the main configuration file can be overridden in othe The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md)). +If you want to replace an entire element with a substitution use `include` as element name. + +XML substitution example: + +```xml + + + + + + + + + + +``` + Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element. ## User Settings {#user-settings} @@ -32,6 +49,8 @@ Users configuration can be splitted into separate files similar to `config.xml` Directory name is defined as `users_config` setting without `.xml` postfix concatenated with `.d`. Directory `users.d` is used by default, as `users_config` defaults to `users.xml`. +Note that configuration files are first merged taking into account [Override](#override) settings and includes are processed after that. + ## XML example {#example} For example, you can have separate config file for each user like this: diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md index 7c0fe11ae64..d7f142dd8b1 100644 --- a/docs/en/sql-reference/functions/ext-dict-functions.md +++ b/docs/en/sql-reference/functions/ext-dict-functions.md @@ -12,7 +12,7 @@ For information on connecting and configuring external dictionaries, see [Extern ## dictGet, dictGetOrDefault, dictGetOrNull {#dictget} -Retrieves values from an external dictionary. +Retrieves values from an external dictionary. ``` sql dictGet('dict_name', attr_names, id_expr) @@ -24,7 +24,7 @@ dictGetOrNull('dict_name', attr_name, id_expr) - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../../sql-reference/data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). -- `id_expr` — Key value. 
[Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. - `default_value_expr` — Values returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) or [Tuple](../../sql-reference/data-types/tuple.md)([Expression](../../sql-reference/syntax.md#syntax-expressions)), returning the value (or values) in the data types configured for the `attr_names` attribute. **Returned value** @@ -138,7 +138,7 @@ Configure the external dictionary: c2 String - + 0 @@ -237,7 +237,7 @@ dictHas('dict_name', id_expr) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. **Returned value** @@ -292,16 +292,16 @@ Type: `UInt8`. Returns first-level children as an array of indexes. It is the inverse transformation for [dictGetHierarchy](#dictgethierarchy). -**Syntax** +**Syntax** ``` sql dictGetChildren(dict_name, key) ``` -**Arguments** +**Arguments** -- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. **Returned values** @@ -339,7 +339,7 @@ SELECT dictGetChildren('hierarchy_flat_dictionary', number) FROM system.numbers ## dictGetDescendant {#dictgetdescendant} -Returns all descendants as if [dictGetChildren](#dictgetchildren) function was applied `level` times recursively. +Returns all descendants as if [dictGetChildren](#dictgetchildren) function was applied `level` times recursively. **Syntax** @@ -347,9 +347,9 @@ Returns all descendants as if [dictGetChildren](#dictgetchildren) function was a dictGetDescendants(dict_name, key, level) ``` -**Arguments** +**Arguments** -- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. - `level` — Hierarchy level. If `level = 0` returns all descendants to the end. [UInt8](../../sql-reference/data-types/int-uint.md). 
diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 4189d0feeb5..39e59ae2ba9 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -87,6 +87,8 @@ Result: └───────┴───────┘ ``` +Note: the names are implementation specific and are subject to change. You should not assume specific names of the columns after application of the `untuple`. + Example of using an `EXCEPT` expression: Query: diff --git a/docs/zh/sql-reference/functions/bitmap-functions.md b/docs/zh/sql-reference/functions/bitmap-functions.md index 5a6baf2f217..5a9a88c5be1 100644 --- a/docs/zh/sql-reference/functions/bitmap-functions.md +++ b/docs/zh/sql-reference/functions/bitmap-functions.md @@ -81,7 +81,7 @@ SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11, **示例** ``` sql -SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res +SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res ``` ┌─res───────────────────────┐ @@ -174,7 +174,7 @@ SELECT bitmapToArray(bitmapAnd(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS re │ [3] │ └─────┘ -## 位图 {#bitmapor} +## 位图或 {#bitmapor} 为两个位图对象进行或操作,返回一个新的位图对象。 diff --git a/docs/zh/sql-reference/table-functions/mysql.md b/docs/zh/sql-reference/table-functions/mysql.md index c54cd7d2a06..3ed0001b0a0 100644 --- a/docs/zh/sql-reference/table-functions/mysql.md +++ b/docs/zh/sql-reference/table-functions/mysql.md @@ -1,13 +1,8 @@ ---- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_priority: 42 -toc_title: mysql ---- - # mysql {#mysql} -允许 `SELECT` 要对存储在远程MySQL服务器上的数据执行的查询。 +允许对存储在远程MySQL服务器上的数据执行`SELECT`和`INSERT`查询。 + +**语法** ``` sql mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']); @@ -15,31 +10,44 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_ **参数** -- `host:port` — MySQL server address. +- `host:port` — MySQL服务器地址. -- `database` — Remote database name. +- `database` — 远程数据库名称. -- `table` — Remote table name. +- `table` — 远程表名称. -- `user` — MySQL user. +- `user` — MySQL用户. -- `password` — User password. +- `password` — 用户密码. -- `replace_query` — Flag that converts `INSERT INTO` 查询到 `REPLACE INTO`. 如果 `replace_query=1`,查询被替换。 +- `replace_query` — 将INSERT INTO` 查询转换为 `REPLACE INTO`的标志。如果 `replace_query=1`,查询被替换。 -- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` 表达式被添加到 `INSERT` 查询。 +- `on_duplicate_clause` — 添加 `ON DUPLICATE KEY on_duplicate_clause` 表达式到 `INSERT` 查询。明确规定只能使用 `replace_query = 0` ,如果你同时设置replace_query = 1`和`on_duplicate_clause`,ClickHouse将产生异常。 - Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, where `on_duplicate_clause` is `UPDATE c2 = c2 + 1`. See the MySQL documentation to find which `on_duplicate_clause` you can use with the `ON DUPLICATE KEY` clause. + 示例:`INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1` - To specify `on_duplicate_clause` you need to pass `0` to the `replace_query` parameter. If you simultaneously pass `replace_query = 1` and `on_duplicate_clause`, ClickHouse generates an exception. 
+ `on_duplicate_clause`这里是`UPDATE c2 = c2 + 1`。请查阅MySQL文档,来找到可以和`ON DUPLICATE KEY`一起使用的 `on_duplicate_clause`子句。 -简单 `WHERE` 条款如 `=, !=, >, >=, <, <=` 当前在MySQL服务器上执行。 +简单的 `WHERE` 子句如 `=, !=, >, >=, <, <=` 将即时在MySQL服务器上执行。其余的条件和 `LIMIT` 只有在对MySQL的查询完成后,才会在ClickHouse中执行采样约束。 -其余的条件和 `LIMIT` 只有在对MySQL的查询完成后,才会在ClickHouse中执行采样约束。 +支持使用`|`并列进行多副本查询,示例如下: + +```sql +SELECT name FROM mysql(`mysql{1|2|3}:3306`, 'mysql_database', 'mysql_table', 'user', 'password'); +``` + +或 + +```sql +SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database', 'mysql_table', 'user', 'password'); +``` **返回值** -与原始MySQL表具有相同列的table对象。 +与原始MySQL表具有相同列的表对象。 + +!!! note "注意" + 在`INSERT`查询中为了区分`mysql(...)`与带有列名列表的表名的表函数,你必须使用关键字`FUNCTION`或`TABLE FUNCTION`。查看如下示例。 ## 用法示例 {#usage-example} @@ -66,7 +74,7 @@ mysql> select * from test; 1 row in set (0,00 sec) ``` -从ClickHouse中选择数据: +从ClickHouse中查询数据: ``` sql SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123') @@ -78,6 +86,21 @@ SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123') └────────┴──────────────┴───────┴────────────────┘ ``` +替换和插入: + +```sql +INSERT INTO FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 1) (int_id, float) VALUES (1, 3); +INSERT INTO TABLE FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 0, 'UPDATE int_id = int_id + 1') (int_id, float) VALUES (1, 4); +SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123'); +``` + +```text +┌─int_id─┬─float─┐ +│ 1 │ 3 │ +│ 2 │ 4 │ +└────────┴───────┘ +``` + ## 另请参阅 {#see-also} - [该 ‘MySQL’ 表引擎](../../engines/table-engines/integrations/mysql.md) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index c4aef014971..9c1c8338321 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -430,6 +430,7 @@ private: {TokenType::ClosingRoundBracket, Replxx::Color::BROWN}, {TokenType::OpeningSquareBracket, Replxx::Color::BROWN}, {TokenType::ClosingSquareBracket, Replxx::Color::BROWN}, + {TokenType::DoubleColon, Replxx::Color::BROWN}, {TokenType::OpeningCurlyBrace, Replxx::Color::INTENSE}, {TokenType::ClosingCurlyBrace, Replxx::Color::INTENSE}, diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 2633f0e9426..6be7ba1ad73 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -388,24 +388,32 @@ void LocalServer::processQueries() /// Use the same query_id (and thread group) for all queries CurrentThread::QueryScope query_scope_holder(context); - ///Set progress show + /// Set progress show need_render_progress = config().getBool("progress", false); + std::function finalize_progress; if (need_render_progress) { + /// Set progress callback, which can be run from multiple threads. context->setProgressCallback([&](const Progress & value) { /// Write progress only if progress was updated if (progress_indication.updateProgress(value)) progress_indication.writeProgress(); }); + + /// Set finalizing callback for progress, which is called right before finalizing query output. + finalize_progress = [&]() + { + progress_indication.clearProgressOutput(); + }; + + /// Set callback for file processing progress. 
+ progress_indication.setFileProgressCallback(context); } bool echo_queries = config().hasOption("echo") || config().hasOption("verbose"); - if (need_render_progress) - progress_indication.setFileProgressCallback(context); - std::exception_ptr exception; for (const auto & query : queries) @@ -425,7 +433,7 @@ void LocalServer::processQueries() try { - executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, context, {}); + executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, context, {}, finalize_progress); } catch (...) { diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 313523d19dc..d4f830e5a0c 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -477,17 +477,6 @@ int Server::main(const std::vector & /*args*/) CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); - if (ThreadFuzzer::instance().isEffective()) - LOG_WARNING(log, "ThreadFuzzer is enabled. Application will run slowly and unstable."); - -#if !defined(NDEBUG) || !defined(__OPTIMIZE__) - LOG_WARNING(log, "Server was built in debug mode. It will work slowly."); -#endif - -#if defined(SANITIZER) - LOG_WARNING(log, "Server was built with sanitizer. It will work slowly."); -#endif - /** Context contains all that query execution is dependent: * settings, available functions, data types, aggregate functions, databases, ... */ @@ -497,6 +486,18 @@ int Server::main(const std::vector & /*args*/) global_context->makeGlobalContext(); global_context->setApplicationType(Context::ApplicationType::SERVER); +#if !defined(NDEBUG) || !defined(__OPTIMIZE__) + global_context->addWarningMessage("Server was built in debug mode. It will work slowly."); +#endif + +if (ThreadFuzzer::instance().isEffective()) + global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable."); + +#if defined(SANITIZER) + global_context->addWarningMessage("Server was built with sanitizer. It will work slowly."); +#endif + + // Initialize global thread pool. Do it before we fetch configs from zookeeper // nodes (`from_zk`), because ZooKeeper interface uses the pool. We will // ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well. @@ -552,8 +553,10 @@ int Server::main(const std::vector & /*args*/) if (ptrace(PTRACE_TRACEME, 0, nullptr, nullptr) == -1) { /// Program is run under debugger. Modification of it's binary image is ok for breakpoints. 
- LOG_WARNING(log, "Server is run under debugger and its binary image is modified (most likely with breakpoints).", - calculated_binary_hash); + global_context->addWarningMessage( + fmt::format("Server is run under debugger and its binary image is modified (most likely with breakpoints).", + calculated_binary_hash) + ); } else { @@ -636,7 +639,7 @@ int Server::main(const std::vector & /*args*/) } else { - LOG_WARNING(log, message); + global_context->addWarningMessage(message); } } diff --git a/src/AggregateFunctions/AggregateFunctionBitwise.h b/src/AggregateFunctions/AggregateFunctionBitwise.h index 5582a200921..90db2469828 100644 --- a/src/AggregateFunctions/AggregateFunctionBitwise.h +++ b/src/AggregateFunctions/AggregateFunctionBitwise.h @@ -9,6 +9,14 @@ #include +#if !defined(ARCADIA_BUILD) +# include +#endif + +#if USE_EMBEDDED_COMPILER +# include +# include +#endif namespace DB { @@ -21,6 +29,21 @@ struct AggregateFunctionGroupBitOrData T value = 0; static const char * name() { return "groupBitOr"; } void update(T x) { value |= x; } + +#if USE_EMBEDDED_COMPILER + + static void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * value_ptr) + { + auto type = toNativeType(builder); + builder.CreateStore(llvm::Constant::getNullValue(type), value_ptr); + } + + static llvm::Value* compileUpdate(llvm::IRBuilderBase & builder, llvm::Value * lhs, llvm::Value * rhs) + { + return builder.CreateOr(lhs, rhs); + } + +#endif }; template @@ -29,6 +52,21 @@ struct AggregateFunctionGroupBitAndData T value = -1; /// Two's complement arithmetic, sign extension. static const char * name() { return "groupBitAnd"; } void update(T x) { value &= x; } + +#if USE_EMBEDDED_COMPILER + + static void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * value_ptr) + { + auto type = toNativeType(builder); + builder.CreateStore(llvm::ConstantInt::get(type, -1), value_ptr); + } + + static llvm::Value* compileUpdate(llvm::IRBuilderBase & builder, llvm::Value * lhs, llvm::Value * rhs) + { + return builder.CreateAnd(lhs, rhs); + } + +#endif }; template @@ -37,6 +75,21 @@ struct AggregateFunctionGroupBitXorData T value = 0; static const char * name() { return "groupBitXor"; } void update(T x) { value ^= x; } + +#if USE_EMBEDDED_COMPILER + + static void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * value_ptr) + { + auto type = toNativeType(builder); + builder.CreateStore(llvm::Constant::getNullValue(type), value_ptr); + } + + static llvm::Value* compileUpdate(llvm::IRBuilderBase & builder, llvm::Value * lhs, llvm::Value * rhs) + { + return builder.CreateXor(lhs, rhs); + } + +#endif }; @@ -45,7 +98,7 @@ template class AggregateFunctionBitwise final : public IAggregateFunctionDataHelper> { public: - AggregateFunctionBitwise(const DataTypePtr & type) + explicit AggregateFunctionBitwise(const DataTypePtr & type) : IAggregateFunctionDataHelper>({type}, {}) {} String getName() const override { return Data::name(); } @@ -81,6 +134,68 @@ public: { assert_cast &>(to).getData().push_back(this->data(place).value); } + +#if USE_EMBEDDED_COMPILER + + bool isCompilable() const override + { + auto return_type = getReturnType(); + return canBeNativeType(*return_type); + } + + void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override + { + llvm::IRBuilder<> & b = static_cast &>(builder); + + auto * return_type = toNativeType(b, getReturnType()); + auto * value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo()); + Data::compileCreate(builder, 
value_ptr); + } + + void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes &, const std::vector & argument_values) const override + { + llvm::IRBuilder<> & b = static_cast &>(builder); + + auto * return_type = toNativeType(b, getReturnType()); + + auto * value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo()); + auto * value = b.CreateLoad(return_type, value_ptr); + + const auto & argument_value = argument_values[0]; + auto * result_value = Data::compileUpdate(builder, value, argument_value); + + b.CreateStore(result_value, value_ptr); + } + + void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override + { + llvm::IRBuilder<> & b = static_cast &>(builder); + + auto * return_type = toNativeType(b, getReturnType()); + + auto * value_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, return_type->getPointerTo()); + auto * value_dst = b.CreateLoad(return_type, value_dst_ptr); + + auto * value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, return_type->getPointerTo()); + auto * value_src = b.CreateLoad(return_type, value_src_ptr); + + auto * result_value = Data::compileUpdate(builder, value_dst, value_src); + + b.CreateStore(result_value, value_dst_ptr); + } + + llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override + { + llvm::IRBuilder<> & b = static_cast &>(builder); + + auto * return_type = toNativeType(b, getReturnType()); + auto * value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo()); + + return b.CreateLoad(return_type, value_ptr); + } + +#endif + }; diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp b/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp index 4e4f89d8846..846476a5b79 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp +++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 4be2455d71e..3355cb0d6fc 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -101,6 +101,24 @@ struct AggregateFunctionSumData { const auto * end = ptr + count; + if constexpr ( + (is_integer_v && !is_big_int_v) + || (IsDecimalNumber && !std::is_same_v && !std::is_same_v)) + { + /// For integers we can vectorize the operation if we replace the null check using a multiplication (by 0 for null, 1 for not null) + /// https://quick-bench.com/q/MLTnfTvwC2qZFVeWHfOBR3U7a8I + T local_sum{}; + while (ptr < end) + { + T multiplier = !*null_map; + Impl::add(local_sum, *ptr * multiplier); + ++ptr; + ++null_map; + } + Impl::add(sum, local_sum); + return; + } + if constexpr (std::is_floating_point_v) { constexpr size_t unroll_count = 128 / sizeof(T); diff --git a/src/AggregateFunctions/AggregateFunctionUniq.cpp b/src/AggregateFunctions/AggregateFunctionUniq.cpp index 0e76bd5f756..0d1c831c839 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.cpp +++ b/src/AggregateFunctions/AggregateFunctionUniq.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -49,6 +50,8 @@ AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const return res; else if (which.isDate()) return std::make_shared>(argument_types); + else if (which.isDate32()) + return 
std::make_shared>(argument_types); else if (which.isDateTime()) return std::make_shared>(argument_types); else if (which.isStringOrFixedString()) @@ -95,6 +98,8 @@ AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const return res; else if (which.isDate()) return std::make_shared>>(argument_types); + else if (which.isDate32()) + return std::make_shared>>(argument_types); else if (which.isDateTime()) return std::make_shared>>(argument_types); else if (which.isStringOrFixedString()) diff --git a/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp b/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp index 8d1111519e9..e137937343b 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp +++ b/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -51,6 +52,8 @@ namespace return res; else if (which.isDate()) return std::make_shared::template AggregateFunction>(argument_types, params); + else if (which.isDate32()) + return std::make_shared::template AggregateFunction>(argument_types, params); else if (which.isDateTime()) return std::make_shared::template AggregateFunction>(argument_types, params); else if (which.isStringOrFixedString()) diff --git a/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp b/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp index e417517ef6d..99599f655df 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp +++ b/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -61,6 +62,8 @@ AggregateFunctionPtr createAggregateFunctionUniqUpTo(const std::string & name, c return res; else if (which.isDate()) return std::make_shared>(threshold, argument_types, params); + else if (which.isDate32()) + return std::make_shared>(threshold, argument_types, params); else if (which.isDateTime()) return std::make_shared>(threshold, argument_types, params); else if (which.isStringOrFixedString()) diff --git a/src/AggregateFunctions/AggregateFunctionWindowFunnel.cpp b/src/AggregateFunctions/AggregateFunctionWindowFunnel.cpp index 4d105d2b8b9..961a8ff9081 100644 --- a/src/AggregateFunctions/AggregateFunctionWindowFunnel.cpp +++ b/src/AggregateFunctions/AggregateFunctionWindowFunnel.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 81360c6794b..03ee76240cb 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -298,11 +298,19 @@ void ConfigProcessor::doIncludesRecursive( { const auto * subst = attributes->getNamedItem(attr_name); attr_nodes[attr_name] = subst; - substs_count += static_cast(subst == nullptr); + substs_count += static_cast(subst != nullptr); } - if (substs_count < SUBSTITUTION_ATTRS.size() - 1) /// only one substitution is allowed - throw Poco::Exception("several substitutions attributes set for element <" + node->nodeName() + ">"); + if (substs_count > 1) /// only one substitution is allowed + throw Poco::Exception("More than one substitution attribute is set for element <" + node->nodeName() + ">"); + + if (node->nodeName() == "include") + { + if (node->hasChildNodes()) + throw Poco::Exception(" element must have no children"); + if (substs_count == 0) + throw Poco::Exception("No substitution attributes set for element , must have exactly one"); + } /// Replace the original contents, not add to it. 
bool replace = attributes->getNamedItem("replace"); @@ -320,37 +328,57 @@ void ConfigProcessor::doIncludesRecursive( else if (throw_on_bad_incl) throw Poco::Exception(error_msg + name); else + { + if (node->nodeName() == "include") + node->parentNode()->removeChild(node); + LOG_WARNING(log, "{}{}", error_msg, name); + } } else { - Element & element = dynamic_cast(*node); - - for (const auto & attr_name : SUBSTITUTION_ATTRS) - element.removeAttribute(attr_name); - - if (replace) + /// Replace the whole node not just contents. + if (node->nodeName() == "include") { - while (Node * child = node->firstChild()) - node->removeChild(child); + const NodeListPtr children = node_to_include->childNodes(); + for (size_t i = 0, size = children->length(); i < size; ++i) + { + NodePtr new_node = config->importNode(children->item(i), true); + node->parentNode()->insertBefore(new_node, node); + } - element.removeAttribute("replace"); + node->parentNode()->removeChild(node); } - - const NodeListPtr children = node_to_include->childNodes(); - for (size_t i = 0, size = children->length(); i < size; ++i) + else { - NodePtr new_node = config->importNode(children->item(i), true); - node->appendChild(new_node); - } + Element & element = dynamic_cast(*node); - const NamedNodeMapPtr from_attrs = node_to_include->attributes(); - for (size_t i = 0, size = from_attrs->length(); i < size; ++i) - { - element.setAttributeNode(dynamic_cast(config->importNode(from_attrs->item(i), true))); - } + for (const auto & attr_name : SUBSTITUTION_ATTRS) + element.removeAttribute(attr_name); - included_something = true; + if (replace) + { + while (Node * child = node->firstChild()) + node->removeChild(child); + + element.removeAttribute("replace"); + } + + const NodeListPtr children = node_to_include->childNodes(); + for (size_t i = 0, size = children->length(); i < size; ++i) + { + NodePtr new_node = config->importNode(children->item(i), true); + node->appendChild(new_node); + } + + const NamedNodeMapPtr from_attrs = node_to_include->attributes(); + for (size_t i = 0, size = from_attrs->length(); i < size; ++i) + { + element.setAttributeNode(dynamic_cast(config->importNode(from_attrs->item(i), true))); + } + + included_something = true; + } } }; diff --git a/src/Common/Config/configReadClient.cpp b/src/Common/Config/configReadClient.cpp index cbe5b3f7bc2..e7bc0b72814 100644 --- a/src/Common/Config/configReadClient.cpp +++ b/src/Common/Config/configReadClient.cpp @@ -10,16 +10,10 @@ namespace fs = std::filesystem; namespace DB { -/// Checks if file exists without throwing an exception but with message in console. -bool safeFsExists(const auto & path) +bool safeFsExists(const String & path) { std::error_code ec; - bool res = fs::exists(path, ec); - if (ec) - { - std::cerr << "Can't check '" << path << "': [" << ec.value() << "] " << ec.message() << std::endl; - } - return res; + return fs::exists(path, ec); }; bool configReadClient(Poco::Util::LayeredConfiguration & config, const std::string & home_path) diff --git a/src/Common/HashTable/StringHashTable.h b/src/Common/HashTable/StringHashTable.h index b05d119e0e9..d30271d65db 100644 --- a/src/Common/HashTable/StringHashTable.h +++ b/src/Common/HashTable/StringHashTable.h @@ -237,7 +237,12 @@ public: // 1. Always memcpy 8 times bytes // 2. Use switch case extension to generate fast dispatching table // 3. 
Funcs are named callables that can be force_inlined + // // NOTE: It relies on Little Endianness + // + // NOTE: It requires padded to 8 bytes keys (IOW you cannot pass + // std::string here, but you can pass i.e. ColumnString::getDataAt()), + // since it copies 8 bytes at a time. template static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func) { diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index dffe2239e62..915d14466b6 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -22,10 +22,6 @@ M(WriteBufferFromFileDescriptorWrite, "Number of writes (write/pwrite) to a file descriptor. Does not include sockets.") \ M(WriteBufferFromFileDescriptorWriteFailed, "Number of times the write (write/pwrite) to a file descriptor have failed.") \ M(WriteBufferFromFileDescriptorWriteBytes, "Number of bytes written to file descriptors. If the file is compressed, this will show compressed data size.") \ - M(ReadBufferAIORead, "") \ - M(ReadBufferAIOReadBytes, "") \ - M(WriteBufferAIOWrite, "") \ - M(WriteBufferAIOWriteBytes, "") \ M(ReadCompressedBytes, "Number of bytes (the number of bytes before decompression) read from compressed sources (files, network).") \ M(CompressedReadBufferBlocks, "Number of compressed blocks (the blocks of data that are compressed independent of each other) read from compressed sources (files, network).") \ M(CompressedReadBufferBytes, "Number of uncompressed bytes (the number of bytes after decompression) read from compressed sources (files, network).") \ @@ -34,6 +30,10 @@ M(UncompressedCacheWeightLost, "") \ M(MMappedFileCacheHits, "") \ M(MMappedFileCacheMisses, "") \ + M(AIOWrite, "Number of writes with Linux or FreeBSD AIO interface") \ + M(AIOWriteBytes, "Number of bytes written with Linux or FreeBSD AIO interface") \ + M(AIORead, "Number of reads with Linux or FreeBSD AIO interface") \ + M(AIOReadBytes, "Number of bytes read with Linux or FreeBSD AIO interface") \ M(IOBufferAllocs, "") \ M(IOBufferAllocBytes, "") \ M(ArenaAllocChunks, "") \ @@ -43,8 +43,8 @@ M(MarkCacheHits, "") \ M(MarkCacheMisses, "") \ M(CreatedReadBufferOrdinary, "") \ - M(CreatedReadBufferAIO, "") \ - M(CreatedReadBufferAIOFailed, "") \ + M(CreatedReadBufferDirectIO, "") \ + M(CreatedReadBufferDirectIOFailed, "") \ M(CreatedReadBufferMMap, "") \ M(CreatedReadBufferMMapFailed, "") \ M(DiskReadElapsedMicroseconds, "Total time spent waiting for read syscall. This include reads from page cache.") \ diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp index e1a7c420c54..0d65eaece86 100644 --- a/src/Common/ProgressIndication.cpp +++ b/src/Common/ProgressIndication.cpp @@ -4,9 +4,6 @@ #include #include -/// FIXME: progress bar in clickhouse-local needs to be cleared after query execution -/// - same as it is now in clickhouse-client. Also there is no writeFinalProgress call -/// in clickhouse-local. namespace DB { diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index a816c1eb8bb..eb7f42f900a 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -45,6 +45,8 @@ struct ZooKeeperRequest : virtual Request /// If the request was sent and we didn't get the response and the error happens, then we cannot be sure was it processed or not. 
bool probably_sent = false; + bool restored_from_zookeeper_log = false; + ZooKeeperRequest() = default; ZooKeeperRequest(const ZooKeeperRequest &) = default; virtual ~ZooKeeperRequest() override = default; @@ -172,6 +174,9 @@ struct ZooKeeperCloseResponse final : ZooKeeperResponse struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest { + /// used only during restore from zookeeper log + int32_t parent_cversion = -1; + ZooKeeperCreateRequest() = default; explicit ZooKeeperCreateRequest(const CreateRequest & base) : CreateRequest(base) {} @@ -183,9 +188,6 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest bool isReadRequest() const override { return false; } size_t bytesSize() const override { return CreateRequest::bytesSize() + sizeof(xid) + sizeof(has_watch); } - - /// During recovery from log we don't rehash ACLs - bool need_to_hash_acls = true; }; struct ZooKeeperCreateResponse final : CreateResponse, ZooKeeperResponse @@ -362,8 +364,6 @@ struct ZooKeeperSetACLRequest final : SetACLRequest, ZooKeeperRequest bool isReadRequest() const override { return false; } size_t bytesSize() const override { return SetACLRequest::bytesSize() + sizeof(xid); } - - bool need_to_hash_acls = true; }; struct ZooKeeperSetACLResponse final : SetACLResponse, ZooKeeperResponse diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index e14a1784b14..22ffb74f61a 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -47,13 +47,13 @@ CompressedReadBufferFromFile::CompressedReadBufferFromFile(std::unique_ptr(0) - , p_file_in(createReadBufferFromFileBase(path, estimated_size, aio_threshold, mmap_threshold, mmap_cache, buf_size)) + , p_file_in(createReadBufferFromFileBase(path, estimated_size, direct_io_threshold, mmap_threshold, mmap_cache, buf_size)) , file_in(*p_file_in) { compressed_in = &file_in; diff --git a/src/Compression/CompressedReadBufferFromFile.h b/src/Compression/CompressedReadBufferFromFile.h index 2ee7021b35a..fe9add6f015 100644 --- a/src/Compression/CompressedReadBufferFromFile.h +++ b/src/Compression/CompressedReadBufferFromFile.h @@ -33,7 +33,7 @@ public: CompressedReadBufferFromFile(std::unique_ptr buf, bool allow_different_codecs_ = false); CompressedReadBufferFromFile( - const std::string & path, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache, + const std::string & path, size_t estimated_size, size_t direct_io_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, bool allow_different_codecs_ = false); void seek(size_t offset_in_compressed_file, size_t offset_in_decompressed_block); diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 97c78e04f05..4c3f649a6b6 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -267,13 +267,12 @@ struct KeeperStorageCreateRequest final : public KeeperStorageRequest } else { - auto & session_auth_ids = storage.session_and_auth[session_id]; KeeperStorage::Node created_node; Coordination::ACLs node_acls; - if (!fixupACL(request.acls, session_auth_ids, node_acls, request.need_to_hash_acls)) + if (!fixupACL(request.acls, session_auth_ids, node_acls, !request.restored_from_zookeeper_log)) { response.error = Coordination::Error::ZINVALIDACL; return {response_ptr, {}}; @@ -307,16 +306,28 @@ struct 
KeeperStorageCreateRequest final : public KeeperStorageRequest path_created += seq_num_str.str(); } + int32_t parent_cversion = request.parent_cversion; auto child_path = getBaseName(path_created); int64_t prev_parent_zxid; - container.updateValue(parent_path, [child_path, zxid, &prev_parent_zxid] (KeeperStorage::Node & parent) + int32_t prev_parent_cversion; + container.updateValue(parent_path, [child_path, zxid, &prev_parent_zxid, + parent_cversion, &prev_parent_cversion] (KeeperStorage::Node & parent) { + + parent.children.insert(child_path); + prev_parent_cversion = parent.stat.cversion; + prev_parent_zxid = parent.stat.pzxid; + /// Increment sequential number even if node is not sequential ++parent.seq_num; - parent.children.insert(child_path); - ++parent.stat.cversion; - prev_parent_zxid = parent.stat.pzxid; - parent.stat.pzxid = zxid; + + if (parent_cversion == -1) + ++parent.stat.cversion; + else if (parent_cversion > parent.stat.cversion) + parent.stat.cversion = parent_cversion; + + if (zxid > parent.stat.pzxid) + parent.stat.pzxid = zxid; ++parent.stat.numChildren; }); @@ -326,7 +337,7 @@ struct KeeperStorageCreateRequest final : public KeeperStorageRequest if (request.is_ephemeral) ephemerals[session_id].emplace(path_created); - undo = [&storage, prev_parent_zxid, session_id, path_created, is_ephemeral = request.is_ephemeral, parent_path, child_path, acl_id] + undo = [&storage, prev_parent_zxid, prev_parent_cversion, session_id, path_created, is_ephemeral = request.is_ephemeral, parent_path, child_path, acl_id] { storage.container.erase(path_created); storage.acl_map.removeUsage(acl_id); @@ -334,11 +345,11 @@ struct KeeperStorageCreateRequest final : public KeeperStorageRequest if (is_ephemeral) storage.ephemerals[session_id].erase(path_created); - storage.container.updateValue(parent_path, [child_path, prev_parent_zxid] (KeeperStorage::Node & undo_parent) + storage.container.updateValue(parent_path, [child_path, prev_parent_zxid, prev_parent_cversion] (KeeperStorage::Node & undo_parent) { - --undo_parent.stat.cversion; --undo_parent.stat.numChildren; --undo_parent.seq_num; + undo_parent.stat.cversion = prev_parent_cversion; undo_parent.stat.pzxid = prev_parent_zxid; undo_parent.children.erase(child_path); }); @@ -394,6 +405,24 @@ struct KeeperStorageGetRequest final : public KeeperStorageRequest } }; +namespace +{ + /// Garbage required to apply log to "fuzzy" zookeeper snapshot + void updateParentPzxid(const std::string & child_path, int64_t zxid, KeeperStorage::Container & container) + { + auto parent_path = parentPath(child_path); + auto parent_it = container.find(parent_path); + if (parent_it != container.end()) + { + container.updateValue(parent_path, [zxid](KeeperStorage::Node & parent) + { + if (parent.stat.pzxid < zxid) + parent.stat.pzxid = zxid; + }); + } + } +} + struct KeeperStorageRemoveRequest final : public KeeperStorageRequest { bool checkAuth(KeeperStorage & storage, int64_t session_id) const override @@ -412,7 +441,7 @@ struct KeeperStorageRemoveRequest final : public KeeperStorageRequest } using KeeperStorageRequest::KeeperStorageRequest; - std::pair process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/) const override + std::pair process(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/) const override { auto & container = storage.container; auto & ephemerals = storage.ephemerals; @@ -425,6 +454,8 @@ struct KeeperStorageRemoveRequest final : public KeeperStorageRequest auto it = container.find(request.path); if (it == 
container.end()) { + if (request.restored_from_zookeeper_log) + updateParentPzxid(request.path, zxid, container); response.error = Coordination::Error::ZNONODE; } else if (request.version != -1 && request.version != it->value.stat.version) @@ -437,6 +468,9 @@ struct KeeperStorageRemoveRequest final : public KeeperStorageRequest } else { + if (request.restored_from_zookeeper_log) + updateParentPzxid(request.path, zxid, container); + auto prev_node = it->value; if (prev_node.stat.ephemeralOwner != 0) { @@ -719,7 +753,7 @@ struct KeeperStorageSetACLRequest final : public KeeperStorageRequest auto & session_auth_ids = storage.session_and_auth[session_id]; Coordination::ACLs node_acls; - if (!fixupACL(request.acls, session_auth_ids, node_acls, request.need_to_hash_acls)) + if (!fixupACL(request.acls, session_auth_ids, node_acls, !request.restored_from_zookeeper_log)) { response.error = Coordination::Error::ZINVALIDACL; return {response_ptr, {}}; diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index 8bcce25cfee..cf644110786 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -174,7 +174,22 @@ void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::st LOG_INFO(log, "Deserializing data from snapshot"); int64_t zxid_from_nodes = deserializeStorageData(storage, reader, log); - storage.zxid = std::max(zxid, zxid_from_nodes); + /// In ZooKeeper Snapshots can contain inconsistent state of storage. They call + /// this inconsistent state "fuzzy". So it's guaranteed that snapshot contain all + /// records up to zxid from snapshot name and also some records for future. + /// But it doesn't mean that we have just some state of storage from future (like zxid + 100 log records). + /// We have incorrect state of storage where some random log entries from future were applied.... + /// + /// In ZooKeeper they say that their transactions log is idempotent and can be applied to "fuzzy" state as is. + /// It's true but there is no any general invariant which produces this property. They just have ad-hoc "if's" which detects + /// "fuzzy" state inconsistencies and apply log records in special way. Several examples: + /// https://github.com/apache/zookeeper/blob/master/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java#L453-L463 + /// https://github.com/apache/zookeeper/blob/master/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java#L476-L480 + /// https://github.com/apache/zookeeper/blob/master/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java#L547-L549 + if (zxid_from_nodes > zxid) + LOG_WARNING(log, "ZooKeeper snapshot was in inconsistent (fuzzy) state. Will try to apply log. ZooKeeper create non fuzzy snapshot with restart. 
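updateParentPzxid covers the case where a delete from the log targets a node the fuzzy snapshot no longer contains: the node itself is already gone, but the parent's pzxid still has to advance so the final state matches a clean replay. A minimal sketch under simplified assumptions (a plain std::map stands in for KeeperStorage::Container, and the restored_from_zookeeper_log guard is folded into the function name):

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>

    // Toy container: paths map to nodes.
    struct Node { int64_t pzxid = 0; };
    using Container = std::map<std::string, Node>;

    static std::string parentPath(const std::string & path)
    {
        auto pos = path.rfind('/');
        return pos == 0 ? "/" : path.substr(0, pos);
    }

    // Replaying a delete from a ZooKeeper log against a "fuzzy" snapshot.
    void replayRemove(Container & container, const std::string & path, int64_t zxid)
    {
        auto parent = container.find(parentPath(path));
        if (parent != container.end() && parent->second.pzxid < zxid)
            parent->second.pzxid = zxid;

        container.erase(path);  // no-op if the snapshot already dropped the node
    }

    int main()
    {
        Container container{{"/", {}}, {"/a", {}}};     // "/a/b" already missing from the snapshot
        replayRemove(container, "/a/b", /*zxid=*/42);
        std::cout << container["/a"].pzxid << '\n';     // 42
    }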
You can just restart ZooKeeper server and get consistent version."); + + storage.zxid = zxid; LOG_INFO(log, "Finished, snapshot ZXID {}", storage.zxid); } @@ -210,16 +225,18 @@ void deserializeLogMagic(ReadBuffer & in) static constexpr int32_t LOG_HEADER = 1514884167; /// "ZKLG" if (magic_header != LOG_HEADER) - throw Exception(ErrorCodes::CORRUPTED_DATA ,"Incorrect magic header in file, expected {}, got {}", LOG_HEADER, magic_header); + throw Exception(ErrorCodes::CORRUPTED_DATA, "Incorrect magic header in file, expected {}, got {}", LOG_HEADER, magic_header); if (version != 2) - throw Exception(ErrorCodes::NOT_IMPLEMENTED,"Cannot deserialize ZooKeeper data other than version 2, got version {}", version); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot deserialize ZooKeeper data other than version 2, got version {}", version); } -/// For some reason zookeeper stores slightly different records in log then -/// requests. For example: -/// class CreateTxn { +/// ZooKeeper transactions log differs from requests. The main reason: to store records in log +/// in some "finalized" state (for example with concrete versions). +/// +/// Example: +/// class CreateTxn { /// ustring path; /// buffer data; /// vector acl; @@ -289,10 +306,9 @@ Coordination::ZooKeeperRequestPtr deserializeCreateTxn(ReadBuffer & in) Coordination::read(result->data, in); Coordination::read(result->acls, in); Coordination::read(result->is_ephemeral, in); - result->need_to_hash_acls = false; - /// How we should use it? It should just increment on request execution - int32_t parent_c_version; - Coordination::read(parent_c_version, in); + Coordination::read(result->parent_cversion, in); + + result->restored_from_zookeeper_log = true; return result; } @@ -300,6 +316,7 @@ Coordination::ZooKeeperRequestPtr deserializeDeleteTxn(ReadBuffer & in) { std::shared_ptr result = std::make_shared(); Coordination::read(result->path, in); + result->restored_from_zookeeper_log = true; return result; } @@ -309,6 +326,7 @@ Coordination::ZooKeeperRequestPtr deserializeSetTxn(ReadBuffer & in) Coordination::read(result->path, in); Coordination::read(result->data, in); Coordination::read(result->version, in); + result->restored_from_zookeeper_log = true; /// It stores version + 1 (which should be, not for request) result->version -= 1; @@ -320,6 +338,7 @@ Coordination::ZooKeeperRequestPtr deserializeCheckVersionTxn(ReadBuffer & in) std::shared_ptr result = std::make_shared(); Coordination::read(result->path, in); Coordination::read(result->version, in); + result->restored_from_zookeeper_log = true; return result; } @@ -329,14 +348,19 @@ Coordination::ZooKeeperRequestPtr deserializeCreateSession(ReadBuffer & in) int32_t timeout; Coordination::read(timeout, in); result->session_timeout_ms = timeout; + result->restored_from_zookeeper_log = true; return result; } -Coordination::ZooKeeperRequestPtr deserializeCloseSession(ReadBuffer & in) +Coordination::ZooKeeperRequestPtr deserializeCloseSession(ReadBuffer & in, bool empty) { std::shared_ptr result = std::make_shared(); - std::vector data; - Coordination::read(data, in); + if (!empty) + { + std::vector data; + Coordination::read(data, in); + } + result->restored_from_zookeeper_log = true; return result; } @@ -356,14 +380,14 @@ Coordination::ZooKeeperRequestPtr deserializeSetACLTxn(ReadBuffer & in) Coordination::read(result->version, in); /// It stores version + 1 (which should be, not for request) result->version -= 1; - result->need_to_hash_acls = false; + result->restored_from_zookeeper_log = 
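The "version -= 1" adjustment above exists because a SetTxn record in the log carries the node version after the write was applied, while a set request carries the version the client expected before it. A small illustrative sketch (Node and applySet are hypothetical simplifications, not the KeeperStorage code):

    #include <cstdint>
    #include <stdexcept>
    #include <string>

    // Trimmed-down node; the real check-and-set lives in the set-request handler.
    struct Node { std::string data; int32_t version = 0; };

    // A set succeeds only if the expected (pre-image) version matches, then bumps it.
    void applySet(Node & node, const std::string & data, int32_t expected_version)
    {
        if (expected_version != -1 && expected_version != node.version)
            throw std::runtime_error("ZBADVERSION");
        node.data = data;
        ++node.version;
    }

    // SetTxn stores the version *after* the set, so turning a log record back into a
    // request means expecting that value minus one ("version -= 1").
    int32_t expectedVersionFromTxn(int32_t version_in_log) { return version_in_log - 1; }

    int main()
    {
        Node node{"old", 3};
        applySet(node, "new", expectedVersionFromTxn(/*version stored in the log*/ 4));  // ok: expects 3
    }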
true; return result; } Coordination::ZooKeeperRequestPtr deserializeMultiTxn(ReadBuffer & in); -Coordination::ZooKeeperRequestPtr deserializeTxnImpl(ReadBuffer & in, bool subtxn) +Coordination::ZooKeeperRequestPtr deserializeTxnImpl(ReadBuffer & in, bool subtxn, int64_t txn_length = 0) { int32_t type; Coordination::read(type, in); @@ -372,6 +396,11 @@ Coordination::ZooKeeperRequestPtr deserializeTxnImpl(ReadBuffer & in, bool subtx if (subtxn) Coordination::read(sub_txn_length, in); + bool empty_txn = !subtxn && txn_length == 32; /// Possible for old-style CloseTxn's + + if (empty_txn && type != -11) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Empty non close-session transaction found"); + int64_t in_count_before = in.count(); switch (type) @@ -398,7 +427,7 @@ Coordination::ZooKeeperRequestPtr deserializeTxnImpl(ReadBuffer & in, bool subtx result = deserializeCreateSession(in); break; case -11: - result = deserializeCloseSession(in); + result = deserializeCloseSession(in, empty_txn); break; case -1: result = deserializeErrorTxn(in); @@ -442,7 +471,7 @@ bool hasErrorsInMultiRequest(Coordination::ZooKeeperRequestPtr request) if (request == nullptr) return true; - for (const auto & subrequest : dynamic_cast(request.get())->requests) //-V522 + for (const auto & subrequest : dynamic_cast(request.get())->requests) // -V522 if (subrequest == nullptr) return true; return false; @@ -470,7 +499,7 @@ bool deserializeTxn(KeeperStorage & storage, ReadBuffer & in, Poco::Logger * /*l int64_t time; Coordination::read(time, in); - Coordination::ZooKeeperRequestPtr request = deserializeTxnImpl(in, false); + Coordination::ZooKeeperRequestPtr request = deserializeTxnImpl(in, false, txn_len); /// Skip all other bytes int64_t bytes_read = in.count() - count_before; diff --git a/src/Core/ExternalResultDescription.cpp b/src/Core/ExternalResultDescription.cpp index 809d8c5900d..e8ba9d6020f 100644 --- a/src/Core/ExternalResultDescription.cpp +++ b/src/Core/ExternalResultDescription.cpp @@ -62,6 +62,8 @@ void ExternalResultDescription::init(const Block & sample_block_) types.emplace_back(ValueType::vtString, is_nullable); else if (which.isDate()) types.emplace_back(ValueType::vtDate, is_nullable); + else if (which.isDate32()) + types.emplace_back(ValueType::vtDate32, is_nullable); else if (which.isDateTime()) types.emplace_back(ValueType::vtDateTime, is_nullable); else if (which.isUUID()) diff --git a/src/Core/ExternalResultDescription.h b/src/Core/ExternalResultDescription.h index 6311f816964..78c054e805f 100644 --- a/src/Core/ExternalResultDescription.h +++ b/src/Core/ExternalResultDescription.h @@ -26,6 +26,7 @@ struct ExternalResultDescription vtEnum16, vtString, vtDate, + vtDate32, vtDateTime, vtUUID, vtDateTime64, diff --git a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp index a72c6205cd5..e606300fc37 100644 --- a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp +++ b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp @@ -89,6 +89,9 @@ void insertPostgreSQLValue( case ExternalResultDescription::ValueType::vtDate: assert_cast(column).insertValue(UInt16{LocalDate{std::string(value)}.getDayNum()}); break; + case ExternalResultDescription::ValueType::vtDate32: + assert_cast(column).insertValue(Int32{LocalDate{std::string(value)}.getExtenedDayNum()}); + break; case ExternalResultDescription::ValueType::vtDateTime: { ReadBufferFromString in(value); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 28e46160a98..8c733415dec 100644 --- a/src/Core/Settings.h +++ 
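The new txn_length parameter lets deserializeTxnImpl recognize old-style close-session records that carry no body at all: the fixed transaction header fields read before the body (session id, xid, zxid, time, type) appear to add up to exactly 32 bytes, so a record of length 32 can only be a bare CloseTxn. A sketch of that check (TXN_HEADER_BYTES is my own name for the inferred constant):

    #include <cstdint>
    #include <iostream>
    #include <stdexcept>

    // Header of a serialized ZooKeeper transaction as it appears in the log:
    // session id (8) + xid (4) + zxid (8) + time (8) + type (4).
    constexpr int64_t TXN_HEADER_BYTES = 8 + 4 + 8 + 8 + 4;  // == 32

    // Old ZooKeeper versions wrote close-session transactions with no body, so a record
    // whose length equals the bare header must be a CloseTxn (type -11).
    bool isEmptyCloseTxn(int64_t txn_length, int32_t type)
    {
        bool empty_txn = txn_length == TXN_HEADER_BYTES;
        if (empty_txn && type != -11)
            throw std::runtime_error("Empty non close-session transaction found");
        return empty_txn;
    }

    int main()
    {
        std::cout << std::boolalpha
                  << isEmptyCloseTxn(32, -11) << '\n'   // old-style close session: skip reading a body
                  << isEmptyCloseTxn(48, -11) << '\n';  // newer close session: a body follows
    }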
b/src/Core/Settings.h @@ -108,7 +108,7 @@ class IColumn; M(Bool, compile_expressions, true, "Compile some scalar functions and operators to native code.", 0) \ M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \ M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \ - M(UInt64, min_count_to_compile_aggregate_expression, 3, "The number of identical aggregate expressions before they are JIT-compiled", 0) \ + M(UInt64, min_count_to_compile_aggregate_expression, 0, "The number of identical aggregate expressions before they are JIT-compiled", 0) \ M(UInt64, group_by_two_level_threshold, 100000, "From what number of keys, a two-level aggregation starts. 0 - the threshold is not set.", 0) \ M(UInt64, group_by_two_level_threshold_bytes, 50000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. Two-level aggregation is used when at least one of the thresholds is triggered.", 0) \ M(Bool, distributed_aggregation_memory_efficient, true, "Is the memory-saving mode of distributed aggregation enabled.", 0) \ diff --git a/src/Core/Types.h b/src/Core/Types.h index 151d9bdcf5a..5496f09f3d3 100644 --- a/src/Core/Types.h +++ b/src/Core/Types.h @@ -39,6 +39,7 @@ enum class TypeIndex Float32, Float64, Date, + Date32, DateTime, DateTime64, String, @@ -257,6 +258,7 @@ inline constexpr const char * getTypeName(TypeIndex idx) case TypeIndex::Float32: return "Float32"; case TypeIndex::Float64: return "Float64"; case TypeIndex::Date: return "Date"; + case TypeIndex::Date32: return "Date32"; case TypeIndex::DateTime: return "DateTime"; case TypeIndex::DateTime64: return "DateTime64"; case TypeIndex::String: return "String"; diff --git a/src/Core/callOnTypeIndex.h b/src/Core/callOnTypeIndex.h index 520be263f9f..b0420073998 100644 --- a/src/Core/callOnTypeIndex.h +++ b/src/Core/callOnTypeIndex.h @@ -192,6 +192,7 @@ bool callOnIndexAndDataType(TypeIndex number, F && f, ExtraArgs && ... 
args) case TypeIndex::Decimal256: return f(TypePair, T>(), std::forward(args)...); case TypeIndex::Date: return f(TypePair(), std::forward(args)...); + case TypeIndex::Date32: return f(TypePair(), std::forward(args)...); case TypeIndex::DateTime: return f(TypePair(), std::forward(args)...); case TypeIndex::DateTime64: return f(TypePair(), std::forward(args)...); diff --git a/src/DataTypes/DataTypeDate32.cpp b/src/DataTypes/DataTypeDate32.cpp new file mode 100644 index 00000000000..83b1260eb6d --- /dev/null +++ b/src/DataTypes/DataTypeDate32.cpp @@ -0,0 +1,23 @@ +#include +#include +#include + +namespace DB +{ +bool DataTypeDate32::equals(const IDataType & rhs) const +{ + return typeid(rhs) == typeid(*this); +} + +SerializationPtr DataTypeDate32::doGetDefaultSerialization() const +{ + return std::make_shared(); +} + +void registerDataTypeDate32(DataTypeFactory & factory) +{ + factory.registerSimpleDataType( + "Date32", [] { return DataTypePtr(std::make_shared()); }, DataTypeFactory::CaseInsensitive); +} + +} diff --git a/src/DataTypes/DataTypeDate32.h b/src/DataTypes/DataTypeDate32.h new file mode 100644 index 00000000000..17f2f8b9924 --- /dev/null +++ b/src/DataTypes/DataTypeDate32.h @@ -0,0 +1,23 @@ +#pragma once + +#include + +namespace DB +{ +class DataTypeDate32 final : public DataTypeNumberBase +{ +public: + static constexpr auto family_name = "Date32"; + + TypeIndex getTypeId() const override { return TypeIndex::Date32; } + const char * getFamilyName() const override { return family_name; } + + bool canBeUsedAsVersion() const override { return true; } + bool canBeInsideNullable() const override { return true; } + + bool equals(const IDataType & rhs) const override; + +protected: + SerializationPtr doGetDefaultSerialization() const override; +}; +} diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index c28de15565c..63b7c54306d 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -194,6 +194,7 @@ DataTypeFactory::DataTypeFactory() registerDataTypeNumbers(*this); registerDataTypeDecimal(*this); registerDataTypeDate(*this); + registerDataTypeDate32(*this); registerDataTypeDateTime(*this); registerDataTypeString(*this); registerDataTypeFixedString(*this); diff --git a/src/DataTypes/DataTypeFactory.h b/src/DataTypes/DataTypeFactory.h index 9fa3e30297b..e44c66466a1 100644 --- a/src/DataTypes/DataTypeFactory.h +++ b/src/DataTypes/DataTypeFactory.h @@ -69,6 +69,7 @@ private: void registerDataTypeNumbers(DataTypeFactory & factory); void registerDataTypeDecimal(DataTypeFactory & factory); void registerDataTypeDate(DataTypeFactory & factory); +void registerDataTypeDate32(DataTypeFactory & factory); void registerDataTypeDateTime(DataTypeFactory & factory); void registerDataTypeString(DataTypeFactory & factory); void registerDataTypeFixedString(DataTypeFactory & factory); diff --git a/src/DataTypes/DataTypeLowCardinality.cpp b/src/DataTypes/DataTypeLowCardinality.cpp index dc7617b82c1..600d83bb112 100644 --- a/src/DataTypes/DataTypeLowCardinality.cpp +++ b/src/DataTypes/DataTypeLowCardinality.cpp @@ -78,6 +78,8 @@ MutableColumnUniquePtr DataTypeLowCardinality::createColumnUniqueImpl(const IDat return creator(static_cast(nullptr)); else if (which.isDate()) return creator(static_cast *>(nullptr)); + else if (which.isDate32()) + return creator(static_cast *>(nullptr)); else if (which.isDateTime()) return creator(static_cast *>(nullptr)); else if (which.isUUID()) diff --git a/src/DataTypes/DataTypeLowCardinalityHelpers.cpp 
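DataTypeDate32 stores a signed 32-bit day offset from 1970-01-01 (it derives from the Int32 number base), which is what lets it represent dates before the Unix epoch, unlike the UInt16-based Date. A standalone sketch of the day-number-to-calendar mapping using the well-known civil-from-days algorithm (the real conversion goes through DateLUTImpl, not this code):

    #include <cstdint>
    #include <iostream>
    #include <tuple>

    // Convert a signed day count relative to 1970-01-01 into (year, month, day).
    std::tuple<int, unsigned, unsigned> civil_from_days(int32_t z)
    {
        z += 719468;
        const int era = (z >= 0 ? z : z - 146096) / 146097;
        const unsigned doe = static_cast<unsigned>(z - era * 146097);                    // [0, 146096]
        const unsigned yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365;      // [0, 399]
        const int y = static_cast<int>(yoe) + era * 400;
        const unsigned doy = doe - (365 * yoe + yoe / 4 - yoe / 100);                    // [0, 365]
        const unsigned mp = (5 * doy + 2) / 153;                                         // [0, 11]
        const unsigned d = doy - (153 * mp + 2) / 5 + 1;                                 // [1, 31]
        const unsigned m = mp < 10 ? mp + 3 : mp - 9;                                    // [1, 12]
        return {y + (m <= 2), m, d};
    }

    int main()
    {
        // Negative day numbers are exactly what Date can't hold and Date32 can.
        for (int32_t days : {-365, -1, 0, 18262})
        {
            auto [y, m, d] = civil_from_days(days);
            std::cout << days << " -> " << y << '-' << m << '-' << d << '\n';  // ... 18262 -> 2020-1-1
        }
    }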
b/src/DataTypes/DataTypeLowCardinalityHelpers.cpp index a68dc30d5c2..41ba81814d0 100644 --- a/src/DataTypes/DataTypeLowCardinalityHelpers.cpp +++ b/src/DataTypes/DataTypeLowCardinalityHelpers.cpp @@ -1,11 +1,13 @@ #include #include #include +#include #include #include #include #include +#include #include @@ -39,6 +41,11 @@ DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type) return std::make_shared(elements); } + if (const auto * map_type = typeid_cast(type.get())) + { + return std::make_shared(recursiveRemoveLowCardinality(map_type->getKeyType()), recursiveRemoveLowCardinality(map_type->getValueType())); + } + if (const auto * low_cardinality_type = typeid_cast(type.get())) return low_cardinality_type->getDictionaryType(); @@ -78,6 +85,16 @@ ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column) return ColumnTuple::create(columns); } + if (const auto * column_map = typeid_cast(column.get())) + { + const auto & nested = column_map->getNestedColumnPtr(); + auto nested_no_lc = recursiveRemoveLowCardinality(nested); + if (nested.get() == nested_no_lc.get()) + return column; + + return ColumnMap::create(nested_no_lc); + } + if (const auto * column_low_cardinality = typeid_cast(column.get())) return column_low_cardinality->convertToFullColumn(); diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp index 3f2d9987018..8fd375aa86e 100644 --- a/src/DataTypes/DataTypeMap.cpp +++ b/src/DataTypes/DataTypeMap.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -53,12 +54,24 @@ DataTypeMap::DataTypeMap(const DataTypePtr & key_type_, const DataTypePtr & valu void DataTypeMap::assertKeyType() const { - if (!key_type->isValueRepresentedByInteger() + bool type_error = false; + if (key_type->getTypeId() == TypeIndex::LowCardinality) + { + const auto & low_cardinality_data_type = assert_cast(*key_type); + if (!isStringOrFixedString(*(low_cardinality_data_type.getDictionaryType()))) + type_error = true; + } + else if (!key_type->isValueRepresentedByInteger() && !isStringOrFixedString(*key_type) && !WhichDataType(key_type).isNothing() && !WhichDataType(key_type).isUUID()) + { + type_error = true; + } + + if (type_error) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Type of Map key must be a type, that can be represented by integer or string or UUID," + "Type of Map key must be a type, that can be represented by integer or String or FixedString (possibly LowCardinality) or UUID," " but {} given", key_type->getName()); } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index fb410336dcb..5eba65e39b9 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -322,8 +322,10 @@ struct WhichDataType constexpr bool isEnum() const { return isEnum8() || isEnum16(); } constexpr bool isDate() const { return idx == TypeIndex::Date; } + constexpr bool isDate32() const { return idx == TypeIndex::Date32; } constexpr bool isDateTime() const { return idx == TypeIndex::DateTime; } constexpr bool isDateTime64() const { return idx == TypeIndex::DateTime64; } + constexpr bool isDateOrDate32() const { return isDate() || isDate32(); } constexpr bool isString() const { return idx == TypeIndex::String; } constexpr bool isFixedString() const { return idx == TypeIndex::FixedString; } @@ -347,6 +349,10 @@ struct WhichDataType template inline bool isDate(const T & data_type) { return WhichDataType(data_type).isDate(); } template +inline bool isDate32(const T & data_type) { return WhichDataType(data_type).isDate32(); } 
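The relaxed DataTypeMap::assertKeyType above accepts a LowCardinality key as long as its dictionary type is String or FixedString. A compact model of the predicate with hypothetical, simplified type tags (the Nothing special case and the full isValueRepresentedByInteger set are collapsed into Kind::Int here):

    #include <memory>
    #include <stdexcept>
    #include <string>

    // Minimal, hypothetical model of the type system: just enough to show the rule.
    enum class Kind { Int, String, FixedString, UUID, LowCardinality, Other };

    struct Type
    {
        Kind kind;
        std::shared_ptr<Type> dictionary;  // set only for LowCardinality
        std::string name;
    };

    // Mirrors the relaxed key check: integers, String/FixedString, UUID, and
    // LowCardinality whose dictionary is String/FixedString are accepted.
    void assertMapKeyType(const Type & key)
    {
        bool ok = false;
        if (key.kind == Kind::LowCardinality)
            ok = key.dictionary
                && (key.dictionary->kind == Kind::String || key.dictionary->kind == Kind::FixedString);
        else
            ok = key.kind == Kind::Int || key.kind == Kind::String
                || key.kind == Kind::FixedString || key.kind == Kind::UUID;

        if (!ok)
            throw std::invalid_argument(
                "Type of Map key must be representable by integer, String, FixedString "
                "(possibly LowCardinality) or UUID, but " + key.name + " given");
    }

    int main()
    {
        Type lc_str{Kind::LowCardinality,
                    std::make_shared<Type>(Type{Kind::String, nullptr, "String"}),
                    "LowCardinality(String)"};
        assertMapKeyType(lc_str);  // accepted after this change

        try { assertMapKeyType(Type{Kind::Other, nullptr, "Float64"}); }
        catch (const std::invalid_argument &) { /* still rejected */ }
    }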
+template +inline bool isDateOrDate32(const T & data_type) { return WhichDataType(data_type).isDateOrDate32(); } +template inline bool isDateTime(const T & data_type) { return WhichDataType(data_type).isDateTime(); } template inline bool isDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTime64(); } diff --git a/src/DataTypes/Native.h b/src/DataTypes/Native.h index 88f99b60ed7..970b70f9f0b 100644 --- a/src/DataTypes/Native.h +++ b/src/DataTypes/Native.h @@ -29,7 +29,7 @@ namespace ErrorCodes static inline bool typeIsSigned(const IDataType & type) { WhichDataType data_type(type); - return data_type.isNativeInt() || data_type.isFloat(); + return data_type.isNativeInt() || data_type.isFloat() || data_type.isEnum(); } static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const IDataType & type) @@ -57,6 +57,10 @@ static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const IDa return builder.getFloatTy(); else if (data_type.isFloat64()) return builder.getDoubleTy(); + else if (data_type.isEnum8()) + return builder.getInt8Ty(); + else if (data_type.isEnum16()) + return builder.getInt16Ty(); return nullptr; } @@ -109,7 +113,7 @@ static inline bool canBeNativeType(const IDataType & type) return canBeNativeType(*data_type_nullable.getNestedType()); } - return data_type.isNativeInt() || data_type.isNativeUInt() || data_type.isFloat() || data_type.isDate(); + return data_type.isNativeInt() || data_type.isNativeUInt() || data_type.isFloat() || data_type.isDate() || data_type.isEnum(); } static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const DataTypePtr & type) @@ -266,7 +270,7 @@ static inline llvm::Constant * getColumnNativeValue(llvm::IRBuilderBase & builde { return llvm::ConstantInt::get(type, column.getUInt(index)); } - else if (column_data_type.isNativeInt()) + else if (column_data_type.isNativeInt() || column_data_type.isEnum()) { return llvm::ConstantInt::get(type, column.getInt(index)); } diff --git a/src/DataTypes/Serializations/SerializationDate32.cpp b/src/DataTypes/Serializations/SerializationDate32.cpp new file mode 100644 index 00000000000..e43edbac592 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationDate32.cpp @@ -0,0 +1,78 @@ +#include +#include +#include + +#include + +#include + +namespace DB +{ +void SerializationDate32::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const +{ + writeDateText(ExtendedDayNum(assert_cast(column).getData()[row_num]), ostr); +} + +void SerializationDate32::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextEscaped(column, istr, settings); +} + +void SerializationDate32::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + ExtendedDayNum x; + readDateText(x, istr); + assert_cast(column).getData().push_back(x); +} + +void SerializationDate32::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + serializeText(column, row_num, ostr, settings); +} + +void SerializationDate32::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + writeChar('\'', ostr); + serializeText(column, row_num, ostr, settings); + writeChar('\'', ostr); +} + +void SerializationDate32::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + ExtendedDayNum x; + 
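The Native.h changes let Enum8/Enum16 take part in expression JIT: they lower to 8/16-bit native integers and are treated as signed, exactly like Int8/Int16. A toy mapping that mirrors the idea without depending on LLVM (Tag and NativeType are illustrative stand-ins, not the real WhichDataType/llvm::Type machinery):

    #include <cstdint>
    #include <iostream>
    #include <optional>

    enum class Tag { UInt8, UInt16, Int8, Int16, Int32, Int64, Float32, Float64, Enum8, Enum16, String };

    struct NativeType { unsigned bits; bool is_signed; bool is_float; };

    // After this change Enum8/Enum16 get a native integer type like Int8/Int16 and
    // count as signed for comparisons and widening.
    std::optional<NativeType> toNativeType(Tag t)
    {
        switch (t)
        {
            case Tag::UInt8:   return NativeType{8,  false, false};
            case Tag::UInt16:  return NativeType{16, false, false};
            case Tag::Int8:
            case Tag::Enum8:   return NativeType{8,  true,  false};
            case Tag::Int16:
            case Tag::Enum16:  return NativeType{16, true,  false};
            case Tag::Int32:   return NativeType{32, true,  false};
            case Tag::Int64:   return NativeType{64, true,  false};
            case Tag::Float32: return NativeType{32, true,  true};
            case Tag::Float64: return NativeType{64, true,  true};
            default:           return std::nullopt;  // not JIT-able (e.g. String)
        }
    }

    int main()
    {
        auto e8 = toNativeType(Tag::Enum8);
        std::cout << e8->bits << " bits, signed=" << e8->is_signed << '\n';  // 8 bits, signed=1
    }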
assertChar('\'', istr); + readDateText(x, istr); + assertChar('\'', istr); + assert_cast(column).getData().push_back(x); /// It's important to do this at the end - for exception safety. +} + +void SerializationDate32::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + writeChar('"', ostr); + serializeText(column, row_num, ostr, settings); + writeChar('"', ostr); +} + +void SerializationDate32::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + ExtendedDayNum x; + assertChar('"', istr); + readDateText(x, istr); + assertChar('"', istr); + assert_cast(column).getData().push_back(x); +} + +void SerializationDate32::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + writeChar('"', ostr); + serializeText(column, row_num, ostr, settings); + writeChar('"', ostr); +} + +void SerializationDate32::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + LocalDate value; + readCSV(value, istr); + assert_cast(column).getData().push_back(value.getExtenedDayNum()); +} +} diff --git a/src/DataTypes/Serializations/SerializationDate32.h b/src/DataTypes/Serializations/SerializationDate32.h new file mode 100644 index 00000000000..484b4f4a958 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationDate32.h @@ -0,0 +1,21 @@ +#pragma once + +#include + +namespace DB +{ +class SerializationDate32 final : public SerializationNumber +{ +public: + void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; +}; +} diff --git a/src/DataTypes/ya.make b/src/DataTypes/ya.make index e7294c298e5..ab34e467795 100644 --- a/src/DataTypes/ya.make +++ b/src/DataTypes/ya.make @@ -16,6 +16,7 @@ SRCS( DataTypeCustomIPv4AndIPv6.cpp DataTypeCustomSimpleAggregateFunction.cpp DataTypeDate.cpp + DataTypeDate32.cpp DataTypeDateTime.cpp DataTypeDateTime64.cpp DataTypeDecimalBase.cpp @@ -45,6 +46,7 @@ SRCS( Serializations/SerializationArray.cpp Serializations/SerializationCustomSimpleText.cpp Serializations/SerializationDate.cpp + Serializations/SerializationDate32.cpp Serializations/SerializationDateTime.cpp Serializations/SerializationDateTime64.cpp Serializations/SerializationDecimal.cpp diff --git a/src/Dictionaries/SSDCacheDictionaryStorage.h b/src/Dictionaries/SSDCacheDictionaryStorage.h index 7232d2d01b7..395328a904d 100644 --- 
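SerializationDate32 reuses the usual per-format wrappers: plain ISO text for escaped/whole-text output, single quotes for Quoted, double quotes for JSON and CSV. A tiny sketch of just the wrapping step (formatting of the day number itself, done by writeDateText in the real code, is omitted):

    #include <iostream>
    #include <string>

    // Illustrative only: shows which delimiters each text format adds around a Date32 value.
    std::string wrapDate32(const std::string & iso_date, const std::string & format)
    {
        if (format == "Quoted")
            return "'" + iso_date + "'";
        if (format == "JSON" || format == "CSV")
            return "\"" + iso_date + "\"";
        return iso_date;  // Escaped / whole text
    }

    int main()
    {
        for (const char * f : {"Escaped", "Quoted", "JSON", "CSV"})
            std::cout << f << ": " << wrapDate32("2100-01-01", f) << '\n';
    }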
a/src/Dictionaries/SSDCacheDictionaryStorage.h +++ b/src/Dictionaries/SSDCacheDictionaryStorage.h @@ -26,8 +26,10 @@ namespace ProfileEvents { extern const Event FileOpen; - extern const Event WriteBufferAIOWrite; - extern const Event WriteBufferAIOWriteBytes; + extern const Event AIOWrite; + extern const Event AIOWriteBytes; + extern const Event AIORead; + extern const Event AIOReadBytes; } namespace DB @@ -531,8 +533,8 @@ public: auto bytes_written = eventResult(event); - ProfileEvents::increment(ProfileEvents::WriteBufferAIOWrite); - ProfileEvents::increment(ProfileEvents::WriteBufferAIOWriteBytes, bytes_written); + ProfileEvents::increment(ProfileEvents::AIOWrite); + ProfileEvents::increment(ProfileEvents::AIOWriteBytes, bytes_written); if (bytes_written != static_cast(block_size * buffer_size_in_blocks)) throw Exception(ErrorCodes::AIO_WRITE_ERROR, @@ -600,6 +602,9 @@ public: buffer_size_in_bytes, read_bytes); + ProfileEvents::increment(ProfileEvents::AIORead); + ProfileEvents::increment(ProfileEvents::AIOReadBytes, read_bytes); + SSDCacheBlock block(block_size); for (size_t i = 0; i < blocks_length; ++i) @@ -687,6 +692,9 @@ public: throw Exception(ErrorCodes::AIO_READ_ERROR, "GC: AIO failed to read file ({}). Expected bytes ({}). Actual bytes ({})", file_path, block_size, read_bytes); + ProfileEvents::increment(ProfileEvents::AIORead); + ProfileEvents::increment(ProfileEvents::AIOReadBytes, read_bytes); + char * request_buffer = getRequestBuffer(request); // Unpoison the memory returned from an uninstrumented system function. diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp index d5b82edb134..f672376841e 100644 --- a/src/Disks/DiskCacheWrapper.cpp +++ b/src/Disks/DiskCacheWrapper.cpp @@ -90,17 +90,17 @@ DiskCacheWrapper::readFile( const String & path, size_t buf_size, size_t estimated_size, - size_t aio_threshold, + size_t direct_io_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache) const { if (!cache_file_predicate(path)) - return DiskDecorator::readFile(path, buf_size, estimated_size, aio_threshold, mmap_threshold, mmap_cache); + return DiskDecorator::readFile(path, buf_size, estimated_size, direct_io_threshold, mmap_threshold, mmap_cache); LOG_DEBUG(log, "Read file {} from cache", backQuote(path)); if (cache_disk->exists(path)) - return cache_disk->readFile(path, buf_size, estimated_size, aio_threshold, mmap_threshold, mmap_cache); + return cache_disk->readFile(path, buf_size, estimated_size, direct_io_threshold, mmap_threshold, mmap_cache); auto metadata = acquireDownloadMetadata(path); @@ -134,7 +134,7 @@ DiskCacheWrapper::readFile( auto tmp_path = path + ".tmp"; { - auto src_buffer = DiskDecorator::readFile(path, buf_size, estimated_size, aio_threshold, mmap_threshold, mmap_cache); + auto src_buffer = DiskDecorator::readFile(path, buf_size, estimated_size, direct_io_threshold, mmap_threshold, mmap_cache); auto dst_buffer = cache_disk->writeFile(tmp_path, buf_size, WriteMode::Rewrite); copyData(*src_buffer, *dst_buffer); } @@ -158,9 +158,9 @@ DiskCacheWrapper::readFile( } if (metadata->status == DOWNLOADED) - return cache_disk->readFile(path, buf_size, estimated_size, aio_threshold, mmap_threshold, mmap_cache); + return cache_disk->readFile(path, buf_size, estimated_size, direct_io_threshold, mmap_threshold, mmap_cache); - return DiskDecorator::readFile(path, buf_size, estimated_size, aio_threshold, mmap_threshold, mmap_cache); + return DiskDecorator::readFile(path, buf_size, estimated_size, direct_io_threshold, mmap_threshold, 
mmap_cache); } std::unique_ptr diff --git a/src/Disks/DiskCacheWrapper.h b/src/Disks/DiskCacheWrapper.h index 6d58394640f..7e711dd521c 100644 --- a/src/Disks/DiskCacheWrapper.h +++ b/src/Disks/DiskCacheWrapper.h @@ -38,7 +38,7 @@ public: const String & path, size_t buf_size, size_t estimated_size, - size_t aio_threshold, + size_t direct_io_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache) const override; diff --git a/src/Disks/DiskDecorator.cpp b/src/Disks/DiskDecorator.cpp index d1ff3f9f827..7237a249bcb 100644 --- a/src/Disks/DiskDecorator.cpp +++ b/src/Disks/DiskDecorator.cpp @@ -115,9 +115,9 @@ void DiskDecorator::listFiles(const String & path, std::vector & file_na std::unique_ptr DiskDecorator::readFile( - const String & path, size_t buf_size, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache) const + const String & path, size_t buf_size, size_t estimated_size, size_t direct_io_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache) const { - return delegate->readFile(path, buf_size, estimated_size, aio_threshold, mmap_threshold, mmap_cache); + return delegate->readFile(path, buf_size, estimated_size, direct_io_threshold, mmap_threshold, mmap_cache); } std::unique_ptr diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h index 401078e6b2e..0910f4c28cd 100644 --- a/src/Disks/DiskDecorator.h +++ b/src/Disks/DiskDecorator.h @@ -39,7 +39,7 @@ public: const String & path, size_t buf_size, size_t estimated_size, - size_t aio_threshold, + size_t direct_io_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache) const override; diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 89c1514f5c8..4ceb76ab059 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -211,9 +211,9 @@ void DiskLocal::replaceFile(const String & from_path, const String & to_path) std::unique_ptr DiskLocal::readFile( - const String & path, size_t buf_size, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache) const + const String & path, size_t buf_size, size_t estimated_size, size_t direct_io_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache) const { - return createReadBufferFromFileBase(fs::path(disk_path) / path, estimated_size, aio_threshold, mmap_threshold, mmap_cache, buf_size); + return createReadBufferFromFileBase(fs::path(disk_path) / path, estimated_size, direct_io_threshold, mmap_threshold, mmap_cache, buf_size); } std::unique_ptr diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 47482ad8d67..63a6fe59bea 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -74,7 +74,7 @@ public: const String & path, size_t buf_size, size_t estimated_size, - size_t aio_threshold, + size_t direct_io_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache) const override; diff --git a/src/Disks/DiskMemory.h b/src/Disks/DiskMemory.h index d5c57b20a4a..40fd2b2a9f9 100644 --- a/src/Disks/DiskMemory.h +++ b/src/Disks/DiskMemory.h @@ -66,7 +66,7 @@ public: const String & path, size_t buf_size, size_t estimated_size, - size_t aio_threshold, + size_t direct_io_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache) const override; diff --git a/src/Disks/DiskRestartProxy.cpp b/src/Disks/DiskRestartProxy.cpp index 2600dc5a1e1..1bd5b2acf50 100644 --- a/src/Disks/DiskRestartProxy.cpp +++ b/src/Disks/DiskRestartProxy.cpp @@ -187,11 +187,11 @@ void DiskRestartProxy::listFiles(const String & path, std::vector & file } 
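The aio_threshold parameter is renamed to direct_io_threshold across the Disk interfaces, presumably because the threshold now selects O_DIRECT reads rather than the retired AIO read buffer. A rough, hypothetical model of what such a threshold pair decides when a read buffer is created (the real createReadBufferFromFileBase returns concrete ReadBufferFromFileBase implementations and has more conditions):

    #include <cstddef>
    #include <iostream>

    enum class ReadMethod { Ordinary, Mmap, DirectIO };

    // Sketch: large reads with a known size bypass the page cache, medium ones may be
    // memory-mapped, everything else goes through a plain buffered read.
    ReadMethod chooseReadMethod(size_t estimated_size, size_t direct_io_threshold, size_t mmap_threshold)
    {
        if (direct_io_threshold && estimated_size >= direct_io_threshold)
            return ReadMethod::DirectIO;   // O_DIRECT
        if (mmap_threshold && estimated_size >= mmap_threshold)
            return ReadMethod::Mmap;
        return ReadMethod::Ordinary;       // buffered pread
    }

    int main()
    {
        std::cout << int(chooseReadMethod(10 << 20, 4 << 20, 1 << 20)) << '\n';  // 2: DirectIO
        std::cout << int(chooseReadMethod(0, 4 << 20, 1 << 20)) << '\n';         // 0: Ordinary (size unknown)
    }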
std::unique_ptr DiskRestartProxy::readFile( - const String & path, size_t buf_size, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache) + const String & path, size_t buf_size, size_t estimated_size, size_t direct_io_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache) const { ReadLock lock (mutex); - auto impl = DiskDecorator::readFile(path, buf_size, estimated_size, aio_threshold, mmap_threshold, mmap_cache); + auto impl = DiskDecorator::readFile(path, buf_size, estimated_size, direct_io_threshold, mmap_threshold, mmap_cache); return std::make_unique(*this, std::move(impl)); } diff --git a/src/Disks/DiskRestartProxy.h b/src/Disks/DiskRestartProxy.h index f5502d9d68f..e6c94d9ad7b 100644 --- a/src/Disks/DiskRestartProxy.h +++ b/src/Disks/DiskRestartProxy.h @@ -47,7 +47,7 @@ public: const String & path, size_t buf_size, size_t estimated_size, - size_t aio_threshold, + size_t direct_io_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache) const override; std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode) override; diff --git a/src/Disks/HDFS/DiskHDFS.h b/src/Disks/HDFS/DiskHDFS.h index 49fdf44728b..1f93192fd57 100644 --- a/src/Disks/HDFS/DiskHDFS.h +++ b/src/Disks/HDFS/DiskHDFS.h @@ -48,7 +48,7 @@ public: const String & path, size_t buf_size, size_t estimated_size, - size_t aio_threshold, + size_t direct_io_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache) const override; diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index ecaf7d63fdc..f9e7624f4ab 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -156,7 +156,7 @@ public: const String & path, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, size_t estimated_size = 0, - size_t aio_threshold = 0, + size_t direct_io_threshold = 0, size_t mmap_threshold = 0, MMappedFileCache * mmap_cache = nullptr) const = 0; diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp index b30e9613ed8..a4dcc8037bc 100644 --- a/src/Disks/IDiskRemote.cpp +++ b/src/Disks/IDiskRemote.cpp @@ -417,7 +417,11 @@ void IDiskRemote::removeDirectory(const String & path) DiskDirectoryIteratorPtr IDiskRemote::iterateDirectory(const String & path) { - return std::make_unique(metadata_path + path, path); + fs::path meta_path = fs::path(metadata_path) / path; + if (fs::exists(meta_path) && fs::is_directory(meta_path)) + return std::make_unique(meta_path, path); + else + return std::make_unique(); } diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index e725e0ed744..360d4e2de33 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -193,6 +193,7 @@ struct IDiskRemote::Metadata class RemoteDiskDirectoryIterator final : public IDiskDirectoryIterator { public: + RemoteDiskDirectoryIterator() {} RemoteDiskDirectoryIterator(const String & full_path, const String & folder_path_) : iter(full_path), folder_path(folder_path_) {} void next() override { ++iter; } diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index 21bf0d3867b..fc7c832e45d 100644 --- a/src/Disks/S3/DiskS3.h +++ b/src/Disks/S3/DiskS3.h @@ -77,7 +77,7 @@ public: const String & path, size_t buf_size, size_t estimated_size, - size_t aio_threshold, + size_t direct_io_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache) const override; diff --git a/src/Functions/CustomWeekTransforms.h b/src/Functions/CustomWeekTransforms.h index 2b28620c84e..bb9856096a9 100644 --- a/src/Functions/CustomWeekTransforms.h +++ b/src/Functions/CustomWeekTransforms.h @@ 
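IDiskRemote::iterateDirectory now guards against a missing metadata directory and hands back an empty iterator instead of throwing. The same guard expressed as a standalone std::filesystem snippet (the paths in main are hypothetical):

    #include <filesystem>
    #include <iostream>
    #include <string>
    #include <vector>

    namespace fs = std::filesystem;

    // If the metadata directory does not exist (or is not a directory), return an empty
    // listing instead of letting fs::directory_iterator throw.
    std::vector<fs::path> listMetadata(const fs::path & metadata_path, const std::string & dir)
    {
        fs::path meta_path = metadata_path / dir;
        std::vector<fs::path> result;
        if (fs::exists(meta_path) && fs::is_directory(meta_path))
            for (const auto & entry : fs::directory_iterator(meta_path))
                result.push_back(entry.path());
        return result;  // empty, mirroring the new default-constructed iterator
    }

    int main()
    {
        for (const auto & p : listMetadata("/var/lib/clickhouse/disks/s3", "store/123"))  // hypothetical paths
            std::cout << p << '\n';
    }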
-42,6 +42,11 @@ struct ToYearWeekImpl YearWeek yw = time_zone.toYearWeek(time_zone.toDayNum(t), week_mode | static_cast(WeekModeFlag::YEAR)); return yw.first * 100 + yw.second; } + static inline UInt32 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone) + { + YearWeek yw = time_zone.toYearWeek(ExtendedDayNum (d), week_mode | static_cast(WeekModeFlag::YEAR)); + return yw.first * 100 + yw.second; + } static inline UInt32 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone) { YearWeek yw = time_zone.toYearWeek(DayNum(d), week_mode | static_cast(WeekModeFlag::YEAR)); @@ -65,6 +70,10 @@ struct ToStartOfWeekImpl return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode); // return time_zone.toFirstDayNumOfWeek(t, week_mode); } + static inline UInt16 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone) + { + return time_zone.toFirstDayNumOfWeek(ExtendedDayNum(d), week_mode); + } static inline UInt16 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfWeek(ExtendedDayNum(d), week_mode); @@ -88,6 +97,11 @@ struct ToWeekImpl YearWeek yw = time_zone.toYearWeek(time_zone.toDayNum(t), week_mode); return yw.second; } + static inline UInt8 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone) + { + YearWeek yw = time_zone.toYearWeek(ExtendedDayNum(d), week_mode); + return yw.second; + } static inline UInt8 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone) { YearWeek yw = time_zone.toYearWeek(DayNum(d), week_mode); diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 0f36fe52465..d12bc1701ad 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -46,6 +46,7 @@ struct ZeroTransform { static inline UInt16 execute(Int64, const DateLUTImpl &) { return 0; } static inline UInt16 execute(UInt32, const DateLUTImpl &) { return 0; } + static inline UInt16 execute(Int32, const DateLUTImpl &) { return 0; } static inline UInt16 execute(UInt16, const DateLUTImpl &) { return 0; } }; @@ -61,6 +62,10 @@ struct ToDateImpl { return UInt16(time_zone.toDayNum(t)); } + static inline UInt16 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } static inline UInt16 execute(UInt16 d, const DateLUTImpl &) { return d; @@ -82,6 +87,10 @@ struct ToStartOfDayImpl { return time_zone.toDate(t); } + static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toDate(ExtendedDayNum(d)); + } static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toDate(ExtendedDayNum(d)); @@ -104,6 +113,10 @@ struct ToMondayImpl //return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t)); return time_zone.toFirstDayNumOfWeek(t); } + static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toFirstDayNumOfWeek(ExtendedDayNum(d)); + } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfWeek(ExtendedDayNum(d)); @@ -124,6 +137,10 @@ struct ToStartOfMonthImpl { return time_zone.toFirstDayNumOfMonth(time_zone.toDayNum(t)); } + static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toFirstDayNumOfMonth(ExtendedDayNum(d)); + } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfMonth(ExtendedDayNum(d)); @@ -144,6 +161,10 @@ struct ToStartOfQuarterImpl { return 
time_zone.toFirstDayNumOfQuarter(time_zone.toDayNum(t)); } + static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toFirstDayNumOfQuarter(ExtendedDayNum(d)); + } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfQuarter(ExtendedDayNum(d)); @@ -164,6 +185,10 @@ struct ToStartOfYearImpl { return time_zone.toFirstDayNumOfYear(time_zone.toDayNum(t)); } + static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toFirstDayNumOfYear(ExtendedDayNum(d)); + } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfYear(ExtendedDayNum(d)); @@ -186,7 +211,10 @@ struct ToTimeImpl { return time_zone.toTime(t) + 86400; } - + static inline UInt32 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } static inline UInt32 execute(UInt16, const DateLUTImpl &) { return dateIsNotSupported(name); @@ -207,6 +235,10 @@ struct ToStartOfMinuteImpl { return time_zone.toStartOfMinute(t); } + static inline UInt32 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } static inline UInt32 execute(UInt16, const DateLUTImpl &) { return dateIsNotSupported(name); @@ -242,6 +274,10 @@ struct ToStartOfSecondImpl { throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } + static inline UInt32 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } static inline UInt32 execute(UInt16, const DateLUTImpl &) { return dateIsNotSupported(name); @@ -262,6 +298,10 @@ struct ToStartOfFiveMinuteImpl { return time_zone.toStartOfFiveMinute(t); } + static inline UInt32 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } static inline UInt32 execute(UInt16, const DateLUTImpl &) { return dateIsNotSupported(name); @@ -282,6 +322,10 @@ struct ToStartOfTenMinutesImpl { return time_zone.toStartOfTenMinutes(t); } + static inline UInt32 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } static inline UInt32 execute(UInt16, const DateLUTImpl &) { return dateIsNotSupported(name); @@ -302,6 +346,10 @@ struct ToStartOfFifteenMinutesImpl { return time_zone.toStartOfFifteenMinutes(t); } + static inline UInt32 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } static inline UInt32 execute(UInt16, const DateLUTImpl &) { return dateIsNotSupported(name); @@ -326,6 +374,11 @@ struct TimeSlotImpl return t / 1800 * 1800; } + static inline UInt32 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + static inline UInt32 execute(UInt16, const DateLUTImpl &) { return dateIsNotSupported(name); @@ -348,6 +401,11 @@ struct ToStartOfHourImpl return time_zone.toStartOfHour(t); } + static inline UInt32 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + static inline UInt32 execute(UInt16, const DateLUTImpl &) { return dateIsNotSupported(name); @@ -368,6 +426,10 @@ struct ToYearImpl { return time_zone.toYear(t); } + static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toYear(ExtendedDayNum(d)); + } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toYear(ExtendedDayNum(d)); @@ -388,6 +450,10 @@ struct ToQuarterImpl { return time_zone.toQuarter(t); } + static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone) + { + return 
time_zone.toQuarter(ExtendedDayNum(d)); + } static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toQuarter(ExtendedDayNum(d)); @@ -408,6 +474,10 @@ struct ToMonthImpl { return time_zone.toMonth(t); } + static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toMonth(ExtendedDayNum(d)); + } static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toMonth(ExtendedDayNum(d)); @@ -428,6 +498,10 @@ struct ToDayOfMonthImpl { return time_zone.toDayOfMonth(t); } + static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toDayOfMonth(ExtendedDayNum(d)); + } static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toDayOfMonth(ExtendedDayNum(d)); @@ -448,6 +522,10 @@ struct ToDayOfWeekImpl { return time_zone.toDayOfWeek(t); } + static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toDayOfWeek(ExtendedDayNum(d)); + } static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(ExtendedDayNum(d)); @@ -468,6 +546,10 @@ struct ToDayOfYearImpl { return time_zone.toDayOfYear(t); } + static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toDayOfYear(ExtendedDayNum(d)); + } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toDayOfYear(ExtendedDayNum(d)); @@ -488,7 +570,10 @@ struct ToHourImpl { return time_zone.toHour(t); } - + static inline UInt8 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } static inline UInt8 execute(UInt16, const DateLUTImpl &) { return dateIsNotSupported(name); @@ -511,6 +596,11 @@ struct TimezoneOffsetImpl return time_zone.timezoneOffset(t); } + static inline time_t execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + static inline time_t execute(UInt16, const DateLUTImpl &) { return dateIsNotSupported(name); @@ -531,6 +621,10 @@ struct ToMinuteImpl { return time_zone.toMinute(t); } + static inline UInt8 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } static inline UInt8 execute(UInt16, const DateLUTImpl &) { return dateIsNotSupported(name); @@ -551,6 +645,10 @@ struct ToSecondImpl { return time_zone.toSecond(t); } + static inline UInt8 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } static inline UInt8 execute(UInt16, const DateLUTImpl &) { return dateIsNotSupported(name); @@ -571,6 +669,10 @@ struct ToISOYearImpl { return time_zone.toISOYear(time_zone.toDayNum(t)); } + static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toISOYear(ExtendedDayNum(d)); + } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toISOYear(ExtendedDayNum(d)); @@ -591,6 +693,10 @@ struct ToStartOfISOYearImpl { return time_zone.toFirstDayNumOfISOYear(time_zone.toDayNum(t)); } + static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toFirstDayNumOfISOYear(ExtendedDayNum(d)); + } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toFirstDayNumOfISOYear(ExtendedDayNum(d)); @@ -611,6 +717,10 @@ struct ToISOWeekImpl { return time_zone.toISOWeek(time_zone.toDayNum(t)); } + static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toISOWeek(ExtendedDayNum(d)); + } static inline UInt8 execute(UInt16 d, 
const DateLUTImpl & time_zone) { return time_zone.toISOWeek(ExtendedDayNum(d)); @@ -631,6 +741,10 @@ struct ToRelativeYearNumImpl { return time_zone.toYear(static_cast(t)); } + static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toYear(ExtendedDayNum(d)); + } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toYear(ExtendedDayNum(d)); @@ -651,6 +765,10 @@ struct ToRelativeQuarterNumImpl { return time_zone.toRelativeQuarterNum(static_cast(t)); } + static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeQuarterNum(ExtendedDayNum(d)); + } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toRelativeQuarterNum(ExtendedDayNum(d)); @@ -671,6 +789,10 @@ struct ToRelativeMonthNumImpl { return time_zone.toRelativeMonthNum(static_cast(t)); } + static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeMonthNum(ExtendedDayNum(d)); + } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toRelativeMonthNum(ExtendedDayNum(d)); @@ -691,6 +813,10 @@ struct ToRelativeWeekNumImpl { return time_zone.toRelativeWeekNum(static_cast(t)); } + static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeWeekNum(ExtendedDayNum(d)); + } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toRelativeWeekNum(ExtendedDayNum(d)); @@ -711,6 +837,10 @@ struct ToRelativeDayNumImpl { return time_zone.toDayNum(static_cast(t)); } + static inline UInt16 execute(Int32 d, const DateLUTImpl &) + { + return static_cast(d); + } static inline UInt16 execute(UInt16 d, const DateLUTImpl &) { return static_cast(d); @@ -732,6 +862,10 @@ struct ToRelativeHourNumImpl { return time_zone.toRelativeHourNum(static_cast(t)); } + static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeHourNum(ExtendedDayNum(d)); + } static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toRelativeHourNum(ExtendedDayNum(d)); @@ -752,6 +886,10 @@ struct ToRelativeMinuteNumImpl { return time_zone.toRelativeMinuteNum(static_cast(t)); } + static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeMinuteNum(ExtendedDayNum(d)); + } static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toRelativeMinuteNum(ExtendedDayNum(d)); @@ -772,6 +910,10 @@ struct ToRelativeSecondNumImpl { return t; } + static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.fromDayNum(ExtendedDayNum(d)); + } static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.fromDayNum(ExtendedDayNum(d)); @@ -792,6 +934,10 @@ struct ToYYYYMMImpl { return time_zone.toNumYYYYMM(t); } + static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toNumYYYYMM(static_cast(d)); + } static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toNumYYYYMM(static_cast(d)); @@ -812,6 +958,10 @@ struct ToYYYYMMDDImpl { return time_zone.toNumYYYYMMDD(t); } + static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toNumYYYYMMDD(static_cast(d)); + } static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toNumYYYYMMDD(static_cast(d)); @@ -832,6 +982,10 @@ 
struct ToYYYYMMDDhhmmssImpl { return time_zone.toNumYYYYMMDDhhmmss(t); } + static inline UInt64 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toNumYYYYMMDDhhmmss(time_zone.toDate(static_cast(d))); + } static inline UInt64 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toNumYYYYMMDDhhmmss(time_zone.toDate(static_cast(d))); diff --git a/src/Functions/FunctionCustomWeekToSomething.h b/src/Functions/FunctionCustomWeekToSomething.h index 2bcf3a3e5ab..b5ea01418c1 100644 --- a/src/Functions/FunctionCustomWeekToSomething.h +++ b/src/Functions/FunctionCustomWeekToSomething.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include #include @@ -35,7 +36,7 @@ public: { if (arguments.size() == 1) { - if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) + if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) throw Exception( "Illegal type " + arguments[0].type->getName() + " of argument of function " + getName() + ". Should be a date or a date with time", @@ -43,7 +44,7 @@ public: } else if (arguments.size() == 2) { - if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) + if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) throw Exception( "Illegal type " + arguments[0].type->getName() + " of argument of function " + getName() + ". Should be a date or a date with time", @@ -59,7 +60,7 @@ public: } else if (arguments.size() == 3) { - if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) + if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) throw Exception( "Illegal type " + arguments[0].type->getName() + " of argument of function " + getName() + ". 
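All of the DateTimeTransforms.h additions follow one pattern: every transform gains an Int32 overload for Date32, where calendar transforms interpret the value as an extended (possibly negative) day number and time-of-day transforms keep reporting "not supported", just as they already did for Date. A condensed sketch of that overload shape with a toy DateLUT (the arithmetic is deliberately approximate and ignores leap years and time zones):

    #include <cstdint>
    #include <iostream>
    #include <stdexcept>
    #include <string>

    struct DateLUT
    {
        int toYear(int64_t days) const { return 1970 + int((days >= 0 ? days : days - 364) / 365); }
        int toHour(int64_t seconds) const { return int((seconds % 86400) / 3600); }
    };

    [[noreturn]] uint8_t dateIsNotSupported(const char * fn)
    {
        throw std::runtime_error(std::string("Illegal type Date of argument for function ") + fn);
    }

    // Shape of every transform: UInt32 = DateTime seconds, Int32 = Date32 day number,
    // UInt16 = Date day number.
    struct ToYearImpl
    {
        static uint16_t execute(uint32_t t, const DateLUT & lut) { return lut.toYear(t / 86400); }
        static uint16_t execute(int32_t d, const DateLUT & lut) { return lut.toYear(d); }   // new Date32 overload
        static uint16_t execute(uint16_t d, const DateLUT & lut) { return lut.toYear(d); }
    };

    struct ToHourImpl
    {
        static uint8_t execute(uint32_t t, const DateLUT & lut) { return lut.toHour(t); }
        static uint8_t execute(int32_t, const DateLUT &) { return dateIsNotSupported("toHour"); }  // no time of day
        static uint8_t execute(uint16_t, const DateLUT &) { return dateIsNotSupported("toHour"); }
    };

    int main()
    {
        DateLUT lut;
        std::cout << ToYearImpl::execute(int32_t{-365}, lut) << '\n';  // 1969: Date32 can be pre-epoch
        try { ToHourImpl::execute(int32_t{0}, lut); }
        catch (const std::exception & e) { std::cout << e.what() << '\n'; }
    }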
Should be a date or a date with time", @@ -105,6 +106,9 @@ public: if (which.isDate()) return CustomWeekTransformImpl::execute( arguments, result_type, input_rows_count, Transform{}); + else if (which.isDate32()) + return CustomWeekTransformImpl::execute( + arguments, result_type, input_rows_count, Transform{}); else if (which.isDateTime()) return CustomWeekTransformImpl::execute( arguments, result_type, input_rows_count, Transform{}); diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index c87cf863154..cfb53a65f7f 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -50,7 +51,11 @@ struct AddSecondsImpl { return t + delta; } - + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + { + // use default datetime64 scale + return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; + } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) { return time_zone.fromDayNum(ExtendedDayNum(d)) + delta; @@ -71,7 +76,11 @@ struct AddMinutesImpl { return t + delta * 60; } - + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + { + // use default datetime64 scale + return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000; + } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) { return time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60; @@ -91,7 +100,11 @@ struct AddHoursImpl { return t + delta * 3600; } - + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + { + // use default datetime64 scale + return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000; + } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) { return time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600; @@ -117,6 +130,11 @@ struct AddDaysImpl { return d + delta; } + + static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &) + { + return d + delta; + } }; struct AddWeeksImpl @@ -124,17 +142,22 @@ struct AddWeeksImpl static constexpr auto name = "addWeeks"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone) + execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone) { return {time_zone.addWeeks(t.whole, delta), t.fractional}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone) { return time_zone.addWeeks(t, delta); } - static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &) + { + return d + delta * 7; + } + + static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &) { return d + delta * 7; } @@ -159,6 +182,11 @@ struct AddMonthsImpl { return time_zone.addMonths(ExtendedDayNum(d), delta); } + + static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + { + return 
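AddSecondsImpl/AddMinutesImpl/AddHoursImpl return an Int64 for a Date32 argument because adding a sub-day interval to a plain date has to produce a DateTime64; with the default DateTime64 scale of 3 the stored integer is in milliseconds, which is where the trailing "* 1000" comes from. A self-contained illustration (secondsAtStartOfDay ignores time zones, unlike DateLUTImpl::fromDayNum):

    #include <cstdint>
    #include <iostream>

    static int64_t secondsAtStartOfDay(int32_t days_since_epoch) { return int64_t(days_since_epoch) * 86400; }

    // Date32 + seconds -> DateTime64(3), i.e. milliseconds since the epoch.
    int64_t addSecondsToDate32(int32_t d, int64_t delta_seconds)
    {
        return (secondsAtStartOfDay(d) + delta_seconds) * 1000;  // scale-3 ticks
    }

    int main()
    {
        // 2020-01-01 (day 18262) + 90 seconds -> 1577836890000 ms since epoch
        std::cout << addSecondsToDate32(18262, 90) << '\n';
    }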
time_zone.addMonths(ExtendedDayNum(d), delta); + } }; struct AddQuartersImpl @@ -166,17 +194,22 @@ struct AddQuartersImpl static constexpr auto name = "addQuarters"; static inline DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone) + execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone) { return {time_zone.addQuarters(t.whole, delta), t.fractional}; } - static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone) + static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone) { return time_zone.addQuarters(t, delta); } - static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl & time_zone) + { + return time_zone.addQuarters(ExtendedDayNum(d), delta); + } + + static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl & time_zone) { return time_zone.addQuarters(ExtendedDayNum(d), delta); } @@ -201,6 +234,11 @@ struct AddYearsImpl { return time_zone.addYears(ExtendedDayNum(d), delta); } + + static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + { + return time_zone.addYears(ExtendedDayNum(d), delta); + } }; template @@ -342,7 +380,7 @@ template struct ResultDataTypeMap {}; template <> struct ResultDataTypeMap { using ResultDataType = DataTypeDate; }; template <> struct ResultDataTypeMap { using ResultDataType = DataTypeDate; }; template <> struct ResultDataTypeMap { using ResultDataType = DataTypeDateTime; }; -template <> struct ResultDataTypeMap { using ResultDataType = DataTypeDateTime; }; +template <> struct ResultDataTypeMap { using ResultDataType = DataTypeDate32; }; template <> struct ResultDataTypeMap { using ResultDataType = DataTypeDateTime64; }; template <> struct ResultDataTypeMap { using ResultDataType = DataTypeDateTime64; }; } @@ -375,7 +413,7 @@ public: if (arguments.size() == 2) { - if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) + if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() + ". Should be a date or a date with time", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; } @@ -398,6 +436,8 @@ public: { case TypeIndex::Date: return resolveReturnType(arguments); + case TypeIndex::Date32: + return resolveReturnType(arguments); case TypeIndex::DateTime: return resolveReturnType(arguments); case TypeIndex::DateTime64: @@ -437,16 +477,23 @@ public: if constexpr (std::is_same_v) return std::make_shared(); + else if constexpr (std::is_same_v) + return std::make_shared(); else if constexpr (std::is_same_v) { return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); } else if constexpr (std::is_same_v) { - // TODO (vnemkov): what if there is an overload of Transform::execute() that returns DateTime64 from DateTime or Date ? - // Shall we use the default scale or one from optional argument ? 
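ResultDataTypeMap is how the C++ type returned by a transform's execute() is turned back into a SQL result type; the patched table keeps day-granularity results on Date32 while sub-day results become DateTime64. A sketch of the same idea with plain strings; the exact FieldType parameters are my reading and are not spelled out in the hunk above:

    #include <cstdint>
    #include <iostream>

    // The C++ type a transform returns decides the SQL type of the whole add-interval function.
    template <typename FieldType> struct ResultTypeName;
    template <> struct ResultTypeName<uint16_t> { static constexpr const char * value = "Date"; };
    template <> struct ResultTypeName<int32_t>  { static constexpr const char * value = "Date32"; };     // mapping changed by this patch
    template <> struct ResultTypeName<uint32_t> { static constexpr const char * value = "DateTime"; };
    template <> struct ResultTypeName<int64_t>  { static constexpr const char * value = "DateTime64"; };

    int main()
    {
        // addDays(Date32, N) stays on Int32 day numbers, so the result stays Date32;
        // addSeconds(Date32, N) returns Int64 milliseconds, so the result becomes DateTime64.
        std::cout << ResultTypeName<int32_t>::value << '\n'
                  << ResultTypeName<int64_t>::value << '\n';
    }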
- const auto & datetime64_type = assert_cast(*arguments[0].type); - return std::make_shared(datetime64_type.getScale(), extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + if (typeid_cast(arguments[0].type.get())) + { + const auto & datetime64_type = assert_cast(*arguments[0].type); + return std::make_shared(datetime64_type.getScale(), extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + } + else + { + return std::make_shared(DataTypeDateTime64::default_scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + } } else { @@ -470,6 +517,11 @@ public: return DateTimeAddIntervalImpl, Transform>::execute( Transform{}, arguments, result_type); } + else if (which.isDate32()) + { + return DateTimeAddIntervalImpl, Transform>::execute( + Transform{}, arguments, result_type); + } else if (which.isDateTime()) { return DateTimeAddIntervalImpl, Transform>::execute( diff --git a/src/Functions/FunctionDateOrDateTimeToSomething.h b/src/Functions/FunctionDateOrDateTimeToSomething.h index 43b1c8e4aae..8bd5218261e 100644 --- a/src/Functions/FunctionDateOrDateTimeToSomething.h +++ b/src/Functions/FunctionDateOrDateTimeToSomething.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include #include @@ -38,7 +39,7 @@ public: { if (arguments.size() == 1) { - if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) + if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) throw Exception( "Illegal type " + arguments[0].type->getName() + " of argument of function " + getName() + ". Should be a date or a date with time", @@ -46,7 +47,7 @@ public: } else if (arguments.size() == 2) { - if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) + if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) throw Exception( "Illegal type " + arguments[0].type->getName() + " of argument of function " + getName() + ". Should be a date or a date with time", @@ -57,7 +58,7 @@ public: "must be of type Date or DateTime. The 2nd argument (optional) must be " "a constant string with timezone name", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - if (isDate(arguments[0].type) && std::is_same_v) + if ((isDate(arguments[0].type) || isDate32(arguments[0].type)) && (std::is_same_v || std::is_same_v)) throw Exception( "The timezone argument of function " + getName() + " is allowed only when the 1st argument has the type DateTime", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -103,6 +104,8 @@ public: if (which.isDate()) return DateTimeTransformImpl::execute(arguments, result_type, input_rows_count); + else if (which.isDate32()) + return DateTimeTransformImpl::execute(arguments, result_type, input_rows_count); else if (which.isDateTime()) return DateTimeTransformImpl::execute(arguments, result_type, input_rows_count); else if (which.isDateTime64()) @@ -146,6 +149,12 @@ public: == Transform::FactorTransform::execute(UInt16(right.get()), date_lut) ? is_monotonic : is_not_monotonic; } + else if (checkAndGetDataType(&type)) + { + return Transform::FactorTransform::execute(Int32(left.get()), date_lut) + == Transform::FactorTransform::execute(Int32(right.get()), date_lut) + ? 
is_monotonic : is_not_monotonic; + } else { return Transform::FactorTransform::execute(UInt32(left.get()), date_lut) diff --git a/src/Functions/FunctionJoinGet.h b/src/Functions/FunctionJoinGet.h index 2250fa3ccf0..c701625e9cd 100644 --- a/src/Functions/FunctionJoinGet.h +++ b/src/Functions/FunctionJoinGet.h @@ -28,7 +28,7 @@ public: static constexpr auto name = or_null ? "joinGetOrNull" : "joinGet"; bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForLowCardinalityColumns() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override; diff --git a/src/Functions/FunctionsCoding.h b/src/Functions/FunctionsCoding.h index 72f2aa1be1c..00b09acea1f 100644 --- a/src/Functions/FunctionsCoding.h +++ b/src/Functions/FunctionsCoding.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -978,7 +979,8 @@ public: !which.isDateTime64() && !which.isUInt() && !which.isFloat() && - !which.isDecimal()) + !which.isDecimal() && + !which.isAggregateFunction()) throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -990,6 +992,15 @@ public: const IColumn * column = arguments[0].column.get(); ColumnPtr res_column; + WhichDataType which(column->getDataType()); + if (which.isAggregateFunction()) + { + const ColumnPtr to_string = castColumn(arguments[0], std::make_shared()); + const auto * str_column = checkAndGetColumn(to_string.get()); + tryExecuteString(str_column, res_column); + return res_column; + } + if (tryExecuteUInt(column, res_column) || tryExecuteUInt(column, res_column) || tryExecuteUInt(column, res_column) || diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 239a0b30398..389b150e381 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -1081,7 +1081,7 @@ public: const DataTypeTuple * right_tuple = checkAndGetDataType(arguments[1].get()); bool both_represented_by_number = arguments[0]->isValueRepresentedByNumber() && arguments[1]->isValueRepresentedByNumber(); - bool has_date = left.isDate() || right.isDate(); + bool has_date = left.isDateOrDate32() || right.isDateOrDate32(); if (!((both_represented_by_number && !has_date) /// Do not allow to compare date and number. || (left.isStringOrFixedString() || right.isStringOrFixedString()) /// Everything can be compared with string by conversion. diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index b4cf875adfb..d7686318efc 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -32,7 +32,7 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); /// MysQL compatibility alias. 
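Note (illustrative sketch, hypothetical names): the Date32 branch added to the monotonicity check above follows the existing Date/DateTime pattern — a transform is considered monotonic on a range only when a coarser FactorTransform produces the same value at both ends of that range. A minimal standalone model of the idea:

#include <cstdint>

// Simplified model of the range-monotonicity test: for example, a day-of-month
// transform is monotonic on a range of days only if the whole range stays
// within one month, i.e. the "factor" (the month) is identical at both ends.
struct Monotonicity
{
    bool is_monotonic;
};

template <typename Factor>
Monotonicity monotonicityForRange(int64_t left, int64_t right, Factor get_factor)
{
    return {get_factor(left) == get_factor(right)};
}

// Usage: monotonicityForRange(18818, 18830, month_of_day_number) would report the
// transform as monotonic, because both hypothetical days fall into the same month.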
factory.registerFunction("DATE", FunctionFactory::CaseInsensitive); - + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); @@ -62,6 +62,7 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); @@ -87,6 +88,7 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index ff97894d98e..bef2d1816f2 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -670,6 +671,8 @@ struct ConvertImpl) data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); + else if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); else if constexpr (std::is_same_v) data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss") + 1)); else if constexpr (std::is_same_v) @@ -751,6 +754,14 @@ inline void parseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb x = tmp; } +template <> +inline void parseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +{ + ExtendedDayNum tmp(0); + readDateText(tmp, rb); + x = tmp; +} + // NOTE: no need of extra overload of DateTime64, since readDateTimeText64 has different signature and that case is explicitly handled in the calling code. template <> inline void parseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) @@ -791,6 +802,16 @@ inline bool tryParseImpl(DataTypeDate::FieldType & x, ReadBuffer & return true; } +template <> +inline bool tryParseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) +{ + ExtendedDayNum tmp(0); + if (!tryReadDateText(tmp, rb)) + return false; + x = tmp; + return true; +} + template <> inline bool tryParseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) { @@ -1215,6 +1236,7 @@ struct ConvertImpl() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) @@ -1991,6 +2013,7 @@ using FunctionToInt256 = FunctionConvert>; using FunctionToFloat64 = FunctionConvert>; using FunctionToDate = FunctionConvert; +using FunctionToDate32 = FunctionConvert; using FunctionToDateTime = FunctionConvert; using FunctionToDateTime32 = FunctionConvert; using FunctionToDateTime64 = FunctionConvert; @@ -2050,6 +2073,7 @@ struct NameToInt256OrZero { static constexpr auto name = "toInt256OrZero"; }; struct NameToFloat32OrZero { static constexpr auto name = "toFloat32OrZero"; }; struct NameToFloat64OrZero { static constexpr auto name = "toFloat64OrZero"; }; struct NameToDateOrZero { static constexpr auto name = "toDateOrZero"; }; +struct NameToDate32OrZero { static constexpr auto name = "toDate32OrZero"; }; struct NameToDateTimeOrZero { static constexpr auto name = "toDateTimeOrZero"; }; struct NameToDateTime64OrZero { static constexpr auto name = "toDateTime64OrZero"; }; struct NameToDecimal32OrZero { static constexpr auto name = "toDecimal32OrZero"; }; @@ -2073,6 +2097,7 @@ using FunctionToInt256OrZero = FunctionConvertFromString; using FunctionToFloat64OrZero = FunctionConvertFromString; using FunctionToDateOrZero = FunctionConvertFromString; 
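Note (standalone sketch, not ClickHouse code): the parseImpl/tryParseImpl overloads added above are what the new toDate32OrZero / toDate32OrNull conversions build on — parsing reports failure through a bool instead of throwing, and thin wrappers map failure either to a default value or to NULL. A minimal model of that pattern, using std::from_chars as a stand-in for the real date parser:

#include <charconv>
#include <cstdint>
#include <optional>
#include <string_view>

// Stand-in for tryReadDateText: parse a day number, report failure via bool.
bool tryParseDays(std::string_view s, int32_t & days)
{
    auto [ptr, ec] = std::from_chars(s.data(), s.data() + s.size(), days);
    return ec == std::errc() && ptr == s.data() + s.size();
}

// "...OrZero" behaviour: unparsable input yields the default value.
int32_t parseOrZero(std::string_view s)
{
    int32_t d = 0;
    return tryParseDays(s, d) ? d : 0;
}

// "...OrNull" behaviour: unparsable input yields no value (NULL).
std::optional<int32_t> parseOrNull(std::string_view s)
{
    int32_t d = 0;
    if (!tryParseDays(s, d))
        return std::nullopt;
    return d;
}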
+using FunctionToDate32OrZero = FunctionConvertFromString; using FunctionToDateTimeOrZero = FunctionConvertFromString; using FunctionToDateTime64OrZero = FunctionConvertFromString; using FunctionToDecimal32OrZero = FunctionConvertFromString, NameToDecimal32OrZero, ConvertFromStringExceptionMode::Zero>; @@ -2096,6 +2121,7 @@ struct NameToInt256OrNull { static constexpr auto name = "toInt256OrNull"; }; struct NameToFloat32OrNull { static constexpr auto name = "toFloat32OrNull"; }; struct NameToFloat64OrNull { static constexpr auto name = "toFloat64OrNull"; }; struct NameToDateOrNull { static constexpr auto name = "toDateOrNull"; }; +struct NameToDate32OrNull { static constexpr auto name = "toDate32OrNull"; }; struct NameToDateTimeOrNull { static constexpr auto name = "toDateTimeOrNull"; }; struct NameToDateTime64OrNull { static constexpr auto name = "toDateTime64OrNull"; }; struct NameToDecimal32OrNull { static constexpr auto name = "toDecimal32OrNull"; }; @@ -2119,6 +2145,7 @@ using FunctionToInt256OrNull = FunctionConvertFromString; using FunctionToFloat64OrNull = FunctionConvertFromString; using FunctionToDateOrNull = FunctionConvertFromString; +using FunctionToDate32OrNull = FunctionConvertFromString; using FunctionToDateTimeOrNull = FunctionConvertFromString; using FunctionToDateTime64OrNull = FunctionConvertFromString; using FunctionToDecimal32OrNull = FunctionConvertFromString, NameToDecimal32OrNull, ConvertFromStringExceptionMode::Null>; diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 381401be2c5..118855b4bf8 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -163,13 +163,6 @@ public: arguments[0]->getName(), getName()); - if (!WhichDataType(arguments[1]).isUInt64() && - !isTuple(arguments[1])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of second argument of function {} must be UInt64 or tuple(...)", - arguments[1]->getName(), - getName()); - return std::make_shared(); } @@ -189,8 +182,8 @@ public: auto dictionary_key_type = dictionary->getKeyType(); const ColumnWithTypeAndName & key_column_with_type = arguments[1]; - const auto key_column = key_column_with_type.column; - const auto key_column_type = WhichDataType(key_column_with_type.type); + auto key_column = key_column_with_type.column; + auto key_column_type = key_column_with_type.type; ColumnPtr range_col = nullptr; DataTypePtr range_col_type = nullptr; @@ -214,7 +207,7 @@ public: if (dictionary_key_type == DictionaryKeyType::simple) { - if (!key_column_type.isUInt64()) + if (!WhichDataType(key_column_type).isUInt64()) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument of function {} must be UInt64 when dictionary is simple. Actual type {}.", @@ -225,24 +218,39 @@ public: } else if (dictionary_key_type == DictionaryKeyType::complex) { - if (!key_column_type.isTuple()) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Second argument of function {} must be tuple when dictionary is complex. Actual type {}.", - getName(), - key_column_with_type.type->getName()); - /// Functions in external dictionaries_loader only support full-value (not constant) columns with keys. 
- ColumnPtr key_column_full = key_column->convertToFullColumnIfConst(); + key_column = key_column->convertToFullColumnIfConst(); + size_t keys_size = dictionary->getStructure().getKeysSize(); - const auto & key_columns = typeid_cast(*key_column_full).getColumnsCopy(); - const auto & key_types = static_cast(*key_column_with_type.type).getElements(); + if (!isTuple(key_column_type)) + { + if (keys_size > 1) + { + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Third argument of function {} must be tuple when dictionary is complex and key contains more than 1 attribute." + "Actual type {}.", + getName(), + key_column_type->getName()); + } + else + { + Columns tuple_columns = {std::move(key_column)}; + key_column = ColumnTuple::create(tuple_columns); + + DataTypes tuple_types = {key_column_type}; + key_column_type = std::make_shared(tuple_types); + } + } + + const auto & key_columns = assert_cast(*key_column).getColumnsCopy(); + const auto & key_types = assert_cast(*key_column_type).getElements(); return dictionary->hasKeys(key_columns, key_types); } else { - if (!key_column_type.isUInt64()) + if (!WhichDataType(key_column_type).isUInt64()) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument of function {} must be UInt64 when dictionary is range. Actual type {}.", @@ -346,13 +354,6 @@ public: Strings attribute_names = getAttributeNamesFromColumn(arguments[1].column, arguments[1].type); auto dictionary = helper.getDictionary(dictionary_name); - - if (!WhichDataType(arguments[2].type).isUInt64() && !isTuple(arguments[2].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of third argument of function {}, must be UInt64 or tuple(...).", - arguments[2].type->getName(), - getName()); - auto dictionary_key_type = dictionary->getKeyType(); size_t current_arguments_index = 3; @@ -446,18 +447,35 @@ public: } else if (dictionary_key_type == DictionaryKeyType::complex) { - if (!isTuple(key_col_with_type.type)) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Third argument of function {} must be tuple when dictionary is complex. Actual type {}.", - getName(), - key_col_with_type.type->getName()); - /// Functions in external dictionaries_loader only support full-value (not constant) columns with keys. - ColumnPtr key_column_full = key_col_with_type.column->convertToFullColumnIfConst(); + ColumnPtr key_column = key_col_with_type.column->convertToFullColumnIfConst(); + DataTypePtr key_column_type = key_col_with_type.type; - const auto & key_columns = typeid_cast(*key_column_full).getColumnsCopy(); - const auto & key_types = static_cast(*key_col_with_type.type).getElements(); + size_t keys_size = dictionary->getStructure().getKeysSize(); + + if (!isTuple(key_column_type)) + { + if (keys_size > 1) + { + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Third argument of function {} must be tuple when dictionary is complex and key contains more than 1 attribute." 
+ "Actual type {}.", + getName(), + key_col_with_type.type->getName()); + } + else + { + Columns tuple_columns = {std::move(key_column)}; + key_column = ColumnTuple::create(tuple_columns); + + DataTypes tuple_types = {key_column_type}; + key_column_type = std::make_shared(tuple_types); + } + } + + const auto & key_columns = assert_cast(*key_column).getColumnsCopy(); + const auto & key_types = assert_cast(*key_column_type).getElements(); result = executeDictionaryRequest( dictionary, diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index cfa6eebf44b..d83ef024394 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -683,6 +683,8 @@ public: return executeType(arguments); else if (which.isDate()) return executeType(arguments); + else if (which.isDate32()) + return executeType(arguments); else if (which.isDateTime()) return executeType(arguments); else if (which.isDecimal32()) @@ -986,6 +988,7 @@ private: else if (which.isEnum8()) executeIntType(icolumn, vec_to); else if (which.isEnum16()) executeIntType(icolumn, vec_to); else if (which.isDate()) executeIntType(icolumn, vec_to); + else if (which.isDate32()) executeIntType(icolumn, vec_to); else if (which.isDateTime()) executeIntType(icolumn, vec_to); /// TODO: executeIntType() for Decimal32/64 leads to incompatible result else if (which.isDecimal32()) executeBigIntType(icolumn, vec_to); diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h index a8b1f014383..d0762ff9a8d 100644 --- a/src/Functions/FunctionsJSON.h +++ b/src/Functions/FunctionsJSON.h @@ -607,6 +607,8 @@ public: } }; +template +class JSONExtractRawImpl; /// Nodes of the extract tree. We need the extract tree to extract from JSON complex values containing array, tuples or nullables. template @@ -691,7 +693,10 @@ struct JSONExtractTree public: bool insertResultToColumn(IColumn & dest, const Element & element) override { - return JSONExtractStringImpl::insertResultToColumn(dest, element, {}); + if (element.isString()) + return JSONExtractStringImpl::insertResultToColumn(dest, element, {}); + else + return JSONExtractRawImpl::insertResultToColumn(dest, element, {}); } }; diff --git a/src/Functions/GatherUtils/Sources.h b/src/Functions/GatherUtils/Sources.h index 4dbaff9f567..9a459860a68 100644 --- a/src/Functions/GatherUtils/Sources.h +++ b/src/Functions/GatherUtils/Sources.h @@ -755,6 +755,7 @@ struct GenericValueSource : public ValueSourceImpl { using Slice = GenericValueSlice; using SinkType = GenericArraySink; + using Column = IColumn; const IColumn * column; size_t total_rows; diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 0da55343b9d..c00baf2850b 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -113,7 +113,8 @@ public: virtual ~IFunctionBase() = default; - virtual ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run = false) const + virtual ColumnPtr execute( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run = false) const { return prepare(arguments)->execute(arguments, result_type, input_rows_count, dry_run); } @@ -161,7 +162,8 @@ public: * Arguments are passed without modifications, useDefaultImplementationForNulls, useDefaultImplementationForConstants, * useDefaultImplementationForLowCardinality are not applied. 
*/ - virtual ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & /* arguments */, const DataTypePtr & /* result_type */) const { return nullptr; } + virtual ColumnPtr getConstantResultForNonConstArguments( + const ColumnsWithTypeAndName & /* arguments */, const DataTypePtr & /* result_type */) const { return nullptr; } /** Function is called "injective" if it returns different result for different values of arguments. * Example: hex, negate, tuple... @@ -358,6 +360,10 @@ public: */ virtual bool useDefaultImplementationForConstants() const { return false; } + /** Some arguments could remain constant during this implementation. + */ + virtual ColumnNumbers getArgumentsThatAreAlwaysConstant() const { return {}; } + /** If function arguments has single low cardinality column and all other arguments are constants, call function on nested column. * Otherwise, convert all low cardinality columns to ordinary columns. * Returns ColumnLowCardinality if at least one argument is ColumnLowCardinality. @@ -367,10 +373,6 @@ public: /// If it isn't, will convert all ColumnLowCardinality arguments to full columns. virtual bool canBeExecutedOnLowCardinalityDictionary() const { return true; } - /** Some arguments could remain constant during this implementation. - */ - virtual ColumnNumbers getArgumentsThatAreAlwaysConstant() const { return {}; } - /** True if function can be called on default arguments (include Nullable's) and won't throw. * Counterexample: modulo(0, 0) */ diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index 3367b52cc36..a050de2edb6 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -40,6 +41,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } @@ -304,6 +306,13 @@ void PointInPolygonWithGrid::calcGridAttributes( y_scale = 1 / cell_height; x_shift = -min_corner.x(); y_shift = -min_corner.y(); + + if (!(isFinite(x_scale) + && isFinite(y_scale) + && isFinite(x_shift) + && isFinite(y_shift) + && isFinite(grid_size))) + throw Exception("Polygon is not valid: bounding box is unbounded", ErrorCodes::BAD_ARGUMENTS); } template @@ -358,7 +367,7 @@ bool PointInPolygonWithGrid::contains(CoordinateType x, Coordina if (has_empty_bound) return false; - if (std::isnan(x) || std::isnan(y)) + if (!isFinite(x) || !isFinite(y)) return false; CoordinateType float_row = (y + y_shift) * y_scale; diff --git a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h index 4670d610725..08576fe59ec 100644 --- a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h +++ b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h @@ -41,6 +41,9 @@ public: String getName() const override { return name; } size_t getNumberOfArguments() const override { return 2; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (!isString(arguments[0].type)) @@ -65,9 +68,7 @@ public: const ColumnConst * column_tld_list_name = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get()); FirstSignificantSubdomainCustomLookup tld_lookup(column_tld_list_name->getValue()); - /// FIXME: convertToFullColumnIfConst() is suboptimal - auto column = 
arguments[0].column->convertToFullColumnIfConst(); - if (const ColumnString * col = checkAndGetColumn(*column)) + if (const ColumnString * col = checkAndGetColumn(*arguments[0].column)) { auto col_res = ColumnString::create(); vector(tld_lookup, col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets()); diff --git a/src/Functions/array/arrayElement.cpp b/src/Functions/array/arrayElement.cpp index f3d3f558d7b..d39a865133e 100644 --- a/src/Functions/array/arrayElement.cpp +++ b/src/Functions/array/arrayElement.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -110,6 +111,9 @@ private: static bool matchKeyToIndexString(const IColumn & data, const Offsets & offsets, const ColumnsWithTypeAndName & arguments, PaddedPODArray & matched_idxs); + static bool matchKeyToIndexFixedString(const IColumn & data, const Offsets & offsets, + const ColumnsWithTypeAndName & arguments, PaddedPODArray & matched_idxs); + static bool matchKeyToIndexStringConst(const IColumn & data, const Offsets & offsets, const Field & index, PaddedPODArray & matched_idxs); @@ -767,6 +771,19 @@ struct MatcherString } }; +struct MatcherFixedString +{ + const ColumnFixedString & data; + const ColumnFixedString & index; + + bool match(size_t row_data, size_t row_index) const + { + auto data_ref = data.getDataAt(row_data); + auto index_ref = index.getDataAt(row_index); + return memequalSmallAllowOverflow15(index_ref.data, index_ref.size, data_ref.data, data_ref.size); + } +}; + struct MatcherStringConst { const ColumnString & data; @@ -863,6 +880,23 @@ bool FunctionArrayElement::matchKeyToIndexString( return true; } +bool FunctionArrayElement::matchKeyToIndexFixedString( + const IColumn & data, const Offsets & offsets, + const ColumnsWithTypeAndName & arguments, PaddedPODArray & matched_idxs) +{ + const auto * index_string = checkAndGetColumn(arguments[1].column.get()); + if (!index_string) + return false; + + const auto * data_string = checkAndGetColumn(&data); + if (!data_string) + return false; + + MatcherFixedString matcher{*data_string, *index_string}; + executeMatchKeyToIndex(offsets, matched_idxs, matcher); + return true; +} + template bool FunctionArrayElement::matchKeyToIndexNumberConst( const IColumn & data, const Offsets & offsets, @@ -922,8 +956,10 @@ bool FunctionArrayElement::matchKeyToIndex( || matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) || matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) || matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) + || matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) || matchKeyToIndexNumber(data, offsets, arguments, matched_idxs) - || matchKeyToIndexString(data, offsets, arguments, matched_idxs); + || matchKeyToIndexString(data, offsets, arguments, matched_idxs) + || matchKeyToIndexFixedString(data, offsets, arguments, matched_idxs); } bool FunctionArrayElement::matchKeyToIndexConst( diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 6627e18b3c4..4ed2b212875 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -407,6 +408,9 @@ ColumnPtr FunctionArrayIntersect::executeImpl(const ColumnsWithTypeAndName & arg using DateMap = ClearableHashMapWithStackMemory, INITIAL_SIZE_DEGREE>; + using Date32Map = ClearableHashMapWithStackMemory, INITIAL_SIZE_DEGREE>; + using DateTimeMap = ClearableHashMapWithStackMemory< 
DataTypeDateTime::FieldType, size_t, DefaultHash, INITIAL_SIZE_DEGREE>; @@ -421,6 +425,8 @@ ColumnPtr FunctionArrayIntersect::executeImpl(const ColumnsWithTypeAndName & arg if (which.isDate()) result_column = execute, true>(arrays, std::move(column)); + else if (which.isDate32()) + result_column = execute, true>(arrays, std::move(column)); else if (which.isDateTime()) result_column = execute, true>(arrays, std::move(column)); else if (which.isString()) diff --git a/src/Functions/geoToH3.cpp b/src/Functions/geoToH3.cpp index 7edb3faf62d..2dad8fc13f2 100644 --- a/src/Functions/geoToH3.cpp +++ b/src/Functions/geoToH3.cpp @@ -21,6 +21,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int INCORRECT_DATA; } namespace @@ -79,11 +80,14 @@ public: const double lat = col_lat->getFloat64(row); const UInt8 res = col_res->getUInt(row); - GeoCoord coord; - coord.lon = degsToRads(lon); + LatLng coord; + coord.lng = degsToRads(lon); coord.lat = degsToRads(lat); - H3Index hindex = geoToH3(&coord, res); + H3Index hindex; + H3Error err = latLngToCell(&coord, res, &hindex); + if (err) + throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect coordinates latitude: {}, longitude: {}, error: {}", coord.lat, coord.lng, err); dst_data[row] = hindex; } diff --git a/src/Functions/h3EdgeAngle.cpp b/src/Functions/h3EdgeAngle.cpp index 0fdafff9eed..071581a7c60 100644 --- a/src/Functions/h3EdgeAngle.cpp +++ b/src/Functions/h3EdgeAngle.cpp @@ -66,7 +66,7 @@ public: + " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND); // Numerical constant is 180 degrees / pi / Earth radius, Earth radius is from h3 sources - Float64 res = 8.99320592271288084e-6 * edgeLengthM(resolution); + Float64 res = 8.99320592271288084e-6 * getHexagonEdgeLengthAvgM(resolution); dst_data[row] = res; } diff --git a/src/Functions/h3EdgeLengthM.cpp b/src/Functions/h3EdgeLengthM.cpp index 5ec57510e54..56374e10077 100644 --- a/src/Functions/h3EdgeLengthM.cpp +++ b/src/Functions/h3EdgeLengthM.cpp @@ -70,7 +70,7 @@ public: throw Exception("The argument 'resolution' (" + toString(resolution) + ") of function " + getName() + " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND); - Float64 res = edgeLengthM(resolution); + Float64 res = getHexagonEdgeLengthAvgM(resolution); dst_data[row] = res; } diff --git a/src/Functions/h3GetBaseCell.cpp b/src/Functions/h3GetBaseCell.cpp index 7f3843ed792..b73245f751b 100644 --- a/src/Functions/h3GetBaseCell.cpp +++ b/src/Functions/h3GetBaseCell.cpp @@ -59,7 +59,7 @@ public: { const UInt64 hindex = col_hindex->getUInt(row); - UInt8 res = h3GetBaseCell(hindex); + UInt8 res = getBaseCellNumber(hindex); dst_data[row] = res; } diff --git a/src/Functions/h3GetResolution.cpp b/src/Functions/h3GetResolution.cpp index 074e07e4277..49ade509934 100644 --- a/src/Functions/h3GetResolution.cpp +++ b/src/Functions/h3GetResolution.cpp @@ -59,7 +59,7 @@ public: { const UInt64 hindex = col_hindex->getUInt(row); - UInt8 res = h3GetResolution(hindex); + UInt8 res = getResolution(hindex); dst_data[row] = res; } diff --git a/src/Functions/h3HexAreaM2.cpp b/src/Functions/h3HexAreaM2.cpp index e630fb7bd70..7f41348a14b 100644 --- a/src/Functions/h3HexAreaM2.cpp +++ b/src/Functions/h3HexAreaM2.cpp @@ -65,7 +65,7 @@ public: throw Exception("The argument 'resolution' (" + toString(resolution) + ") of function " + getName() + " is out of bounds because the 
maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND); - Float64 res = hexAreaM2(resolution); + Float64 res = getHexagonAreaAvgM2(resolution); dst_data[row] = res; } diff --git a/src/Functions/h3IndexesAreNeighbors.cpp b/src/Functions/h3IndexesAreNeighbors.cpp index 3c03d3d1adb..6507998e24c 100644 --- a/src/Functions/h3IndexesAreNeighbors.cpp +++ b/src/Functions/h3IndexesAreNeighbors.cpp @@ -67,7 +67,7 @@ public: const UInt64 hindex_origin = col_hindex_origin->getUInt(row); const UInt64 hindex_dest = col_hindex_dest->getUInt(row); - UInt8 res = h3IndexesAreNeighbors(hindex_origin, hindex_dest); + UInt8 res = areNeighborCells(hindex_origin, hindex_dest); dst_data[row] = res; } diff --git a/src/Functions/h3IsValid.cpp b/src/Functions/h3IsValid.cpp index d7f5a2c0771..bc140450b71 100644 --- a/src/Functions/h3IsValid.cpp +++ b/src/Functions/h3IsValid.cpp @@ -59,7 +59,7 @@ public: { const UInt64 hindex = col_hindex->getUInt(row); - UInt8 is_valid = h3IsValid(hindex) == 0 ? 0 : 1; + UInt8 is_valid = isValidCell(hindex) == 0 ? 0 : 1; dst_data[row] = is_valid; } diff --git a/src/Functions/h3ToChildren.cpp b/src/Functions/h3ToChildren.cpp index d472c298432..88ac3056e72 100644 --- a/src/Functions/h3ToChildren.cpp +++ b/src/Functions/h3ToChildren.cpp @@ -84,14 +84,14 @@ public: throw Exception("The argument 'resolution' (" + toString(child_resolution) + ") of function " + getName() + " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND); - const size_t vec_size = maxH3ToChildrenSize(parent_hindex, child_resolution); + const size_t vec_size = cellToChildrenSize(parent_hindex, child_resolution); if (vec_size > MAX_ARRAY_SIZE) throw Exception("The result of function" + getName() + " (array of " + toString(vec_size) + " elements) will be too large with resolution argument = " + toString(child_resolution), ErrorCodes::TOO_LARGE_ARRAY_SIZE); hindex_vec.resize(vec_size); - h3ToChildren(parent_hindex, child_resolution, hindex_vec.data()); + cellToChildren(parent_hindex, child_resolution, hindex_vec.data()); dst_data.reserve(dst_data.size() + vec_size); for (auto hindex : hindex_vec) diff --git a/src/Functions/h3ToParent.cpp b/src/Functions/h3ToParent.cpp index 6719d9f3456..9755184d63c 100644 --- a/src/Functions/h3ToParent.cpp +++ b/src/Functions/h3ToParent.cpp @@ -74,7 +74,7 @@ public: throw Exception("The argument 'resolution' (" + toString(resolution) + ") of function " + getName() + " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND); - UInt64 res = h3ToParent(hindex, resolution); + UInt64 res = cellToParent(hindex, resolution); dst_data[row] = res; } diff --git a/src/Functions/h3ToString.cpp b/src/Functions/h3ToString.cpp index dcd0951f67f..8ac97db0621 100644 --- a/src/Functions/h3ToString.cpp +++ b/src/Functions/h3ToString.cpp @@ -66,7 +66,7 @@ public: { const UInt64 hindex = col_hindex->getUInt(i); - if (!h3IsValid(hindex)) + if (!isValidCell(hindex)) { throw Exception("Invalid H3 index: " + std::to_string(hindex), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } diff --git a/src/Functions/h3kRing.cpp b/src/Functions/h3kRing.cpp index b54ed48ef3f..8b91f2fa1c7 100644 --- a/src/Functions/h3kRing.cpp +++ b/src/Functions/h3kRing.cpp @@ -77,7 +77,7 @@ public: const H3Index origin_hindex = col_hindex->getUInt(row); const int k = col_k->getInt(row); - /// Overflow is possible. 
The function maxKringSize does not check for overflow. + /// Overflow is possible. The function maxGridDiskSize does not check for overflow. /// The calculation is similar to square of k but several times more. /// Let's use huge underestimation as the safe bound. We should not allow to generate too large arrays nevertheless. constexpr auto max_k = 10000; @@ -86,9 +86,9 @@ public: if (k < 0) throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Argument 'k' for {} function must be non negative", getName()); - const auto vec_size = maxKringSize(k); + const auto vec_size = maxGridDiskSize(k); hindex_vec.resize(vec_size); - kRing(origin_hindex, k, hindex_vec.data()); + gridDisk(origin_hindex, k, hindex_vec.data()); dst_data.reserve(dst_data.size() + vec_size); for (auto hindex : hindex_vec) diff --git a/src/Functions/padString.cpp b/src/Functions/padString.cpp new file mode 100644 index 00000000000..c03733a1198 --- /dev/null +++ b/src/Functions/padString.cpp @@ -0,0 +1,319 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +using namespace GatherUtils; + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int TOO_LARGE_STRING_SIZE; +} + +namespace +{ + /// The maximum new padded length. + constexpr size_t MAX_NEW_LENGTH = 1000000; + + /// Appends padding characters to a sink based on a pad string. + /// Depending on how many padding characters are required to add + /// the pad string can be copied only partly or be repeated multiple times. + template + class PaddingChars + { + public: + explicit PaddingChars(const String & pad_string_) : pad_string(pad_string_) { init(); } + + ALWAYS_INLINE size_t numCharsInPadString() const + { + if constexpr (is_utf8) + return utf8_offsets.size() - 1; + else + return pad_string.length(); + } + + ALWAYS_INLINE size_t numCharsToNumBytes(size_t count) const + { + if constexpr (is_utf8) + return utf8_offsets[count]; + else + return count; + } + + void appendTo(StringSink & res_sink, size_t num_chars) const + { + if (!num_chars) + return; + + const size_t step = numCharsInPadString(); + while (true) + { + if (num_chars <= step) + { + writeSlice(StringSource::Slice{bit_cast(pad_string.data()), numCharsToNumBytes(num_chars)}, res_sink); + break; + } + writeSlice(StringSource::Slice{bit_cast(pad_string.data()), numCharsToNumBytes(step)}, res_sink); + num_chars -= step; + } + } + + private: + void init() + { + if (pad_string.empty()) + pad_string = " "; + + if constexpr (is_utf8) + { + size_t offset = 0; + utf8_offsets.reserve(pad_string.length() + 1); + while (true) + { + utf8_offsets.push_back(offset); + if (offset == pad_string.length()) + break; + offset += UTF8::seqLength(pad_string[offset]); + if (offset > pad_string.length()) + offset = pad_string.length(); + } + } + + /// Not necessary, but good for performance. + /// We repeat `pad_string` multiple times until it's length becomes 16 or more. + /// It speeds up the function appendTo() because it allows to copy padding characters by portions of at least + /// 16 bytes instead of single bytes. 
+ while (numCharsInPadString() < 16) + { + pad_string += pad_string; + if constexpr (is_utf8) + { + size_t old_size = utf8_offsets.size(); + utf8_offsets.reserve((old_size - 1) * 2); + size_t base = utf8_offsets.back(); + for (size_t i = 1; i != old_size; ++i) + utf8_offsets.push_back(utf8_offsets[i] + base); + } + } + } + + String pad_string; + + /// Offsets of code points in `pad_string`: + /// utf8_offsets[0] is the offset of the first code point in `pad_string`, it's always 0; + /// utf8_offsets[1] is the offset of the second code point in `pad_string`; + /// utf8_offsets[2] is the offset of the third code point in `pad_string`; + /// ... + std::vector utf8_offsets; + }; + + /// Returns the number of characters in a slice. + template + inline ALWAYS_INLINE size_t getLengthOfSlice(const StringSource::Slice & slice) + { + if constexpr (is_utf8) + return UTF8::countCodePoints(slice.data, slice.size); + else + return slice.size; + } + + /// Moves the end of a slice back by n characters. + template + inline ALWAYS_INLINE StringSource::Slice removeSuffixFromSlice(const StringSource::Slice & slice, size_t suffix_length) + { + StringSource::Slice res = slice; + if constexpr (is_utf8) + res.size = UTF8StringSource::skipCodePointsBackward(slice.data + slice.size, suffix_length, slice.data) - res.data; + else + res.size -= std::min(suffix_length, res.size); + return res; + } + + /// If `is_right_pad` - it's the rightPad() function instead of leftPad(). + /// If `is_utf8` - lengths are measured in code points instead of bytes. + template + class FunctionPadString : public IFunction + { + public: + static constexpr auto name = is_right_pad ? (is_utf8 ? "rightPadUTF8" : "rightPad") : (is_utf8 ? "leftPadUTF8" : "leftPad"); + static FunctionPtr create(const ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + + bool useDefaultImplementationForConstants() const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + size_t number_of_arguments = arguments.size(); + + if (number_of_arguments != 2 && number_of_arguments != 3) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3", + getName(), + std::to_string(number_of_arguments)); + + if (!isStringOrFixedString(arguments[0])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of the first argument of function {}, should be string", + arguments[0]->getName(), + getName()); + + if (!isUnsignedInteger(arguments[1])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of the second argument of function {}, should be unsigned integer", + arguments[1]->getName(), + getName()); + + if (number_of_arguments == 3 && !isStringOrFixedString(arguments[2])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of the third argument of function {}, should be const string", + arguments[2]->getName(), + getName()); + + return arguments[0]; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto column_string = arguments[0].column; + auto column_length = arguments[1].column; + + String pad_string; + if (arguments.size() == 3) + { + auto column_pad = arguments[2].column; + const ColumnConst * 
column_pad_const = checkAndGetColumnConst(column_pad.get()); + if (!column_pad_const) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {}, third argument of function {} must be a constant string", + column_pad->getName(), + getName()); + + pad_string = column_pad_const->getValue(); + } + PaddingChars padding_chars{pad_string}; + + auto col_res = ColumnString::create(); + StringSink res_sink{*col_res, input_rows_count}; + + if (const ColumnString * col = checkAndGetColumn(column_string.get())) + executeForSource(StringSource{*col}, column_length, padding_chars, res_sink); + else if (const ColumnFixedString * col_fixed = checkAndGetColumn(column_string.get())) + executeForSource(FixedStringSource{*col_fixed}, column_length, padding_chars, res_sink); + else if (const ColumnConst * col_const = checkAndGetColumnConst(column_string.get())) + executeForSource(ConstSource{*col_const}, column_length, padding_chars, res_sink); + else if (const ColumnConst * col_const_fixed = checkAndGetColumnConst(column_string.get())) + executeForSource(ConstSource{*col_const_fixed}, column_length, padding_chars, res_sink); + else + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {}, first argument of function {} must be a string", + arguments[0].column->getName(), + getName()); + + return col_res; + } + + private: + template + void executeForSource( + SourceStrings && strings, + const ColumnPtr & column_length, + const PaddingChars & padding_chars, + StringSink & res_sink) const + { + if (const auto * col_const = checkAndGetColumn(column_length.get())) + executeForSourceAndLength(std::forward(strings), ConstSource{*col_const}, padding_chars, res_sink); + else + executeForSourceAndLength(std::forward(strings), GenericValueSource{*column_length}, padding_chars, res_sink); + } + + template + void executeForSourceAndLength( + SourceStrings && strings, + SourceLengths && lengths, + const PaddingChars & padding_chars, + StringSink & res_sink) const + { + bool is_const_new_length = lengths.isConst(); + size_t new_length = 0; + + /// Insert padding characters to each string from `strings`, write the result strings into `res_sink`. + /// If for some input string its current length is greater than the specified new length then that string + /// will be trimmed to the specified new length instead of padding. + for (; !res_sink.isEnd(); res_sink.next(), strings.next(), lengths.next()) + { + auto str = strings.getWhole(); + size_t current_length = getLengthOfSlice(str); + + if (!res_sink.rowNum() || !is_const_new_length) + { + /// If `is_const_new_length` is true we can get and check the new length only once. 
+ auto new_length_slice = lengths.getWhole(); + new_length = new_length_slice.elements->getUInt(new_length_slice.position); + if (new_length > MAX_NEW_LENGTH) + { + throw Exception( + "New padded length (" + std::to_string(new_length) + ") is too big, maximum is: " + std::to_string(MAX_NEW_LENGTH), + ErrorCodes::TOO_LARGE_STRING_SIZE); + } + if (is_const_new_length) + { + size_t rows_count = res_sink.offsets.size(); + res_sink.reserve((new_length + 1 /* zero terminator */) * rows_count); + } + } + + if (new_length == current_length) + { + writeSlice(str, res_sink); + } + else if (new_length < current_length) + { + str = removeSuffixFromSlice(str, current_length - new_length); + writeSlice(str, res_sink); + } + else if (new_length > current_length) + { + if constexpr (!is_right_pad) + padding_chars.appendTo(res_sink, new_length - current_length); + + writeSlice(str, res_sink); + + if constexpr (is_right_pad) + padding_chars.appendTo(res_sink, new_length - current_length); + } + } + } + }; +} + +void registerFunctionPadString(FunctionFactory & factory) +{ + factory.registerFunction>(); /// leftPad + factory.registerFunction>(); /// leftPadUTF8 + factory.registerFunction>(); /// rightPad + factory.registerFunction>(); /// rightPadUTF8 + + factory.registerAlias("lpad", "leftPad", FunctionFactory::CaseInsensitive); + factory.registerAlias("rpad", "rightPad", FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/registerFunctionsString.cpp b/src/Functions/registerFunctionsString.cpp index f6f95489f82..18a30469386 100644 --- a/src/Functions/registerFunctionsString.cpp +++ b/src/Functions/registerFunctionsString.cpp @@ -29,6 +29,7 @@ void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &); void registerFunctionStartsWith(FunctionFactory &); void registerFunctionEndsWith(FunctionFactory &); void registerFunctionTrim(FunctionFactory &); +void registerFunctionPadString(FunctionFactory &); void registerFunctionRegexpQuoteMeta(FunctionFactory &); void registerFunctionNormalizeQuery(FunctionFactory &); void registerFunctionNormalizedQueryHash(FunctionFactory &); @@ -68,6 +69,7 @@ void registerFunctionsString(FunctionFactory & factory) registerFunctionStartsWith(factory); registerFunctionEndsWith(factory); registerFunctionTrim(factory); + registerFunctionPadString(factory); registerFunctionRegexpQuoteMeta(factory); registerFunctionNormalizeQuery(factory); registerFunctionNormalizedQueryHash(factory); diff --git a/src/Functions/runningDifference.h b/src/Functions/runningDifference.h index f87c57af043..52796653d32 100644 --- a/src/Functions/runningDifference.h +++ b/src/Functions/runningDifference.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -115,6 +116,8 @@ private: f(Float64()); else if (which.isDate()) f(DataTypeDate::FieldType()); + else if (which.isDate32()) + f(DataTypeDate::FieldType()); else if (which.isDateTime()) f(DataTypeDateTime::FieldType()); else diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 6d5f880f744..ab87836de35 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,11 @@ namespace return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years); } + static UInt16 execute(Int32 d, UInt64 years, const DateLUTImpl & time_zone) + { + return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years); + } + static UInt16 execute(UInt32 t, UInt64 years, 
const DateLUTImpl & time_zone) { return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); @@ -60,6 +66,11 @@ namespace return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters); } + static UInt16 execute(Int32 d, UInt64 quarters, const DateLUTImpl & time_zone) + { + return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters); + } + static UInt16 execute(UInt32 t, UInt64 quarters, const DateLUTImpl & time_zone) { return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); @@ -81,6 +92,11 @@ namespace return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months); } + static UInt16 execute(Int32 d, UInt64 months, const DateLUTImpl & time_zone) + { + return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months); + } + static UInt16 execute(UInt32 t, UInt64 months, const DateLUTImpl & time_zone) { return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); @@ -102,6 +118,11 @@ namespace return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks); } + static UInt16 execute(Int32 d, UInt64 weeks, const DateLUTImpl & time_zone) + { + return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks); + } + static UInt16 execute(UInt32 t, UInt64 weeks, const DateLUTImpl & time_zone) { return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); @@ -123,6 +144,11 @@ namespace return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days); } + static UInt32 execute(Int32 d, UInt64 days, const DateLUTImpl & time_zone) + { + return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days); + } + static UInt32 execute(UInt32 t, UInt64 days, const DateLUTImpl & time_zone) { return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days); @@ -140,6 +166,7 @@ namespace static constexpr auto name = function_name; static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } static UInt32 execute(UInt32 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); } static UInt32 execute(Int64 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); } }; @@ -151,6 +178,8 @@ namespace static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + static UInt32 execute(UInt32 t, UInt64 minutes, const DateLUTImpl & time_zone) { return time_zone.toStartOfMinuteInterval(t, minutes); @@ -169,6 +198,8 @@ namespace static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone) { return time_zone.toStartOfSecondInterval(t, seconds); @@ -299,6 +330,12 @@ private: if (time_column_vec) return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, time_zone); } + if (which_type.isDate32()) + { + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + if (time_column_vec) + return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, time_zone); + } if (which_type.isDateTime64()) { const auto * time_column_vec = 
checkAndGetColumn(time_column.column.get()); diff --git a/src/Functions/ya.make b/src/Functions/ya.make index d6da7eadd35..2db4a7645a1 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -4,7 +4,7 @@ OWNER(g:clickhouse) LIBRARY() CFLAGS( - -DUSE_H3 -DUSE_SSL -DUSE_XXHASH + -DUSE_SSL -DUSE_XXHASH ) ADDINCL( @@ -387,6 +387,7 @@ SRCS( now.cpp now64.cpp nullIf.cpp + padString.cpp partitionId.cpp pi.cpp plus.cpp diff --git a/src/Functions/ya.make.in b/src/Functions/ya.make.in index f75773fb47e..b21bf64304a 100644 --- a/src/Functions/ya.make.in +++ b/src/Functions/ya.make.in @@ -3,7 +3,7 @@ OWNER(g:clickhouse) LIBRARY() CFLAGS( - -DUSE_H3 -DUSE_SSL -DUSE_XXHASH + -DUSE_SSL -DUSE_XXHASH ) ADDINCL( diff --git a/src/IO/HashingReadBuffer.h b/src/IO/HashingReadBuffer.h index 08b6de69dcb..5d42c64478c 100644 --- a/src/IO/HashingReadBuffer.h +++ b/src/IO/HashingReadBuffer.h @@ -34,7 +34,7 @@ private: working_buffer = in.buffer(); pos = in.position(); - // `pos` may be different from working_buffer.begin() when using AIO. + // `pos` may be different from working_buffer.begin() when using sophisticated ReadBuffers. calculateHash(pos, working_buffer.end() - pos); return res; diff --git a/src/IO/ReadBufferAIO.cpp b/src/IO/ReadBufferAIO.cpp deleted file mode 100644 index c064e0d4ed9..00000000000 --- a/src/IO/ReadBufferAIO.cpp +++ /dev/null @@ -1,312 +0,0 @@ -#if defined(OS_LINUX) || defined(__FreeBSD__) - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - - -namespace ProfileEvents -{ - extern const Event FileOpen; - extern const Event ReadBufferAIORead; - extern const Event ReadBufferAIOReadBytes; -} - -namespace CurrentMetrics -{ - extern const Metric Read; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int FILE_DOESNT_EXIST; - extern const int CANNOT_OPEN_FILE; - extern const int LOGICAL_ERROR; - extern const int ARGUMENT_OUT_OF_BOUND; - extern const int AIO_READ_ERROR; -} - - -/// Note: an additional page is allocated that will contain the data that -/// does not fit into the main buffer. -ReadBufferAIO::ReadBufferAIO(const std::string & filename_, size_t buffer_size_, int flags_, char * existing_memory_) - : ReadBufferFromFileBase(buffer_size_ + DEFAULT_AIO_FILE_BLOCK_SIZE, existing_memory_, DEFAULT_AIO_FILE_BLOCK_SIZE), - fill_buffer(BufferWithOwnMemory(internalBuffer().size(), nullptr, DEFAULT_AIO_FILE_BLOCK_SIZE)), - filename(filename_) -{ - ProfileEvents::increment(ProfileEvents::FileOpen); - - int open_flags = (flags_ == -1) ? O_RDONLY : flags_; - open_flags |= O_DIRECT; - open_flags |= O_CLOEXEC; - - fd = ::open(filename.c_str(), open_flags); - if (fd == -1) - { - auto error_code = (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE; - throwFromErrnoWithPath("Cannot open file " + filename, filename, error_code); - } -} - -ReadBufferAIO::~ReadBufferAIO() -{ - if (!aio_failed) - { - try - { - (void) waitForAIOCompletion(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - if (fd != -1) - ::close(fd); -} - -void ReadBufferAIO::setMaxBytes(size_t max_bytes_read_) -{ - if (is_started) - throw Exception("Illegal attempt to set the maximum number of bytes to read from file " + filename, ErrorCodes::LOGICAL_ERROR); - max_bytes_read = max_bytes_read_; -} - -bool ReadBufferAIO::nextImpl() -{ - /// If the end of the file has already been reached by calling this function, - /// then the current call is wrong. 
- if (is_eof) - return false; - - std::optional watch; - if (profile_callback) - watch.emplace(clock_type); - - if (!is_pending_read) - synchronousRead(); - else - receive(); - - if (profile_callback) - { - ProfileInfo info; - info.bytes_requested = requested_byte_count; - info.bytes_read = bytes_read; - info.nanoseconds = watch->elapsed(); //-V1007 - profile_callback(info); - } - - is_started = true; - - /// If the end of the file is just reached, do nothing else. - if (is_eof) - return bytes_read != 0; - - /// Create an asynchronous request. - prepare(); - -#if defined(__FreeBSD__) - request.aio.aio_lio_opcode = LIO_READ; - request.aio.aio_fildes = fd; - request.aio.aio_buf = reinterpret_cast(buffer_begin); - request.aio.aio_nbytes = region_aligned_size; - request.aio.aio_offset = region_aligned_begin; -#else - request.aio_lio_opcode = IOCB_CMD_PREAD; - request.aio_fildes = fd; - request.aio_buf = reinterpret_cast(buffer_begin); - request.aio_nbytes = region_aligned_size; - request.aio_offset = region_aligned_begin; -#endif - - /// Send the request. - try - { - future_bytes_read = AIOContextPool::instance().post(request); - } - catch (...) - { - aio_failed = true; - throw; - } - - is_pending_read = true; - return true; -} - -off_t ReadBufferAIO::seek(off_t off, int whence) -{ - off_t new_pos_in_file; - - if (whence == SEEK_SET) - { - if (off < 0) - throw Exception("SEEK_SET underflow", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - new_pos_in_file = off; - } - else if (whence == SEEK_CUR) - { - if (off >= 0) - { - if (off > (std::numeric_limits::max() - getPosition())) - throw Exception("SEEK_CUR overflow", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - } - else if (off < -getPosition()) - throw Exception("SEEK_CUR underflow", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - new_pos_in_file = getPosition() + off; - } - else - throw Exception("ReadBufferAIO::seek expects SEEK_SET or SEEK_CUR as whence", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - - if (new_pos_in_file != getPosition()) - { - off_t first_read_pos_in_file = first_unread_pos_in_file - static_cast(working_buffer.size()); - if (hasPendingData() && (new_pos_in_file >= first_read_pos_in_file) && (new_pos_in_file <= first_unread_pos_in_file)) - { - /// Moved, but remained within the buffer. - pos = working_buffer.begin() + (new_pos_in_file - first_read_pos_in_file); - } - else - { - /// Moved past the buffer. - pos = working_buffer.end(); - first_unread_pos_in_file = new_pos_in_file; - - /// If we go back, than it's not eof - is_eof = false; - - /// We can not use the result of the current asynchronous request. - skip(); - } - } - - return new_pos_in_file; -} - -void ReadBufferAIO::synchronousRead() -{ - CurrentMetrics::Increment metric_increment_read{CurrentMetrics::Read}; - - prepare(); - bytes_read = ::pread(fd, buffer_begin, region_aligned_size, region_aligned_begin); - - ProfileEvents::increment(ProfileEvents::ReadBufferAIORead); - ProfileEvents::increment(ProfileEvents::ReadBufferAIOReadBytes, bytes_read); - - finalize(); -} - -void ReadBufferAIO::receive() -{ - if (!waitForAIOCompletion()) - { - throw Exception("Trying to receive data from AIO, but nothing was queued. 
It's a bug", ErrorCodes::LOGICAL_ERROR); - } - finalize(); -} - -void ReadBufferAIO::skip() -{ - if (!waitForAIOCompletion()) - return; - - /// @todo I presume this assignment is redundant since waitForAIOCompletion() performs a similar one -// bytes_read = future_bytes_read.get(); - if ((bytes_read < 0) || (static_cast(bytes_read) < region_left_padding)) - throw Exception("Asynchronous read error on file " + filename, ErrorCodes::AIO_READ_ERROR); -} - -bool ReadBufferAIO::waitForAIOCompletion() -{ - if (is_eof || !is_pending_read) - return false; - - CurrentMetrics::Increment metric_increment_read{CurrentMetrics::Read}; - - bytes_read = future_bytes_read.get(); - is_pending_read = false; - - ProfileEvents::increment(ProfileEvents::ReadBufferAIORead); - ProfileEvents::increment(ProfileEvents::ReadBufferAIOReadBytes, bytes_read); - - return true; -} - -void ReadBufferAIO::prepare() -{ - requested_byte_count = std::min(fill_buffer.internalBuffer().size() - DEFAULT_AIO_FILE_BLOCK_SIZE, max_bytes_read); - - /// Region of the disk from which we want to read data. - const off_t region_begin = first_unread_pos_in_file; - - if ((requested_byte_count > static_cast(std::numeric_limits::max())) || - (first_unread_pos_in_file > (std::numeric_limits::max() - static_cast(requested_byte_count)))) - throw Exception("An overflow occurred during file operation", ErrorCodes::LOGICAL_ERROR); - - const off_t region_end = first_unread_pos_in_file + requested_byte_count; - - /// The aligned region of the disk from which we will read the data. - region_left_padding = region_begin % DEFAULT_AIO_FILE_BLOCK_SIZE; - const size_t region_right_padding = (DEFAULT_AIO_FILE_BLOCK_SIZE - (region_end % DEFAULT_AIO_FILE_BLOCK_SIZE)) % DEFAULT_AIO_FILE_BLOCK_SIZE; - - region_aligned_begin = region_begin - region_left_padding; - - if (region_end > (std::numeric_limits::max() - static_cast(region_right_padding))) - throw Exception("An overflow occurred during file operation", ErrorCodes::LOGICAL_ERROR); - - const off_t region_aligned_end = region_end + region_right_padding; - region_aligned_size = region_aligned_end - region_aligned_begin; - - buffer_begin = fill_buffer.internalBuffer().begin(); - - /// Unpoison because msan doesn't instrument linux AIO - __msan_unpoison(buffer_begin, fill_buffer.internalBuffer().size()); -} - -void ReadBufferAIO::finalize() -{ - if ((bytes_read < 0) || (static_cast(bytes_read) < region_left_padding)) - throw Exception("Asynchronous read error on file " + filename, ErrorCodes::AIO_READ_ERROR); - - /// Ignore redundant bytes on the left. - bytes_read -= region_left_padding; - - /// Ignore redundant bytes on the right. - bytes_read = std::min(static_cast(bytes_read), static_cast(requested_byte_count)); - - if (bytes_read > 0) - fill_buffer.buffer().resize(region_left_padding + bytes_read); - if (static_cast(bytes_read) < requested_byte_count) - is_eof = true; - - if (first_unread_pos_in_file > (std::numeric_limits::max() - bytes_read)) - throw Exception("An overflow occurred during file operation", ErrorCodes::LOGICAL_ERROR); - - first_unread_pos_in_file += bytes_read; - total_bytes_read += bytes_read; - nextimpl_working_buffer_offset = region_left_padding; - - if (total_bytes_read == max_bytes_read) - is_eof = true; - - /// Swap the main and duplicate buffers. 
- swap(fill_buffer); -} - -} - -#endif diff --git a/src/IO/ReadBufferAIO.h b/src/IO/ReadBufferAIO.h deleted file mode 100644 index d476865747d..00000000000 --- a/src/IO/ReadBufferAIO.h +++ /dev/null @@ -1,111 +0,0 @@ -#pragma once - -#if defined(OS_LINUX) || defined(__FreeBSD__) - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace CurrentMetrics -{ - extern const Metric OpenFileForRead; -} - -namespace DB -{ - -/** Class for asynchronous data reading. - */ -class ReadBufferAIO final : public ReadBufferFromFileBase -{ -public: - ReadBufferAIO(const std::string & filename_, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, int flags_ = -1, - char * existing_memory_ = nullptr); - ~ReadBufferAIO() override; - - ReadBufferAIO(const ReadBufferAIO &) = delete; - ReadBufferAIO & operator=(const ReadBufferAIO &) = delete; - - void setMaxBytes(size_t max_bytes_read_); - off_t getPosition() override { return first_unread_pos_in_file - (working_buffer.end() - pos); } - std::string getFileName() const override { return filename; } - int getFD() const { return fd; } - - off_t seek(off_t off, int whence) override; - -private: - /// - bool nextImpl() override; - /// Synchronously read the data. - void synchronousRead(); - /// Get data from an asynchronous request. - void receive(); - /// Ignore data from an asynchronous request. - void skip(); - /// Wait for the end of the current asynchronous task. - bool waitForAIOCompletion(); - /// Prepare the request. - void prepare(); - /// Prepare for reading a duplicate buffer containing data from - /// of the last request. - void finalize(); - -private: - /// Buffer for asynchronous data read operations. - BufferWithOwnMemory fill_buffer; - - /// Description of the asynchronous read request. - iocb request{}; - std::future future_bytes_read; - - const std::string filename; - - /// The maximum number of bytes that can be read. - size_t max_bytes_read = std::numeric_limits::max(); - /// Number of bytes requested. - size_t requested_byte_count = 0; - /// The number of bytes read at the last request. - ssize_t bytes_read = 0; - /// The total number of bytes read. - size_t total_bytes_read = 0; - - /// The position of the first unread byte in the file. - off_t first_unread_pos_in_file = 0; - - /// The starting position of the aligned region of the disk from which the data is read. - off_t region_aligned_begin = 0; - /// Left offset to align the region of the disk. - size_t region_left_padding = 0; - /// The size of the aligned region of the disk. - size_t region_aligned_size = 0; - - /// The file descriptor for read. - int fd = -1; - - /// The buffer to which the received data is written. - Position buffer_begin = nullptr; - - /// The asynchronous read operation is not yet completed. - bool is_pending_read = false; - /// The end of the file is reached. - bool is_eof = false; - /// At least one read request was sent. - bool is_started = false; - /// Did the asynchronous operation fail? - bool aio_failed = false; - - CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForRead}; -}; - -} - -#endif diff --git a/src/IO/ReadBufferFromFile.h b/src/IO/ReadBufferFromFile.h index 33365bc7ceb..676f53afeb8 100644 --- a/src/IO/ReadBufferFromFile.h +++ b/src/IO/ReadBufferFromFile.h @@ -46,4 +46,18 @@ public: } }; + +/** Similar to ReadBufferFromFile but it is using 'pread' instead of 'read'. 
+ */ +class ReadBufferFromFilePRead : public ReadBufferFromFile +{ +public: + ReadBufferFromFilePRead(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1, + char * existing_memory = nullptr, size_t alignment = 0) + : ReadBufferFromFile(file_name_, buf_size, flags, existing_memory, alignment) + { + use_pread = true; + } +}; + } diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index 893c2bcb5d8..fdb538d4a49 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -59,7 +59,11 @@ bool ReadBufferFromFileDescriptor::nextImpl() ssize_t res = 0; { CurrentMetrics::Increment metric_increment{CurrentMetrics::Read}; - res = ::read(fd, internal_buffer.begin(), internal_buffer.size()); + + if (use_pread) + res = ::pread(fd, internal_buffer.begin(), internal_buffer.size(), file_offset_of_buffer_end); + else + res = ::read(fd, internal_buffer.begin(), internal_buffer.size()); } if (!res) break; @@ -128,12 +132,13 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence) if (new_pos + (working_buffer.end() - pos) == file_offset_of_buffer_end) return new_pos; - // file_offset_of_buffer_end corresponds to working_buffer.end(); it's a past-the-end pos, - // so the second inequality is strict. + /// file_offset_of_buffer_end corresponds to working_buffer.end(); it's a past-the-end pos, + /// so the second inequality is strict. if (file_offset_of_buffer_end - working_buffer.size() <= static_cast(new_pos) && new_pos < file_offset_of_buffer_end) { - /// Position is still inside buffer. + /// Position is still inside the buffer. + pos = working_buffer.end() - file_offset_of_buffer_end + new_pos; assert(pos >= working_buffer.begin()); assert(pos < working_buffer.end()); @@ -142,35 +147,66 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence) } else { - ProfileEvents::increment(ProfileEvents::Seek); - Stopwatch watch(profile_callback ? clock_type : CLOCK_MONOTONIC); + /// Position is out of the buffer, we need to do real seek. + off_t seek_pos = required_alignment > 1 + ? new_pos / required_alignment * required_alignment + : new_pos; + off_t offset_after_seek_pos = new_pos - seek_pos; + + /// First put position at the end of the buffer so the next read will fetch new data to the buffer. pos = working_buffer.end(); - off_t res = ::lseek(fd, new_pos, SEEK_SET); - if (-1 == res) - throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(), - ErrorCodes::CANNOT_SEEK_THROUGH_FILE); - file_offset_of_buffer_end = new_pos; - watch.stop(); - ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); + /// In case of using 'pread' we just update the info about the next position in file. + /// In case of using 'read' we call 'lseek'. - return res; + /// We account both cases as seek event as it leads to non-contiguous reads from file. + ProfileEvents::increment(ProfileEvents::Seek); + + if (!use_pread) + { + Stopwatch watch(profile_callback ? clock_type : CLOCK_MONOTONIC); + + off_t res = ::lseek(fd, seek_pos, SEEK_SET); + if (-1 == res) + throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(), + ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + + /// Also note that seeking past the file size is not allowed. 
+ if (res != seek_pos) + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, + "The 'lseek' syscall returned value ({}) that is not expected ({})", res, seek_pos); + + watch.stop(); + ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); + } + + file_offset_of_buffer_end = seek_pos; + + if (offset_after_seek_pos > 0) + ignore(offset_after_seek_pos); + + return seek_pos; } } void ReadBufferFromFileDescriptor::rewind() { - ProfileEvents::increment(ProfileEvents::Seek); - off_t res = ::lseek(fd, 0, SEEK_SET); - if (-1 == res) - throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(), - ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + if (!use_pread) + { + ProfileEvents::increment(ProfileEvents::Seek); + off_t res = ::lseek(fd, 0, SEEK_SET); + if (-1 == res) + throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(), + ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + } + /// In case of pread, the ProfileEvents::Seek is not accounted, but it's Ok. /// Clearing the buffer with existing data. New data will be read on subsequent call to 'next'. working_buffer.resize(0); pos = working_buffer.begin(); + file_offset_of_buffer_end = 0; } diff --git a/src/IO/ReadBufferFromFileDescriptor.h b/src/IO/ReadBufferFromFileDescriptor.h index 1883c6802bc..84970820abf 100644 --- a/src/IO/ReadBufferFromFileDescriptor.h +++ b/src/IO/ReadBufferFromFileDescriptor.h @@ -14,8 +14,11 @@ namespace DB class ReadBufferFromFileDescriptor : public ReadBufferFromFileBase { protected: + const size_t required_alignment = 0; /// For O_DIRECT both file offsets and memory addresses have to be aligned. + bool use_pread = false; /// To access one fd from multiple threads, use 'pread' syscall instead of 'read'. + + size_t file_offset_of_buffer_end = 0; /// What offset in file corresponds to working_buffer.end(). int fd; - size_t file_offset_of_buffer_end; /// What offset in file corresponds to working_buffer.end(). bool nextImpl() override; @@ -24,7 +27,9 @@ protected: public: ReadBufferFromFileDescriptor(int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0) - : ReadBufferFromFileBase(buf_size, existing_memory, alignment), fd(fd_), file_offset_of_buffer_end(0) {} + : ReadBufferFromFileBase(buf_size, existing_memory, alignment), required_alignment(alignment), fd(fd_) + { + } int getFD() const { @@ -45,9 +50,23 @@ public: off_t size(); void setProgressCallback(ContextPtr context); + private: /// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout. bool poll(size_t timeout_microseconds); }; + +/** Similar to ReadBufferFromFileDescriptor but it is using 'pread' allowing multiple concurrent reads from the same fd. 
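As an editorial aside, a minimal usage sketch of the pread-based descriptor buffer declared just below; the file path, offsets and helper function are hypothetical, and only the constructor and seek() shown in this diff are assumed:

#include <fcntl.h>
#include <unistd.h>
#include <IO/ReadBufferFromFileDescriptor.h>

/// Hypothetical helper, not part of the patch: two independent readers over one descriptor.
void example_two_readers_one_fd()
{
    int fd = ::open("/tmp/example.bin", O_RDONLY | O_CLOEXEC);

    DB::ReadBufferFromFileDescriptorPRead first(fd);
    DB::ReadBufferFromFileDescriptorPRead second(fd);

    /// Reads go through ::pread, so each buffer keeps its own position and
    /// no shared lseek offset is disturbed by the other reader.
    second.seek(4096, SEEK_SET);   /// only updates file_offset_of_buffer_end; no syscall is issued here

    ::close(fd);
}

With a non-zero required_alignment (the O_DIRECT case), the same seek() rounds the target down to the alignment boundary and then ignore()s the remainder: a seek to offset 10000 with 4096-byte alignment reads from 8192 and skips 1808 bytes, matching the arithmetic in the hunk above.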
+ */ +class ReadBufferFromFileDescriptorPRead : public ReadBufferFromFileDescriptor +{ +public: + ReadBufferFromFileDescriptorPRead(int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0) + : ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment) + { + use_pread = true; + } +}; + } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 4e101aaaf63..e3a71789979 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -632,6 +632,22 @@ inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf) return ReturnType(true); } +template +inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf) +{ + static constexpr bool throw_exception = std::is_same_v; + + LocalDate local_date; + + if constexpr (throw_exception) + readDateTextImpl(local_date, buf); + else if (!readDateTextImpl(local_date, buf)) + return false; + /// When the text does not conform to the date format or the value is out of range, Date32 uses 1925-01-01 as the default value (-DateLUT::instance().getDayNumOffsetEpoch(), -16436) and Date uses 1970-01-01. + date = DateLUT::instance().makeDayNum(local_date.year(), local_date.month(), local_date.day(), -DateLUT::instance().getDayNumOffsetEpoch()); + return ReturnType(true); +} + inline void readDateText(LocalDate & date, ReadBuffer & buf) { @@ -643,6 +659,11 @@ inline void readDateText(DayNum & date, ReadBuffer & buf) readDateTextImpl(date, buf); } +inline void readDateText(ExtendedDayNum & date, ReadBuffer & buf) +{ + readDateTextImpl(date, buf); +} + inline bool tryReadDateText(LocalDate & date, ReadBuffer & buf) { return readDateTextImpl(date, buf); } @@ -653,6 +674,11 @@ inline bool tryReadDateText(DayNum & date, ReadBuffer & buf) { return readDateTextImpl(date, buf); } +inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf) +{ + return readDateTextImpl(date, buf); +} + template inline ReturnType readUUIDTextImpl(UUID & uuid, ReadBuffer & buf) { diff --git a/src/IO/SeekableReadBuffer.h b/src/IO/SeekableReadBuffer.h index f8e6d817fb1..97620f0c03c 100644 --- a/src/IO/SeekableReadBuffer.h +++ b/src/IO/SeekableReadBuffer.h @@ -17,7 +17,7 @@ public: * Shifts buffer current position to given offset. * @param off Offset. * @param whence Seek mode (@see SEEK_SET, @see SEEK_CUR). - * @return New position from the begging of underlying buffer / file. + * @return New position from the beginning of underlying buffer / file.
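Another editorial sketch, this time for the new ExtendedDayNum text-reading overloads; the literals, header paths and helper are made up, while readDateText() and the 1925-01-01 fallback follow the hunk above:

#include <common/DayNum.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>

/// Hypothetical helper, not part of the patch.
void example_read_date32()
{
    DB::ReadBufferFromString in("2105-12-31");
    ExtendedDayNum day;
    DB::readDateText(day, in);               /// value inside the DATE LUT range is converted as usual

    DB::ReadBufferFromString far_future("2400-01-01");
    ExtendedDayNum clamped;
    DB::readDateText(clamped, far_future);   /// year is outside the LUT, so the default 1925-01-01 is stored
}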
*/ virtual off_t seek(off_t off, int whence) = 0; diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 7de6dbfa613..d5a123fa1f6 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -728,6 +728,11 @@ inline void writeDateText(DayNum date, WriteBuffer & buf) writeDateText(LocalDate(date), buf); } +template +inline void writeDateText(ExtendedDayNum date, WriteBuffer & buf) +{ + writeDateText(LocalDate(date), buf); +} /// In the format YYYY-MM-DD HH:MM:SS template diff --git a/src/IO/createReadBufferFromFileBase.cpp b/src/IO/createReadBufferFromFileBase.cpp index 230f049b2cb..11a0937ee48 100644 --- a/src/IO/createReadBufferFromFileBase.cpp +++ b/src/IO/createReadBufferFromFileBase.cpp @@ -1,8 +1,5 @@ #include #include -#if defined(OS_LINUX) || defined(__FreeBSD__) -#include -#endif #include #include @@ -10,8 +7,8 @@ namespace ProfileEvents { extern const Event CreatedReadBufferOrdinary; - extern const Event CreatedReadBufferAIO; - extern const Event CreatedReadBufferAIOFailed; + extern const Event CreatedReadBufferDirectIO; + extern const Event CreatedReadBufferDirectIOFailed; extern const Event CreatedReadBufferMMap; extern const Event CreatedReadBufferMMapFailed; } @@ -20,36 +17,15 @@ namespace DB { std::unique_ptr createReadBufferFromFileBase( - const std::string & filename_, - size_t estimated_size, size_t aio_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache, - size_t buffer_size_, int flags_, char * existing_memory_, size_t alignment) + const std::string & filename, + size_t estimated_size, size_t direct_io_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache, + size_t buffer_size, int flags, char * existing_memory, size_t alignment) { -#if defined(OS_LINUX) || defined(__FreeBSD__) - if (aio_threshold && estimated_size >= aio_threshold) - { - /// Attempt to open a file with O_DIRECT - try - { - auto res = std::make_unique(filename_, buffer_size_, flags_, existing_memory_); - ProfileEvents::increment(ProfileEvents::CreatedReadBufferAIO); - return res; - } - catch (const ErrnoException &) - { - /// Fallback to cached IO if O_DIRECT is not supported. - ProfileEvents::increment(ProfileEvents::CreatedReadBufferAIOFailed); - } - } -#else - (void)aio_threshold; - (void)estimated_size; -#endif - - if (!existing_memory_ && mmap_threshold && mmap_cache && estimated_size >= mmap_threshold) + if (!existing_memory && mmap_threshold && mmap_cache && estimated_size >= mmap_threshold) { try { - auto res = std::make_unique(*mmap_cache, filename_, 0); + auto res = std::make_unique(*mmap_cache, filename, 0); ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap); return res; } @@ -60,8 +36,63 @@ std::unique_ptr createReadBufferFromFileBase( } } +#if defined(OS_LINUX) || defined(__FreeBSD__) + if (direct_io_threshold && estimated_size >= direct_io_threshold) + { + /** O_DIRECT + * The O_DIRECT flag may impose alignment restrictions on the length and address of user-space buffers and the file offset of I/Os. + * In Linux alignment restrictions vary by filesystem and kernel version and might be absent entirely. + * However there is currently no filesystem-independent interface for an application to discover these restrictions + * for a given file or filesystem. Some filesystems provide their own interfaces for doing so, for example the + * XFS_IOC_DIOINFO operation in xfsctl(3). + * + * Under Linux 2.4, transfer sizes, and the alignment of the user buffer and the file offset must all be + * multiples of the logical block size of the filesystem. 
Since Linux 2.6.0, alignment to the logical block size + * of the underlying storage (typically 512 bytes) suffices. + * + * - man 2 open + */ + constexpr size_t min_alignment = DEFAULT_AIO_FILE_BLOCK_SIZE; + + auto align_up = [=](size_t value) { return (value + min_alignment - 1) / min_alignment * min_alignment; }; + + if (alignment == 0) + alignment = min_alignment; + else if (alignment % min_alignment) + alignment = align_up(alignment); + + if (buffer_size % min_alignment) + { + existing_memory = nullptr; /// Cannot reuse existing memory if it has unaligned size. + buffer_size = align_up(buffer_size); + } + + if (reinterpret_cast(existing_memory) % min_alignment) + { + existing_memory = nullptr; /// Cannot reuse existing memory if it has unaligned offset. + } + + /// Attempt to open a file with O_DIRECT + try + { + auto res = std::make_unique( + filename, buffer_size, (flags == -1 ? O_RDONLY | O_CLOEXEC : flags) | O_DIRECT, existing_memory, alignment); + ProfileEvents::increment(ProfileEvents::CreatedReadBufferDirectIO); + return res; + } + catch (const ErrnoException &) + { + /// Fallback to cached IO if O_DIRECT is not supported. + ProfileEvents::increment(ProfileEvents::CreatedReadBufferDirectIOFailed); + } + } +#else + (void)direct_io_threshold; + (void)estimated_size; +#endif + ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); - return std::make_unique(filename_, buffer_size_, flags_, existing_memory_, alignment); + return std::make_unique(filename, buffer_size, flags, existing_memory, alignment); } } diff --git a/src/IO/createReadBufferFromFileBase.h b/src/IO/createReadBufferFromFileBase.h index 46d5b39ea44..dc2912ea752 100644 --- a/src/IO/createReadBufferFromFileBase.h +++ b/src/IO/createReadBufferFromFileBase.h @@ -13,20 +13,20 @@ class MMappedFileCache; /** Create an object to read data from a file. * estimated_size - the number of bytes to read - * aio_threshold - the minimum number of bytes for asynchronous reads + * direct_io_threshold - the minimum number of bytes to read using O_DIRECT (unbuffered) I/O * - * If aio_threshold = 0 or estimated_size < aio_threshold, read operations are executed synchronously. + * If direct_io_threshold = 0 or estimated_size < direct_io_threshold, read operations go through the ordinary page cache. * Otherwise, reads are performed with O_DIRECT, bypassing the page cache.
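To make the align-up rounding in createReadBufferFromFileBase concrete, a standalone restatement with an assumed 4096-byte block instead of DEFAULT_AIO_FILE_BLOCK_SIZE:

#include <cstddef>

constexpr size_t example_block = 4096;   /// assumed block size, for illustration only
constexpr size_t example_align_up(size_t value)
{
    return (value + example_block - 1) / example_block * example_block;
}

static_assert(example_align_up(1) == 4096);      /// anything in (0, 4096] becomes one block
static_assert(example_align_up(4096) == 4096);   /// already aligned values are unchanged
static_assert(example_align_up(5000) == 8192);   /// otherwise round up to the next multiple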
*/ std::unique_ptr createReadBufferFromFileBase( - const std::string & filename_, + const std::string & filename, size_t estimated_size, - size_t aio_threshold, + size_t direct_io_threshold, size_t mmap_threshold, MMappedFileCache * mmap_cache, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, + size_t buffer_size = DBMS_DEFAULT_BUFFER_SIZE, int flags_ = -1, - char * existing_memory_ = nullptr, + char * existing_memory = nullptr, size_t alignment = 0); } diff --git a/src/IO/examples/CMakeLists.txt b/src/IO/examples/CMakeLists.txt index bcd0a8bba24..d5907bf67ad 100644 --- a/src/IO/examples/CMakeLists.txt +++ b/src/IO/examples/CMakeLists.txt @@ -49,11 +49,6 @@ target_link_libraries (io_operators PRIVATE clickhouse_common_io) add_executable (write_int write_int.cpp) target_link_libraries (write_int PRIVATE clickhouse_common_io) -if (OS_LINUX OR OS_FREEBSD) - add_executable(read_buffer_aio read_buffer_aio.cpp) - target_link_libraries (read_buffer_aio PRIVATE clickhouse_common_io) -endif () - add_executable (zlib_buffers zlib_buffers.cpp) target_link_libraries (zlib_buffers PRIVATE clickhouse_common_io) diff --git a/src/IO/examples/read_buffer_aio.cpp b/src/IO/examples/read_buffer_aio.cpp deleted file mode 100644 index 01ac9808cbb..00000000000 --- a/src/IO/examples/read_buffer_aio.cpp +++ /dev/null @@ -1,670 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace -{ - -void run(); -void prepare(std::string & filename, std::string & buf); -void prepare2(std::string & filename, std::string & buf); -void prepare3(std::string & filename, std::string & buf); -void prepare4(std::string & filename, std::string & buf); -std::string createTmpFile(); -[[noreturn]] void die(const std::string & msg); -void runTest(unsigned int num, const std::function & func); - -bool test1(const std::string & filename); -bool test2(const std::string & filename, const std::string & buf); -bool test3(const std::string & filename, const std::string & buf); -bool test4(const std::string & filename, const std::string & buf); -bool test5(const std::string & filename, const std::string & buf); -bool test6(const std::string & filename, const std::string & buf); -bool test7(const std::string & filename, const std::string & buf); -bool test8(const std::string & filename, const std::string & buf); -bool test9(const std::string & filename, const std::string & buf); -bool test10(const std::string & filename, const std::string & buf); -bool test11(const std::string & filename); -bool test12(const std::string & filename, const std::string & buf); -bool test13(const std::string & filename, const std::string & buf); -bool test14(const std::string & filename, const std::string & buf); -bool test15(const std::string & filename, const std::string & buf); -bool test16(const std::string & filename, const std::string & buf); -bool test17(const std::string & filename, const std::string & buf); -bool test18(const std::string & filename, const std::string & buf); -bool test19(const std::string & filename, const std::string & buf); -bool test20(const std::string & filename, const std::string & buf); - -void run() -{ - namespace fs = std::filesystem; - - std::string filename; - std::string buf; - prepare(filename, buf); - - std::string filename2; - std::string buf2; - prepare(filename2, buf2); - - std::string filename3; - std::string buf3; - prepare2(filename3, buf3); - - std::string filename4; - std::string buf4; - prepare3(filename4, buf4); - - std::string filename5; - std::string buf5; - 
prepare4(filename5, buf5); - - const std::vector> tests = - { - [&]{ return test1(filename); }, - [&]{ return test2(filename, buf); }, - [&]{ return test3(filename, buf); }, - [&]{ return test4(filename, buf); }, - [&]{ return test5(filename, buf); }, - [&]{ return test6(filename, buf); }, - [&]{ return test7(filename, buf); }, - [&]{ return test8(filename, buf); }, - [&]{ return test9(filename, buf); }, - [&]{ return test10(filename, buf); }, - [&]{ return test11(filename); }, - [&]{ return test12(filename, buf); }, - [&]{ return test13(filename2, buf2); }, - [&]{ return test14(filename, buf); }, - [&]{ return test15(filename3, buf3); }, - [&]{ return test16(filename3, buf3); }, - [&]{ return test17(filename4, buf4); }, - [&]{ return test18(filename5, buf5); }, - [&]{ return test19(filename, buf); }, - [&]{ return test20(filename, buf); } - }; - - unsigned int num = 0; - for (const auto & test : tests) - { - ++num; - runTest(num, test); - } - - fs::remove_all(fs::path(filename).parent_path().string()); - fs::remove_all(fs::path(filename2).parent_path().string()); - fs::remove_all(fs::path(filename3).parent_path().string()); - fs::remove_all(fs::path(filename4).parent_path().string()); - fs::remove_all(fs::path(filename5).parent_path().string()); -} - -void prepare(std::string & filename, std::string & buf) -{ - static const std::string symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; - - filename = createTmpFile(); - - size_t n = 10 * DEFAULT_AIO_FILE_BLOCK_SIZE; - buf.reserve(n); - - for (size_t i = 0; i < n; ++i) - buf += symbols[i % symbols.length()]; - - std::ofstream out(filename.c_str()); - if (!out.is_open()) - die("Could not open file"); - - out << buf; -} - -void prepare2(std::string & filename, std::string & buf) -{ - filename = createTmpFile(); - - buf = "122333444455555666666777777788888888999999999"; - - std::ofstream out(filename.c_str()); - if (!out.is_open()) - die("Could not open file"); - - out << buf; -} - -void prepare3(std::string & filename, std::string & buf) -{ - filename = createTmpFile(); - - buf = "122333444455555666666777777788888888999999999"; - - std::ofstream out(filename.c_str()); - if (!out.is_open()) - die("Could not open file"); - - out.seekp(7, std::ios_base::beg); - out << buf; -} - -void prepare4(std::string & filename, std::string & buf) -{ - static const std::string symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; - - filename = createTmpFile(); - - std::ofstream out(filename.c_str()); - if (!out.is_open()) - die("Could not open file"); - - for (size_t i = 0; i < 1340; ++i) - buf += symbols[i % symbols.length()]; - - out.seekp(2984, std::ios_base::beg); - out << buf; -} - -std::string createTmpFile() -{ - char pattern[] = "/tmp/fileXXXXXX"; - char * dir = ::mkdtemp(pattern); - if (dir == nullptr) - die("Could not create directory"); - - return std::string(dir) + "/foo"; -} - -void die(const std::string & msg) -{ - std::cout << msg << "\n"; - ::exit(EXIT_FAILURE); -} - -void runTest(unsigned int num, const std::function & func) -{ - bool ok; - - try - { - ok = func(); - } - catch (const DB::Exception & ex) - { - ok = false; - std::cout << "Caught exception " << ex.displayText() << "\n"; - } - catch (const std::exception & ex) - { - ok = false; - std::cout << "Caught exception " << ex.what() << "\n"; - } - - if (ok) - std::cout << "Test " << num << " passed\n"; - else - std::cout << "Test " << num << " failed\n"; -} - -bool test1(const std::string & filename) -{ - DB::ReadBufferAIO in(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - if 
(in.getFileName() != filename) - return false; - if (in.getFD() == -1) - return false; - return true; -} - -bool test2(const std::string & filename, const std::string & buf) -{ - std::string newbuf; - newbuf.resize(buf.length()); - - DB::ReadBufferAIO in(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - size_t count = in.read(newbuf.data(), newbuf.length()); - if (count != newbuf.length()) - return false; - - return (newbuf == buf); -} - -bool test3(const std::string & filename, const std::string & buf) -{ - std::string newbuf; - newbuf.resize(buf.length()); - - size_t requested = 9 * DEFAULT_AIO_FILE_BLOCK_SIZE; - - DB::ReadBufferAIO in(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - in.setMaxBytes(requested); - size_t count = in.read(newbuf.data(), newbuf.length()); - - newbuf.resize(count); - return (newbuf == buf.substr(0, requested)); -} - -bool test4(const std::string & filename, const std::string & buf) -{ - std::string newbuf; - newbuf.resize(buf.length()); - - DB::ReadBufferAIO in(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - in.setMaxBytes(0); - size_t n_read = in.read(newbuf.data(), newbuf.length()); - - return n_read == 0; -} - -bool test5(const std::string & filename, const std::string & buf) -{ - std::string newbuf; - newbuf.resize(1 + (DEFAULT_AIO_FILE_BLOCK_SIZE >> 1)); - - DB::ReadBufferAIO in(filename, DEFAULT_AIO_FILE_BLOCK_SIZE); - in.setMaxBytes(1 + (DEFAULT_AIO_FILE_BLOCK_SIZE >> 1)); - - size_t count = in.read(newbuf.data(), newbuf.length()); - if (count != newbuf.length()) - return false; - - if (newbuf != buf.substr(0, newbuf.length())) - return false; - - return true; -} - -bool test6(const std::string & filename, const std::string & buf) -{ - std::string newbuf; - newbuf.resize(buf.length()); - - DB::ReadBufferAIO in(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - if (in.getPosition() != 0) - return false; - - size_t count = in.read(newbuf.data(), newbuf.length()); - if (count != newbuf.length()) - return false; - - if (static_cast(in.getPosition()) != buf.length()) - return false; - - return true; -} - -bool test7(const std::string & filename, const std::string & buf) -{ - std::string newbuf; - newbuf.resize(buf.length() - DEFAULT_AIO_FILE_BLOCK_SIZE); - - DB::ReadBufferAIO in(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - (void) in.seek(DEFAULT_AIO_FILE_BLOCK_SIZE, SEEK_SET); - size_t count = in.read(newbuf.data(), newbuf.length()); - if (count != (9 * DEFAULT_AIO_FILE_BLOCK_SIZE)) - return false; - - return (newbuf == buf.substr(DEFAULT_AIO_FILE_BLOCK_SIZE)); -} - -bool test8(const std::string & filename, const std::string & buf) -{ - std::string newbuf; - newbuf.resize(DEFAULT_AIO_FILE_BLOCK_SIZE - 1); - - DB::ReadBufferAIO in(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - (void) in.seek(DEFAULT_AIO_FILE_BLOCK_SIZE + 1, SEEK_CUR); - size_t count = in.read(newbuf.data(), newbuf.length()); - - if (count != newbuf.length()) - return false; - - if (newbuf != buf.substr(DEFAULT_AIO_FILE_BLOCK_SIZE + 1, newbuf.length())) - return false; - - return true; -} - -bool test9(const std::string & filename, const std::string & buf) -{ - bool ok = false; - - try - { - std::string newbuf; - newbuf.resize(buf.length()); - - DB::ReadBufferAIO in(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - size_t count = in.read(newbuf.data(), newbuf.length()); - if (count != newbuf.length()) - return false; - in.setMaxBytes(9 * DEFAULT_AIO_FILE_BLOCK_SIZE); - } - catch (const DB::Exception &) - { - ok = true; - } - - return ok; -} - -bool test10(const std::string & filename, const std::string & buf) 
-{ - DB::ReadBufferAIO in(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - { - std::string newbuf; - newbuf.resize(4 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - size_t count1 = in.read(newbuf.data(), newbuf.length()); - if (count1 != newbuf.length()) - return false; - - if (newbuf != buf.substr(0, 4 * DEFAULT_AIO_FILE_BLOCK_SIZE)) - return false; - } - - (void) in.seek(2 * DEFAULT_AIO_FILE_BLOCK_SIZE, SEEK_CUR); - - { - std::string newbuf; - newbuf.resize(4 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - size_t count2 = in.read(newbuf.data(), newbuf.length()); - if (count2 != newbuf.length()) - return false; - - if (newbuf != buf.substr(6 * DEFAULT_AIO_FILE_BLOCK_SIZE)) - return false; - } - - return true; -} - -bool test11(const std::string & filename) -{ - bool ok = false; - - try - { - DB::ReadBufferAIO in(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - (void) in.seek(-DEFAULT_AIO_FILE_BLOCK_SIZE, SEEK_SET); - } - catch (const DB::Exception &) - { - ok = true; - } - - return ok; -} - -bool test12(const std::string & filename, const std::string &) -{ - bool ok = false; - - try - { - std::string newbuf; - newbuf.resize(4 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - DB::ReadBufferAIO in(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - size_t count = in.read(newbuf.data(), newbuf.length()); - if (count != newbuf.length()) - return false; - - (void) in.seek(-(10 * DEFAULT_AIO_FILE_BLOCK_SIZE), SEEK_CUR); - } - catch (const DB::Exception &) - { - ok = true; - } - - return ok; -} - -bool test13(const std::string & filename, const std::string &) -{ - std::string newbuf; - newbuf.resize(2 * DEFAULT_AIO_FILE_BLOCK_SIZE - 3); - - DB::ReadBufferAIO in(filename, DEFAULT_AIO_FILE_BLOCK_SIZE); - size_t count1 = in.read(newbuf.data(), newbuf.length()); - return count1 == newbuf.length(); -} - -bool test14(const std::string & filename, const std::string & buf) -{ - std::string newbuf; - newbuf.resize(1 + (DEFAULT_AIO_FILE_BLOCK_SIZE >> 1)); - - DB::ReadBufferAIO in(filename, DEFAULT_AIO_FILE_BLOCK_SIZE); - (void) in.seek(2, SEEK_SET); - in.setMaxBytes(3 + (DEFAULT_AIO_FILE_BLOCK_SIZE >> 1)); - - size_t count = in.read(newbuf.data(), newbuf.length()); - if (count != newbuf.length()) - return false; - - if (newbuf != buf.substr(2, newbuf.length())) - return false; - - return true; -} - -bool test15(const std::string & filename, const std::string &) -{ - std::string newbuf; - newbuf.resize(1000); - - DB::ReadBufferAIO in(filename, DEFAULT_AIO_FILE_BLOCK_SIZE); - - size_t count = in.read(newbuf.data(), 1); - if (count != 1) - return false; - if (newbuf[0] != '1') - return false; - return true; -} - -bool test16(const std::string & filename, const std::string &) -{ - DB::ReadBufferAIO in(filename, DEFAULT_AIO_FILE_BLOCK_SIZE); - size_t count; - - { - std::string newbuf; - newbuf.resize(1); - count = in.read(newbuf.data(), 1); - if (count != 1) - return false; - if (newbuf[0] != '1') - return false; - } - - in.seek(2, SEEK_CUR); - - { - std::string newbuf; - newbuf.resize(3); - count = in.read(newbuf.data(), 3); - if (count != 3) - return false; - if (newbuf != "333") - return false; - } - - in.seek(4, SEEK_CUR); - - { - std::string newbuf; - newbuf.resize(5); - count = in.read(newbuf.data(), 5); - if (count != 5) - return false; - if (newbuf != "55555") - return false; - } - - in.seek(6, SEEK_CUR); - - { - std::string newbuf; - newbuf.resize(7); - count = in.read(newbuf.data(), 7); - if (count != 7) - return false; - if (newbuf != "7777777") - return false; - } - - in.seek(8, SEEK_CUR); - - { - std::string newbuf; - newbuf.resize(9); - count 
= in.read(newbuf.data(), 9); - if (count != 9) - return false; - if (newbuf != "999999999") - return false; - } - - return true; -} - -bool test17(const std::string & filename, const std::string & buf) -{ - DB::ReadBufferAIO in(filename, DEFAULT_AIO_FILE_BLOCK_SIZE); - size_t count; - - { - std::string newbuf; - newbuf.resize(10); - count = in.read(newbuf.data(), 10); - - if (count != 10) - return false; - if (newbuf.substr(0, 7) != std::string(7, '\0')) - return false; - if (newbuf.substr(7) != "122") - return false; - } - - in.seek(7 + buf.length() - 2, SEEK_SET); - - { - std::string newbuf; - newbuf.resize(160); - count = in.read(newbuf.data(), 160); - - if (count != 2) - return false; - if (newbuf.substr(0, 2) != "99") - return false; - } - - in.seek(7 + buf.length() + DEFAULT_AIO_FILE_BLOCK_SIZE, SEEK_SET); - - { - std::string newbuf; - newbuf.resize(50); - count = in.read(newbuf.data(), 50); - if (count != 0) - return false; - } - - return true; -} - -bool test18(const std::string & filename, const std::string & buf) -{ - DB::ReadBufferAIO in(filename, DEFAULT_AIO_FILE_BLOCK_SIZE); - - std::string newbuf; - newbuf.resize(1340); - - in.seek(2984, SEEK_SET); - size_t count = in.read(newbuf.data(), 1340); - - if (count != 1340) - return false; - if (newbuf != buf) - return false; - - return true; -} - -bool test19(const std::string & filename, const std::string & buf) -{ - DB::ReadBufferAIO in(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - { - std::string newbuf; - newbuf.resize(5 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - size_t count1 = in.read(newbuf.data(), newbuf.length()); - if (count1 != newbuf.length()) - return false; - - if (newbuf != buf.substr(0, 5 * DEFAULT_AIO_FILE_BLOCK_SIZE)) - return false; - } - - { - std::string newbuf; - newbuf.resize(5 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - size_t count2 = in.read(newbuf.data(), newbuf.length()); - if (count2 != newbuf.length()) - return false; - - if (newbuf != buf.substr(5 * DEFAULT_AIO_FILE_BLOCK_SIZE)) - return false; - } - - return true; -} - -bool test20(const std::string & filename, const std::string & buf) -{ - DB::ReadBufferAIO in(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - { - std::string newbuf; - newbuf.resize(5 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - size_t count1 = in.read(newbuf.data(), newbuf.length()); - if (count1 != newbuf.length()) - return false; - - if (newbuf != buf.substr(0, 5 * DEFAULT_AIO_FILE_BLOCK_SIZE)) - return false; - } - - (void) in.getPosition(); - - { - std::string newbuf; - newbuf.resize(5 * DEFAULT_AIO_FILE_BLOCK_SIZE); - - size_t count2 = in.read(newbuf.data(), newbuf.length()); - if (count2 != newbuf.length()) - return false; - - if (newbuf != buf.substr(5 * DEFAULT_AIO_FILE_BLOCK_SIZE)) - return false; - } - - return true; -} - -} - -int main() -{ - run(); - return 0; -} diff --git a/src/IO/tests/gtest_aio_seek_back_after_eof.cpp b/src/IO/tests/gtest_aio_seek_back_after_eof.cpp deleted file mode 100644 index 784f5479657..00000000000 --- a/src/IO/tests/gtest_aio_seek_back_after_eof.cpp +++ /dev/null @@ -1,91 +0,0 @@ -#if defined(__linux__) || defined(__FreeBSD__) - -#include - -#include -#include -#include -#include -#include -#include -#include - - -namespace -{ -std::string createTmpFileForEOFtest() -{ - char pattern[] = "./EOFtestFolderXXXXXX"; - if (char * dir = ::mkdtemp(pattern); dir) - { - return std::string(dir) + "/foo"; - } - else - { - /// We have no tmp in docker - /// So we have to use root - std::string almost_rand_dir = std::string{"/"} + std::to_string(randomSeed()) + "foo"; - return 
almost_rand_dir; - } - -} - -void prepareForEOF(std::string & filename, std::string & buf) -{ - static const std::string symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; - - filename = createTmpFileForEOFtest(); - - size_t n = 10 * DEFAULT_AIO_FILE_BLOCK_SIZE; - buf.reserve(n); - - for (size_t i = 0; i < n; ++i) - buf += symbols[i % symbols.length()]; - - std::ofstream out(filename); - out << buf; -} - - -} -TEST(ReadBufferAIOTest, TestReadAfterAIO) -{ - using namespace DB; - std::string data; - std::string file_path; - prepareForEOF(file_path, data); - ReadBufferAIO testbuf(file_path); - - std::string newdata; - newdata.resize(data.length()); - - size_t total_read = testbuf.read(newdata.data(), newdata.length()); - EXPECT_EQ(total_read, data.length()); - EXPECT_TRUE(testbuf.eof()); - - - testbuf.seek(data.length() - 100, SEEK_SET); - - std::string smalldata; - smalldata.resize(100); - size_t read_after_eof = testbuf.read(smalldata.data(), smalldata.size()); - EXPECT_EQ(read_after_eof, 100); - EXPECT_TRUE(testbuf.eof()); - - - testbuf.seek(0, SEEK_SET); - std::string repeatdata; - repeatdata.resize(data.length()); - size_t read_after_eof_big = testbuf.read(repeatdata.data(), repeatdata.size()); - EXPECT_EQ(read_after_eof_big, data.length()); - EXPECT_TRUE(testbuf.eof()); - - if (file_path[0] != '/') - { - const size_t last_slash = file_path.rfind('/'); - const std::string temp_dir = file_path.substr(0, last_slash); - std::filesystem::remove_all(temp_dir); - } -} - -#endif diff --git a/src/IO/ya.make b/src/IO/ya.make index d8bdfa95295..bca108ca426 100644 --- a/src/IO/ya.make +++ b/src/IO/ya.make @@ -44,7 +44,6 @@ SRCS( NullWriteBuffer.cpp PeekableReadBuffer.cpp Progress.cpp - ReadBufferAIO.cpp ReadBufferFromFile.cpp ReadBufferFromFileBase.cpp ReadBufferFromFileDecorator.cpp diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 1518706f0a6..63b0345b372 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -26,6 +26,7 @@ namespace ErrorCodes extern const int THERE_IS_NO_COLUMN; extern const int ILLEGAL_COLUMN; extern const int NOT_FOUND_COLUMN_IN_BLOCK; + extern const int BAD_ARGUMENTS; } const char * ActionsDAG::typeToString(ActionsDAG::ActionType type) @@ -202,6 +203,7 @@ const ActionsDAG::Node & ActionsDAG::addFunction( node.function_base = function->build(arguments); node.result_type = node.function_base->getResultType(); node.function = node.function_base->prepare(arguments); + node.is_deterministic = node.function_base->isDeterministic(); /// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function. if (node.function_base->isSuitableForConstantFolding()) @@ -426,6 +428,16 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs) { /// Constant folding. 
node->type = ActionsDAG::ActionType::COLUMN; + + for (const auto & child : node->children) + { + if (!child->is_deterministic) + { + node->is_deterministic = false; + break; + } + } + node->children.clear(); } @@ -981,6 +993,14 @@ bool ActionsDAG::trivial() const return true; } +void ActionsDAG::assertDeterministic() const +{ + for (const auto & node : nodes) + if (!node.is_deterministic) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Expression must be deterministic but it contains non-deterministic part `{}`", node.result_name); +} + void ActionsDAG::addMaterializingOutputActions() { for (auto & node : index) diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 9cd0057bb1a..bfb5b177ac7 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -83,6 +83,9 @@ public: ExecutableFunctionPtr function; /// If function is a compiled statement. bool is_function_compiled = false; + /// It is deterministic (See IFunction::isDeterministic). + /// This property is kept after constant folding of non-deterministic functions like 'now', 'today'. + bool is_deterministic = true; /// For COLUMN node and propagated constants. ColumnPtr column; @@ -175,6 +178,7 @@ public: bool hasArrayJoin() const; bool hasStatefulFunctions() const; bool trivial() const; /// If actions has no functions or array join. + void assertDeterministic() const; /// Throw if not isDeterministic. #if USE_EMBEDDED_COMPILER void compileExpressions(size_t min_count_to_compile_expression); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 7aad11252cb..03fa756276e 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -686,7 +686,7 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat ASTs columns; size_t tid = 0; - for (const auto & name : tuple_type->getElementNames()) + for (const auto & name [[maybe_unused]] : tuple_type->getElementNames()) { auto tuple_ast = function->arguments->children[0]; if (tid != 0) @@ -697,11 +697,6 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat auto func = makeASTFunction("tupleElement", tuple_ast, literal); - if (tuple_type->haveExplicitNames()) - func->setAlias(name); - else - func->setAlias(data.getUniqueName("_ut_" + name)); - auto function_builder = FunctionFactory::instance().get(func->name, data.getContext()); data.addFunction(function_builder, {tuple_name_type->name, literal->getColumnName(data.getContext()->getSettingsRef())}, func->getColumnName(data.getContext()->getSettingsRef())); diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index aca92b8866d..da514759eb5 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -48,7 +48,7 @@ namespace ErrorCodes static constexpr size_t small_buffer_size = 4096; -static void openFileIfExists(const char * filename, std::optional & out) +static void openFileIfExists(const char * filename, std::optional & out) { /// Ignoring time of check is not time of use cases, as procfs/sysfs files are fairly persistent. 
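The deterministic-flag propagation during constant folding can be summarized in a simplified, self-contained sketch (a toy node struct, not the real ActionsDAG API):

#include <vector>

struct ToyNode
{
    bool is_deterministic = true;
    std::vector<const ToyNode *> children;
};

/// Mirrors the rule above: a node folded into a COLUMN stays non-deterministic
/// if any argument was non-deterministic (e.g. now()), so a later
/// assertDeterministic() can still reject the whole expression.
void foldToColumn(ToyNode & node)
{
    for (const auto * child : node.children)
    {
        if (!child->is_deterministic)
        {
            node.is_deterministic = false;
            break;
        }
    }
    node.children.clear();
}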
@@ -57,11 +57,11 @@ static void openFileIfExists(const char * filename, std::optional openFileIfExists(const std::string & filename) +static std::unique_ptr openFileIfExists(const std::string & filename) { std::error_code ec; if (std::filesystem::is_regular_file(filename, ec)) - return std::make_unique(filename, small_buffer_size); + return std::make_unique(filename, small_buffer_size); return {}; } @@ -89,7 +89,7 @@ AsynchronousMetrics::AsynchronousMetrics( for (size_t thermal_device_index = 0;; ++thermal_device_index) { - std::unique_ptr file = openFileIfExists(fmt::format("/sys/class/thermal/thermal_zone{}/temp", thermal_device_index)); + std::unique_ptr file = openFileIfExists(fmt::format("/sys/class/thermal/thermal_zone{}/temp", thermal_device_index)); if (!file) { /// Sometimes indices are from zero sometimes from one. @@ -113,7 +113,7 @@ AsynchronousMetrics::AsynchronousMetrics( } String hwmon_name; - ReadBufferFromFile hwmon_name_in(hwmon_name_file, small_buffer_size); + ReadBufferFromFilePRead hwmon_name_in(hwmon_name_file, small_buffer_size); readText(hwmon_name, hwmon_name_in); std::replace(hwmon_name.begin(), hwmon_name.end(), ' ', '_'); @@ -134,14 +134,14 @@ AsynchronousMetrics::AsynchronousMetrics( break; } - std::unique_ptr file = openFileIfExists(sensor_value_file); + std::unique_ptr file = openFileIfExists(sensor_value_file); if (!file) continue; String sensor_name; if (sensor_name_file_exists) { - ReadBufferFromFile sensor_name_in(sensor_name_file, small_buffer_size); + ReadBufferFromFilePRead sensor_name_in(sensor_name_file, small_buffer_size); readText(sensor_name, sensor_name_in); std::replace(sensor_name.begin(), sensor_name.end(), ' ', '_'); } @@ -184,7 +184,7 @@ AsynchronousMetrics::AsynchronousMetrics( if (device_name.starts_with("loop")) continue; - std::unique_ptr file = openFileIfExists(device_dir.path() / "stat"); + std::unique_ptr file = openFileIfExists(device_dir.path() / "stat"); if (!file) continue; @@ -1021,7 +1021,7 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti { try { - ReadBufferFromFile & in = *thermal[i]; + ReadBufferFromFilePRead & in = *thermal[i]; in.rewind(); Int64 temperature = 0; @@ -1065,7 +1065,7 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti { if (edac[i].first) { - ReadBufferFromFile & in = *edac[i].first; + ReadBufferFromFilePRead & in = *edac[i].first; in.rewind(); uint64_t errors = 0; readText(errors, in); @@ -1074,7 +1074,7 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti if (edac[i].second) { - ReadBufferFromFile & in = *edac[i].second; + ReadBufferFromFilePRead & in = *edac[i].second; in.rewind(); uint64_t errors = 0; readText(errors, in); @@ -1179,7 +1179,7 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti total_number_of_parts += table_merge_tree->getPartsCount(); } - if (StorageReplicatedMergeTree * table_replicated_merge_tree = dynamic_cast(table.get())) + if (StorageReplicatedMergeTree * table_replicated_merge_tree = typeid_cast(table.get())) { StorageReplicatedMergeTree::Status status; table_replicated_merge_tree->getStatus(status, false); diff --git a/src/Interpreters/AsynchronousMetrics.h b/src/Interpreters/AsynchronousMetrics.h index 606d117e605..07e117c4dd9 100644 --- a/src/Interpreters/AsynchronousMetrics.h +++ b/src/Interpreters/AsynchronousMetrics.h @@ -82,25 +82,25 @@ private: #if defined(OS_LINUX) MemoryStatisticsOS memory_stat; - std::optional meminfo; - std::optional loadavg; 
- std::optional proc_stat; - std::optional cpuinfo; - std::optional file_nr; - std::optional uptime; - std::optional net_dev; + std::optional meminfo; + std::optional loadavg; + std::optional proc_stat; + std::optional cpuinfo; + std::optional file_nr; + std::optional uptime; + std::optional net_dev; - std::vector> thermal; + std::vector> thermal; std::unordered_map>> hwmon_devices; + std::unique_ptr>> hwmon_devices; std::vector /* correctable errors */, - std::unique_ptr /* uncorrectable errors */>> edac; + std::unique_ptr /* correctable errors */, + std::unique_ptr /* uncorrectable errors */>> edac; - std::unordered_map> block_devs; + std::unordered_map> block_devs; /// TODO: socket statistics. diff --git a/src/Interpreters/BloomFilterHash.h b/src/Interpreters/BloomFilterHash.h index ae9aa9d193b..8a891ff1dfe 100644 --- a/src/Interpreters/BloomFilterHash.h +++ b/src/Interpreters/BloomFilterHash.h @@ -90,6 +90,7 @@ struct BloomFilterHash else if (which.isEnum8()) return build_hash_column(getNumberTypeHash(field)); else if (which.isEnum16()) return build_hash_column(getNumberTypeHash(field)); else if (which.isDate()) return build_hash_column(getNumberTypeHash(field)); + else if (which.isDate32()) return build_hash_column(getNumberTypeHash(field)); else if (which.isDateTime()) return build_hash_column(getNumberTypeHash(field)); else if (which.isFloat32()) return build_hash_column(getNumberTypeHash(field)); else if (which.isFloat64()) return build_hash_column(getNumberTypeHash(field)); @@ -151,6 +152,7 @@ struct BloomFilterHash else if (which.isEnum8()) getNumberTypeHash(column, vec, pos); else if (which.isEnum16()) getNumberTypeHash(column, vec, pos); else if (which.isDate()) getNumberTypeHash(column, vec, pos); + else if (which.isDate32()) getNumberTypeHash(column, vec, pos); else if (which.isDateTime()) getNumberTypeHash(column, vec, pos); else if (which.isFloat32()) getNumberTypeHash(column, vec, pos); else if (which.isFloat64()) getNumberTypeHash(column, vec, pos); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 9b204f12ab2..da994a67441 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -386,6 +386,7 @@ struct ContextSharedPart ActionLocksManagerPtr action_locks_manager; /// Set of storages' action lockers std::unique_ptr system_logs; /// Used to log queries and operations on parts std::optional storage_s3_settings; /// Settings of S3 storage + std::vector warnings; /// Store warning messages about server configuration. RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml @@ -514,6 +515,13 @@ struct ContextSharedPart trace_collector.emplace(std::move(trace_log)); } + + void addWarningMessage(const String & message) + { + /// A warning goes both: into server's log; stored to be placed in `system.warnings` table. 
+ log->warning(message); + warnings.push_back(message); + } }; @@ -635,6 +643,12 @@ String Context::getDictionariesLibPath() const return shared->dictionaries_lib_path; } +std::vector Context::getWarnings() const +{ + auto lock = getLock(); + return shared->warnings; +} + VolumePtr Context::getTemporaryVolume() const { auto lock = getLock(); @@ -706,6 +720,12 @@ void Context::setDictionariesLibPath(const String & path) shared->dictionaries_lib_path = path; } +void Context::addWarningMessage(const String & msg) +{ + auto lock = getLock(); + shared->addWarningMessage(msg); +} + void Context::setConfig(const ConfigurationPtr & config) { auto lock = getLock(); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 2b53c737915..0b73d0c4e1c 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -319,6 +319,9 @@ public: String getUserFilesPath() const; String getDictionariesLibPath() const; + /// A list of warnings about server configuration to place in `system.warnings` table. + std::vector getWarnings() const; + VolumePtr getTemporaryVolume() const; void setPath(const String & path); @@ -326,6 +329,8 @@ public: void setUserFilesPath(const String & path); void setDictionariesLibPath(const String & path); + void addWarningMessage(const String & msg); + VolumePtr setTemporaryStorage(const String & path, const String & policy_name = ""); using ConfigurationPtr = Poco::AutoPtr; diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index bd06c753319..905fcf0331c 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -531,11 +531,12 @@ Names ExpressionActions::getRequiredColumns() const bool ExpressionActions::hasArrayJoin() const { - for (const auto & action : actions) - if (action.node->type == ActionsDAG::ActionType::ARRAY_JOIN) - return true; + return getActionsDAG().hasArrayJoin(); +} - return false; +void ExpressionActions::assertDeterministic() const +{ + getActionsDAG().assertDeterministic(); } diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index 7699e82a73b..4fddd1fd27e 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -103,6 +103,7 @@ public: void execute(Block & block, bool dry_run = false) const; bool hasArrayJoin() const; + void assertDeterministic() const; /// Obtain a sample block that contains the names and types of result columns. const Block & getSampleBlock() const { return sample_block; } diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 5d92f4f8b6f..a9c7cb61a0a 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -198,8 +198,9 @@ private: { ASTPtr & ast = func.arguments->children[1]; - /// Literal can use regular IN - if (ast->as()) + /// Literal or function can use regular IN. + /// NOTE: We don't support passing table functions to IN. 
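A hedged sketch of how the new warnings plumbing is meant to be used; the message text and the helper are invented, only addWarningMessage()/getWarnings() from this diff are assumed:

#include <iostream>
#include <Interpreters/Context.h>

/// Hypothetical helper, not part of the patch.
void example_report_warnings(DB::ContextMutablePtr context)
{
    /// Record a configuration concern once, e.g. during server startup.
    context->addWarningMessage("Example warning: some optional feature is disabled.");

    /// Later, list the stored messages, e.g. when filling `system.warnings`.
    for (const auto & message : context->getWarnings())
        std::cout << message << '\n';
}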
+ if (ast->as() || ast->as()) { if (func.name == "globalIn") func.name = "in"; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d820cbbae45..bd9d7516f0f 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -399,7 +399,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( view = nullptr; } - if (try_move_to_prewhere && storage && query.where() && !query.prewhere()) + if (try_move_to_prewhere && storage && storage->supportsPrewhere() && query.where() && !query.prewhere()) { /// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty()) diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 08bfb1e175b..be0a10cf2e7 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -159,10 +159,18 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID { return static_cast(static_cast(*from_type_hint).getTimeZone().toDayNum(src.get()).toUnderType()); } + else if (which_type.isDate32() && which_from_type.isDateTime()) + { + return static_cast(static_cast(*from_type_hint).getTimeZone().toDayNum(src.get()).toUnderType()); + } else if (which_type.isDateTime() && which_from_type.isDate()) { return static_cast(type).getTimeZone().fromDayNum(DayNum(src.get())); } + else if (which_type.isDateTime() && which_from_type.isDate32()) + { + return static_cast(type).getTimeZone().fromDayNum(DayNum(src.get())); + } else if (type.isValueRepresentedByNumber() && src.getType() != Field::Types::String) { if (which_type.isUInt8()) return convertNumericType(src, type); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 5b55754f00a..99c08c70b7c 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -948,7 +948,8 @@ void executeQuery( WriteBuffer & ostr, bool allow_into_outfile, ContextMutablePtr context, - std::function set_result_details) + std::function set_result_details, + std::function before_finalize_callback) { PODArray parse_buf; const char * begin; @@ -1079,6 +1080,8 @@ void executeQuery( out->onProgress(progress); }); + out->setBeforeFinalizeCallback(before_finalize_callback); + if (set_result_details) set_result_details( context->getClientInfo().current_query_id, out->getContentType(), format_name, DateLUT::instance().getTimeZone()); diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index 6448b26a652..77f142de121 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -17,7 +17,8 @@ void executeQuery( WriteBuffer & ostr, /// Where to write query output to. bool allow_into_outfile, /// If true and the query contains INTO OUTFILE section, redirect output to that file. ContextMutablePtr context, /// DB, tables, data types, storage engines, functions, aggregate functions... - std::function set_result_details /// If a non-empty callback is passed, it will be called with the query id, the content-type, the format, and the timezone. + std::function set_result_details, /// If a non-empty callback is passed, it will be called with the query id, the content-type, the format, and the timezone. + std::function before_finalize_callback = {} /// Will be set in output format to be called before finalize. 
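A minimal sketch of passing the new callback; `istr`, `ostr` and `context` are assumed to be in scope, and only the trailing parameter added above is taken from this diff:

/// Hypothetical call site, not part of the patch.
DB::executeQuery(
    istr, ostr, /* allow_into_outfile = */ false, context,
    /* set_result_details = */ {},
    /* before_finalize_callback = */ []
    {
        /// Runs inside the output format right before finalize(),
        /// while the stream is still open (e.g. to flush trailing details).
    });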
); diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 548ec8879bd..b1f7570878f 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -16,11 +17,12 @@ namespace DB namespace ErrorCodes { - extern const int TOP_AND_LIMIT_TOGETHER; - extern const int WITH_TIES_WITHOUT_ORDER_BY; + extern const int FIRST_AND_NEXT_TOGETHER; extern const int LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED; extern const int ROW_AND_ROWS_TOGETHER; - extern const int FIRST_AND_NEXT_TOGETHER; + extern const int SYNTAX_ERROR; + extern const int TOP_AND_LIMIT_TOGETHER; + extern const int WITH_TIES_WITHOUT_ORDER_BY; } @@ -32,6 +34,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_select("SELECT"); ParserKeyword s_all("ALL"); ParserKeyword s_distinct("DISTINCT"); + ParserKeyword s_distinct_on("DISTINCT ON"); ParserKeyword s_from("FROM"); ParserKeyword s_prewhere("PREWHERE"); ParserKeyword s_where("WHERE"); @@ -77,12 +80,13 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr limit_by_length; ASTPtr limit_by_offset; ASTPtr limit_by_expression_list; + ASTPtr distinct_on_expression_list; ASTPtr limit_offset; ASTPtr limit_length; ASTPtr top_length; ASTPtr settings; - /// WITH expr list + /// WITH expr_list { if (s_with.ignore(pos, expected)) { @@ -94,7 +98,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - /// SELECT [ALL/DISTINCT] [TOP N [WITH TIES]] expr list + /// SELECT [ALL/DISTINCT [ON (expr_list)]] [TOP N [WITH TIES]] expr_list { bool has_all = false; if (!s_select.ignore(pos, expected)) @@ -103,13 +107,27 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (s_all.ignore(pos, expected)) has_all = true; - if (s_distinct.ignore(pos, expected)) + if (s_distinct_on.ignore(pos, expected)) + { + if (open_bracket.ignore(pos, expected)) + { + if (!exp_list.parse(pos, distinct_on_expression_list, expected)) + return false; + if (!close_bracket.ignore(pos, expected)) + return false; + } + else + return false; + } + else if (s_distinct.ignore(pos, expected)) + { select_query->distinct = true; + } if (!has_all && s_all.ignore(pos, expected)) has_all = true; - if (has_all && select_query->distinct) + if (has_all && (select_query->distinct || distinct_on_expression_list)) return false; if (s_top.ignore(pos, expected)) @@ -256,13 +274,19 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) select_query->limit_with_ties = true; } + if (limit_with_ties_occured && distinct_on_expression_list) + throw Exception("Can not use WITH TIES alongside LIMIT BY/DISTINCT ON", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED); + if (s_by.ignore(pos, expected)) { /// WITH TIES was used alongside LIMIT BY /// But there are other kind of queries like LIMIT n BY smth LIMIT m WITH TIES which are allowed. /// So we have to ignore WITH TIES exactly in LIMIT BY state. 
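+ /// For illustration: SELECT a FROM t ORDER BY a LIMIT 2 BY a LIMIT 3 WITH TIES is allowed,
+ /// while SELECT a FROM t ORDER BY a LIMIT 2 WITH TIES BY a is rejected right below.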
if (limit_with_ties_occured) - throw Exception("Can not use WITH TIES alongside LIMIT BY", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED); + throw Exception("Can not use WITH TIES alongside LIMIT BY/DISTINCT ON", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED); + + if (distinct_on_expression_list) + throw Exception("Can not use DISTINCT ON alongside LIMIT BY", ErrorCodes::SYNTAX_ERROR); limit_by_length = limit_length; limit_by_offset = limit_offset; @@ -335,6 +359,17 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } + if (distinct_on_expression_list) + { + /// DISTINCT ON and LIMIT BY are mutually exclusive, checked before + assert (limit_by_expression_list == nullptr); + + /// Transform `DISTINCT ON expr` to `LIMIT 1 BY expr` + limit_by_expression_list = distinct_on_expression_list; + limit_by_length = std::make_shared(Field{UInt8(1)}); + distinct_on_expression_list = nullptr; + } + /// Because TOP n in totally equals LIMIT n if (top_length) limit_length = top_length; diff --git a/src/Processors/Formats/IOutputFormat.cpp b/src/Processors/Formats/IOutputFormat.cpp index 88649d9ca25..7d82c267f36 100644 --- a/src/Processors/Formats/IOutputFormat.cpp +++ b/src/Processors/Formats/IOutputFormat.cpp @@ -76,6 +76,9 @@ void IOutputFormat::work() if (rows_before_limit_counter && rows_before_limit_counter->hasAppliedLimit()) setRowsBeforeLimit(rows_before_limit_counter->get()); + if (before_finalize_callback) + before_finalize_callback(); + finalize(); finalized = true; return; @@ -117,4 +120,3 @@ void IOutputFormat::write(const Block & block) } } - diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index 4c2b3f30070..4d86d18f70e 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -67,6 +67,9 @@ public: /// Passed value are delta, that must be summarized. virtual void onProgress(const Progress & /*progress*/) {} + /// Set callback, which will be called before call to finalize(). + void setBeforeFinalizeCallback(std::function callback) { before_finalize_callback = callback; } + /// Content-Type to set when sending HTTP response. virtual std::string getContentType() const { return "text/plain; charset=UTF-8"; } @@ -91,6 +94,7 @@ private: size_t result_bytes = 0; bool prefix_written = false; + + std::function before_finalize_callback; }; } - diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 16f27058121..01c19deb837 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -53,6 +53,7 @@ namespace DB {arrow::Type::BOOL, "UInt8"}, {arrow::Type::DATE32, "Date"}, + {arrow::Type::DATE32, "Date32"}, {arrow::Type::DATE64, "DateTime"}, {arrow::Type::TIMESTAMP, "DateTime"}, @@ -145,9 +146,36 @@ namespace DB } /// Arrow stores Parquet::DATE in Int32, while ClickHouse stores Date in UInt16. 
Therefore, it should be checked before saving - static void fillColumnWithDate32Data(std::shared_ptr & arrow_column, IColumn & internal_column) +static void fillColumnWithDate32Data(std::shared_ptr & arrow_column, IColumn & internal_column) +{ + PaddedPODArray & column_data = assert_cast &>(internal_column).getData(); + column_data.reserve(arrow_column->length()); + + for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) { - PaddedPODArray & column_data = assert_cast &>(internal_column).getData(); + arrow::Date32Array & chunk = static_cast(*(arrow_column->chunk(chunk_i))); + + for (size_t value_i = 0, length = static_cast(chunk.length()); value_i < length; ++value_i) + { + UInt32 days_num = static_cast(chunk.Value(value_i)); + if (days_num > DATE_LUT_MAX_DAY_NUM) + { + // TODO: will it rollback correctly? + throw Exception + { + fmt::format("Input value {} of a column \"{}\" is greater than max allowed Date value, which is {}", days_num, internal_column.getName(), DATE_LUT_MAX_DAY_NUM), + ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE + }; + } + + column_data.emplace_back(days_num); + } + } +} + + static void fillDate32ColumnWithDate32Data(std::shared_ptr & arrow_column, IColumn & internal_column) + { + PaddedPODArray & column_data = assert_cast &>(internal_column).getData(); column_data.reserve(arrow_column->length()); for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) @@ -156,8 +184,8 @@ namespace DB for (size_t value_i = 0, length = static_cast(chunk.length()); value_i < length; ++value_i) { - UInt32 days_num = static_cast(chunk.Value(value_i)); - if (days_num > DATE_LUT_MAX_DAY_NUM) + Int32 days_num = static_cast(chunk.Value(value_i)); + if (days_num > DATE_LUT_MAX_EXTEND_DAY_NUM) { // TODO: will it rollback correctly? 
throw Exception @@ -328,7 +356,14 @@ namespace DB fillColumnWithBooleanData(arrow_column, internal_column); break; case arrow::Type::DATE32: - fillColumnWithDate32Data(arrow_column, internal_column); + if (WhichDataType(internal_column.getDataType()).isUInt16()) + { + fillColumnWithDate32Data(arrow_column, internal_column); + } + else + { + fillDate32ColumnWithDate32Data(arrow_column, internal_column); + } break; case arrow::Type::DATE64: fillColumnWithDate64Data(arrow_column, internal_column); @@ -520,8 +555,19 @@ namespace DB ); } - if (const auto * internal_type_it = std::find_if(arrow_type_to_internal_type.begin(), arrow_type_to_internal_type.end(), - [=](auto && elem) { return elem.first == arrow_type->id(); }); + auto filter = [=](auto && elem) + { + auto which = WhichDataType(column_type); + if (arrow_type->id() == arrow::Type::DATE32 && which.isDateOrDate32()) + { + return (strcmp(elem.second, "Date") == 0 && which.isDate()) || (strcmp(elem.second, "Date32") == 0 && which.isDate32()); + } + else + { + return elem.first == arrow_type->id(); + } + }; + if (const auto * internal_type_it = std::find_if(arrow_type_to_internal_type.begin(), arrow_type_to_internal_type.end(), filter); internal_type_it != arrow_type_to_internal_type.end()) { return DataTypeFactory::instance().get(internal_type_it->second); diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 45e46649b3a..831130d06d1 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -47,7 +47,7 @@ FillingTransform::FillingTransform( DataTypePtr to_type; /// TODO Wrong results for big integers. - if (isInteger(type) || which.isDate() || which.isDateTime()) + if (isInteger(type) || which.isDate() || which.isDate32() || which.isDateTime()) { max_type = Field::Types::Int64; to_type = std::make_shared(); diff --git a/src/Processors/printPipeline.cpp b/src/Processors/printPipeline.cpp index 5cdab1ed3ff..cbf8cb3a77d 100644 --- a/src/Processors/printPipeline.cpp +++ b/src/Processors/printPipeline.cpp @@ -103,7 +103,7 @@ void printPipelineCompact(const Processors & processors, WriteBuffer & out, bool out << "digraph\n{\n"; out << " rankdir=\"LR\";\n"; - out << " { node [shape = box]\n"; + out << " { node [shape = rect]\n"; /// Nodes // TODO quoting and escaping size_t next_step = 0; diff --git a/src/Processors/printPipeline.h b/src/Processors/printPipeline.h index 9497bc3cc3c..6ff5fb24c37 100644 --- a/src/Processors/printPipeline.h +++ b/src/Processors/printPipeline.h @@ -16,7 +16,7 @@ void printPipeline(const Processors & processors, const Statuses & statuses, Wri { out << "digraph\n{\n"; out << " rankdir=\"LR\";\n"; - out << " { node [shape = box]\n"; + out << " { node [shape = rect]\n"; auto get_proc_id = [](const IProcessor & proc) -> UInt64 { diff --git a/src/Server/PrometheusMetricsWriter.cpp b/src/Server/PrometheusMetricsWriter.cpp index 787f0fcd95e..30ae6f6fe42 100644 --- a/src/Server/PrometheusMetricsWriter.cpp +++ b/src/Server/PrometheusMetricsWriter.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include namespace { @@ -24,9 +24,13 @@ void writeOutLine(DB::WriteBuffer & wb, T && val, TArgs &&... 
args) writeOutLine(wb, std::forward(args)...); } -void replaceInvalidChars(std::string & metric_name) +/// Returns false if name is not valid +bool replaceInvalidChars(std::string & metric_name) { - std::replace(metric_name.begin(), metric_name.end(), '.', '_'); + /// dirty solution + metric_name = std::regex_replace(metric_name, std::regex("[^a-zA-Z0-9_:]"), "_"); + metric_name = std::regex_replace(metric_name, std::regex("^[^a-zA-Z]*"), ""); + return !metric_name.empty(); } } @@ -57,7 +61,8 @@ void PrometheusMetricsWriter::write(WriteBuffer & wb) const std::string metric_name{ProfileEvents::getName(static_cast(i))}; std::string metric_doc{ProfileEvents::getDocumentation(static_cast(i))}; - replaceInvalidChars(metric_name); + if (!replaceInvalidChars(metric_name)) + continue; std::string key{profile_events_prefix + metric_name}; writeOutLine(wb, "# HELP", key, metric_doc); @@ -75,7 +80,8 @@ void PrometheusMetricsWriter::write(WriteBuffer & wb) const std::string metric_name{CurrentMetrics::getName(static_cast(i))}; std::string metric_doc{CurrentMetrics::getDocumentation(static_cast(i))}; - replaceInvalidChars(metric_name); + if (!replaceInvalidChars(metric_name)) + continue; std::string key{current_metrics_prefix + metric_name}; writeOutLine(wb, "# HELP", key, metric_doc); @@ -91,7 +97,8 @@ void PrometheusMetricsWriter::write(WriteBuffer & wb) const { std::string key{asynchronous_metrics_prefix + name_value.first}; - replaceInvalidChars(key); + if (!replaceInvalidChars(key)) + continue; auto value = name_value.second; // TODO: add HELP section? asynchronous_metrics contains only key and value @@ -108,7 +115,8 @@ void PrometheusMetricsWriter::write(WriteBuffer & wb) const std::string metric_name{CurrentStatusInfo::getName(static_cast(i))}; std::string metric_doc{CurrentStatusInfo::getDocumentation(static_cast(i))}; - replaceInvalidChars(metric_name); + if (!replaceInvalidChars(metric_name)) + continue; std::string key{current_status_prefix + metric_name}; writeOutLine(wb, "# HELP", key, metric_doc); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index f311d58b7af..2575f244196 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -270,19 +270,17 @@ StoragePolicyPtr MergeTreeData::getStoragePolicy() const static void checkKeyExpression(const ExpressionActions & expr, const Block & sample_block, const String & key_name, bool allow_nullable_key) { - for (const auto & action : expr.getActions()) - { - if (action.node->type == ActionsDAG::ActionType::ARRAY_JOIN) - throw Exception(key_name + " key cannot contain array joins", ErrorCodes::ILLEGAL_COLUMN); + if (expr.hasArrayJoin()) + throw Exception(key_name + " key cannot contain array joins", ErrorCodes::ILLEGAL_COLUMN); - if (action.node->type == ActionsDAG::ActionType::FUNCTION) - { - IFunctionBase & func = *action.node->function_base; - if (!func.isDeterministic()) - throw Exception(key_name + " key cannot contain non-deterministic functions, " - "but contains function " + func.getName(), - ErrorCodes::BAD_ARGUMENTS); - } + try + { + expr.assertDeterministic(); + } + catch (Exception & e) + { + e.addMessage(fmt::format("for {} key", key_name)); + throw; } for (const ColumnWithTypeAndName & element : sample_block) @@ -418,7 +416,6 @@ void MergeTreeData::checkProperties( } checkKeyExpression(*new_sorting_key.expression, new_sorting_key.sample_block, "Sorting", allow_nullable_key); - } void MergeTreeData::setProperties(const StorageInMemoryMetadata 
& new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach) @@ -1088,7 +1085,7 @@ static bool isOldPartDirectory(const DiskPtr & disk, const String & directory_pa } -void MergeTreeData::clearOldTemporaryDirectories(ssize_t custom_directories_lifetime_seconds) +void MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds) { /// If the method is already called from another thread, then we don't need to do anything. std::unique_lock lock(clear_old_temporary_directories_mutex, std::defer_lock); @@ -1097,9 +1094,7 @@ void MergeTreeData::clearOldTemporaryDirectories(ssize_t custom_directories_life const auto settings = getSettings(); time_t current_time = time(nullptr); - ssize_t deadline = (custom_directories_lifetime_seconds >= 0) - ? current_time - custom_directories_lifetime_seconds - : current_time - settings->temporary_directories_lifetime.totalSeconds(); + ssize_t deadline = current_time - custom_directories_lifetime_seconds; /// Delete temporary directories older than a day. for (const auto & [path, disk] : getRelativeDataPathsWithDisks()) @@ -1517,6 +1512,7 @@ void checkVersionColumnTypesConversion(const IDataType * old_type, const IDataTy if ((which_old_type.isInt() && !which_new_type.isInt()) || (which_old_type.isUInt() && !which_new_type.isUInt()) || (which_old_type.isDate() && !which_new_type.isDate()) + || (which_old_type.isDate32() && !which_new_type.isDate32()) || (which_old_type.isDateTime() && !which_new_type.isDateTime()) || (which_old_type.isFloat() && !which_new_type.isFloat())) { @@ -3337,20 +3333,25 @@ MergeTreeData::getAllDataPartsVector(MergeTreeData::DataPartStateVector * out_st return res; } -std::vector -MergeTreeData::getDetachedParts() const +std::vector MergeTreeData::getDetachedParts() const { std::vector res; for (const auto & [path, disk] : getRelativeDataPathsWithDisks()) { - for (auto it = disk->iterateDirectory(fs::path(path) / MergeTreeData::DETACHED_DIR_NAME); it->isValid(); it->next()) - { - res.emplace_back(); - auto & part = res.back(); + String detached_path = fs::path(path) / MergeTreeData::DETACHED_DIR_NAME; - DetachedPartInfo::tryParseDetachedPartName(it->name(), part, format_version); - part.disk = disk->getName(); + /// Note: we don't care about TOCTOU issue here. + if (disk->exists(detached_path)) + { + for (auto it = disk->iterateDirectory(detached_path); it->isValid(); it->next()) + { + res.emplace_back(); + auto & part = res.back(); + + DetachedPartInfo::tryParseDetachedPartName(it->name(), part, format_version); + part.disk = disk->getName(); + } } } return res; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index a6ece4a7a98..55739a4d009 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -526,9 +526,8 @@ public: void clearOldWriteAheadLogs(); /// Delete all directories which names begin with "tmp" - /// Set non-negative parameter value to override MergeTreeSettings temporary_directories_lifetime - /// Must be called with locked lockForShare() because use relative_data_path. - void clearOldTemporaryDirectories(ssize_t custom_directories_lifetime_seconds = -1); + /// Must be called with locked lockForShare() because it's using relative_data_path. 
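+ /// Callers now pass the lifetime explicitly, e.g. clearOldTemporaryDirectories(getSettings()->temporary_directories_lifetime.totalSeconds()); see the StorageMergeTree and ReplicatedMergeTreeCleanupThread changes below.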
+ void clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds); void clearEmptyParts(); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index f9ed30ed4ed..a777c244426 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -828,7 +828,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor UInt64 watch_prev_elapsed = 0; /// We count total amount of bytes in parts - /// and use direct_io + aio if there is more than min_merge_bytes_to_use_direct_io + /// and use direct_io if there is more than min_merge_bytes_to_use_direct_io bool read_with_direct_io = false; if (data_settings->min_merge_bytes_to_use_direct_io != 0) { diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index e82b1966461..2a3c7ed00a1 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -54,7 +54,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( MergeTreeReaderSettings reader_settings = { - /// bytes to use AIO (this is hack) + /// bytes to use direct IO (this is hack) .min_bytes_to_use_direct_io = read_with_direct_io ? 1UL : std::numeric_limits::max(), .max_read_buffer_size = DBMS_DEFAULT_BUFFER_SIZE, .save_marks_in_cache = false diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 10e2d77eb27..06856c73888 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -62,7 +62,7 @@ void ReplicatedMergeTreeCleanupThread::iterate() /// Both use relative_data_path which changes during rename, so we /// do it under share lock storage.clearOldWriteAheadLogs(); - storage.clearOldTemporaryDirectories(); + storage.clearOldTemporaryDirectories(storage.getSettings()->temporary_directories_lifetime.totalSeconds()); } /// This is loose condition: no problem if we actually had lost leadership at this moment diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 61fbbbc3086..b43cb6d71a0 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -28,6 +28,7 @@ #include #include +#include #define DBMS_STORAGE_LOG_DATA_FILE_EXTENSION ".bin" @@ -719,6 +720,34 @@ CheckResults StorageLog::checkData(const ASTPtr & /* query */, ContextPtr contex } +IStorage::ColumnSizeByName StorageLog::getColumnSizes() const +{ + std::shared_lock lock(rwlock, std::chrono::seconds(DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC)); + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + + ColumnSizeByName column_sizes; + FileChecker::Map file_sizes = file_checker.getFileSizes(); + + for (const auto & column : getInMemoryMetadata().getColumns().getAllPhysical()) + { + ISerialization::StreamCallback stream_callback = [&, this] (const ISerialization::SubstreamPath & substream_path) + { + String stream_name = ISerialization::getFileNameForStream(column, substream_path); + ColumnSize & size = column_sizes[column.name]; + auto it = files.find(stream_name); + if (it != files.end()) + size.data_compressed += file_sizes[fileName(it->second.data_file_path)]; + }; + + ISerialization::SubstreamPath substream_path; + auto serialization = column.type->getDefaultSerialization(); + 
serialization->enumerateStreams(stream_callback, substream_path); + } + + return column_sizes; +} + void registerStorageLog(StorageFactory & factory) { StorageFactory::StorageFeatures features{ diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 6fea00edefd..799bad26c7c 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -45,6 +45,7 @@ public: bool storesDataOnDisk() const override { return true; } Strings getDataPaths() const override { return {DB::fullPath(disk, table_path)}; } bool supportsSubcolumns() const override { return true; } + ColumnSizeByName getColumnSizes() const override; protected: /** Attach the table with the appropriate name, along the appropriate path (with / at the end), @@ -87,7 +88,7 @@ private: DiskPtr disk; String table_path; - std::shared_timed_mutex rwlock; + mutable std::shared_timed_mutex rwlock; Files files; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 55ccd60ea38..05d18e65068 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1068,7 +1068,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(IBackgroundJobExecutor & execut /// All use relative_data_path which changes during rename /// so execute under share lock. clearOldPartsFromFilesystem(); - clearOldTemporaryDirectories(); + clearOldTemporaryDirectories(getSettings()->temporary_directories_lifetime.totalSeconds()); clearOldWriteAheadLogs(); clearOldMutations(); clearEmptyParts(); diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 689b1307f4d..342101d91cc 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -4,6 +4,7 @@ #include #include +#include #include @@ -523,6 +524,34 @@ CheckResults StorageTinyLog::checkData(const ASTPtr & /* query */, ContextPtr co return file_checker.check(); } +IStorage::ColumnSizeByName StorageTinyLog::getColumnSizes() const +{ + std::shared_lock lock(rwlock, std::chrono::seconds(DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC)); + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + + ColumnSizeByName column_sizes; + FileChecker::Map file_sizes = file_checker.getFileSizes(); + + for (const auto & column : getInMemoryMetadata().getColumns().getAllPhysical()) + { + ISerialization::StreamCallback stream_callback = [&, this] (const ISerialization::SubstreamPath & substream_path) + { + String stream_name = ISerialization::getFileNameForStream(column, substream_path); + ColumnSize & size = column_sizes[column.name]; + auto it = files.find(stream_name); + if (it != files.end()) + size.data_compressed += file_sizes[fileName(it->second.data_file_path)]; + }; + + ISerialization::SubstreamPath substream_path; + auto serialization = column.type->getDefaultSerialization(); + serialization->enumerateStreams(stream_callback, substream_path); + } + + return column_sizes; +} + void StorageTinyLog::truncate( const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, TableExclusiveLockHolder &) { diff --git a/src/Storages/StorageTinyLog.h b/src/Storages/StorageTinyLog.h index 71763a6403e..849b0731a47 100644 --- a/src/Storages/StorageTinyLog.h +++ b/src/Storages/StorageTinyLog.h @@ -45,6 +45,7 @@ public: void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, TableExclusiveLockHolder &) override; + ColumnSizeByName getColumnSizes() const override; protected: StorageTinyLog( DiskPtr disk_, @@ -71,7 +72,7 @@ private: Files files; 
FileChecker file_checker; - std::shared_timed_mutex rwlock; + mutable std::shared_timed_mutex rwlock; Poco::Logger * log; diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 8f65147bb11..0058b58f537 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -98,7 +98,7 @@ protected: Names cols_required_for_sorting_key; Names cols_required_for_primary_key; Names cols_required_for_sampling; - MergeTreeData::ColumnSizeByName column_sizes; + IStorage::ColumnSizeByName column_sizes; { StoragePtr storage = storages.at(std::make_pair(database_name, table_name)); diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index f45acb0efd9..bed8eadc19c 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -95,6 +95,7 @@ const char * auto_contributors[] { "Anatoly Pugachev", "ana-uvarova", "AnaUvarova", + "Andreas Hunkeler", "AndreevDm", "Andrei Bodrov", "Andrei Chulkov", @@ -280,6 +281,7 @@ const char * auto_contributors[] { "Dongdong Yang", "DoomzD", "Dr. Strange Looker", + "d.v.semenov", "eaxdev", "eejoin", "egatov", @@ -290,6 +292,7 @@ const char * auto_contributors[] { "Eldar Zaitov", "Elena Baskakova", "elenaspb2019", + "elevankoff", "Elghazal Ahmed", "Elizaveta Mironyuk", "emakarov", @@ -434,6 +437,7 @@ const char * auto_contributors[] { "Ivan Starkov", "ivanzhukov", "Ivan Zhukov", + "Jack Song", "JackyWoo", "Jacob Hayes", "jakalletti", @@ -476,6 +480,7 @@ const char * auto_contributors[] { "Konstantin Lebedev", "Konstantin Malanchev", "Konstantin Podshumok", + "Konstantin Rudenskii", "Korenevskiy Denis", "Korviakov Andrey", "koshachy", @@ -488,6 +493,7 @@ const char * auto_contributors[] { "kshvakov", "kssenii", "l", + "l1tsolaiki", "lalex", "Latysheva Alexandra", "lehasm", @@ -515,6 +521,7 @@ const char * auto_contributors[] { "long2ice", "Lopatin Konstantin", "Loud_Scream", + "ltybc-coder", "luc1ph3r", "Lucid Dreams", "Luis Bosque", @@ -633,6 +640,7 @@ const char * auto_contributors[] { "nicelulu", "Nickita", "Nickolay Yastrebov", + "nickzhwang", "Nicolae Vartolomei", "Nico Mandery", "Nico Piderman", @@ -871,6 +879,7 @@ const char * auto_contributors[] { "Veselkov Konstantin", "vic", "vicdashkov", + "Victor", "Victor Tarnavsky", "Viktor Taranenko", "vinity", @@ -947,6 +956,7 @@ const char * auto_contributors[] { "Yuriy Korzhenevskiy", "Yury Karpovich", "Yury Stankevich", + "ywill3", "zamulla", "zhang2014", "zhangshengyu", @@ -957,11 +967,13 @@ const char * auto_contributors[] { "Zhichun Wu", "Zhipeng", "zhukai", + "Zijie Lu", "zlx19950903", "Zoran Pandovski", "zvonand", "zvrr", "zvvr", + "zxc111", "zzsmdfj", "Артем Стрельцов", "Владислав Тихонов", @@ -980,6 +992,7 @@ const char * auto_contributors[] { "张风啸", "徐炘", "曲正鹏", + "未来星___费", "极客青年", "谢磊", "贾顺名(Jarvis)", diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index 8b119492340..7a8ee75803f 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -223,7 +223,7 @@ void StorageSystemStackTrace::fillData(MutableColumns & res_columns, ContextPtr, { constexpr size_t comm_buf_size = 32; /// More than enough for thread name ReadBufferFromFile comm(thread_name_path.string(), comm_buf_size); - readStringUntilEOF(thread_name, comm); + 
readEscapedStringUntilEOL(thread_name, comm); comm.close(); } diff --git a/src/Storages/System/StorageSystemWarnings.cpp b/src/Storages/System/StorageSystemWarnings.cpp new file mode 100644 index 00000000000..76b35e9b555 --- /dev/null +++ b/src/Storages/System/StorageSystemWarnings.cpp @@ -0,0 +1,21 @@ +#include +#include + + +namespace DB +{ + +NamesAndTypesList StorageSystemWarnings::getNamesAndTypes() +{ + return { + {"message", std::make_shared()}, + }; +} + +void StorageSystemWarnings::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +{ + for (const auto & warning : context->getWarnings()) + res_columns[0]->insert(warning); +} + +} diff --git a/src/Storages/System/StorageSystemWarnings.h b/src/Storages/System/StorageSystemWarnings.h new file mode 100644 index 00000000000..087c4016aff --- /dev/null +++ b/src/Storages/System/StorageSystemWarnings.h @@ -0,0 +1,27 @@ +#pragma once + +#include + + +namespace DB +{ + +class Context; + +/** Implements system.warnings table that contains warnings about server configuration + * to be displayed in clickhouse-client. + */ +class StorageSystemWarnings final : public shared_ptr_helper, + public IStorageSystemOneBlock { +public: + std::string getName() const override { return "SystemWarnings"; } + + static NamesAndTypesList getNamesAndTypes(); + +protected: + friend struct shared_ptr_helper; + using IStorageSystemOneBlock::IStorageSystemOneBlock; + + void fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const override; +}; +} diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 7da65b09d6d..b3cc254a392 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -43,6 +43,7 @@ #include #include #include +#include #include #if !defined(ARCADIA_BUILD) @@ -116,6 +117,7 @@ void attachSystemTablesLocal(IDatabase & system_database) attach(system_database, "user_directories"); attach(system_database, "privileges"); attach(system_database, "errors"); + attach(system_database, "warnings"); attach(system_database, "data_skipping_indices"); #if !defined(ARCADIA_BUILD) attach(system_database, "licenses"); diff --git a/src/Storages/ya.make b/src/Storages/ya.make index 495ec9c4fd6..0f09b973a63 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -118,6 +118,7 @@ SRCS( MySQL/MySQLSettings.cpp PartitionCommands.cpp ProjectionsDescription.cpp + ReadFinalForExternalReplicaStorage.cpp ReadInOrderOptimizer.cpp SelectQueryDescription.cpp SetSettings.cpp diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index 4d3524c7563..40bfa2cbb6b 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -153,11 +153,6 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr if (arg_num < args.size()) throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - /// ExpressionAnalyzer will be created in InterpreterSelectQuery that will meet these `Identifier` when processing the request. - /// We need to mark them as the name of the database or table, because the default value is column. 
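+ /// Leaving the identifiers untouched lets a bare column be used as the sharding key again,
+ /// e.g. remote('127.1', system.one, dummy), which previously failed with "Unknown column: dummy"
+ /// (see 01932_remote_sharding_key_column.sql added below).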
- for (auto ast : args) - setIdentifierSpecial(ast); - if (!cluster_name.empty()) { /// Use an existing cluster from the main config diff --git a/tests/integration/helpers/test_tools.py b/tests/integration/helpers/test_tools.py index ef530c4836b..b5d40659629 100644 --- a/tests/integration/helpers/test_tools.py +++ b/tests/integration/helpers/test_tools.py @@ -39,6 +39,9 @@ class TSV: def __str__(self): return '\n'.join(self.lines) + def __repr__(self): + return self.__str__() + def __len__(self): return len(self.lines) diff --git a/tests/integration/test_cluster_copier/test_two_nodes.py b/tests/integration/test_cluster_copier/test_two_nodes.py index 817c3571833..255af13213a 100644 --- a/tests/integration/test_cluster_copier/test_two_nodes.py +++ b/tests/integration/test_cluster_copier/test_two_nodes.py @@ -473,17 +473,17 @@ def execute_task(started_cluster, task, cmd_options): # Tests -@pytest.mark.timeout(600) +@pytest.mark.skip(reason="Too flaky :(") def test_different_schema(started_cluster): execute_task(started_cluster, TaskWithDifferentSchema(started_cluster), []) -@pytest.mark.timeout(600) +@pytest.mark.skip(reason="Too flaky :(") def test_ttl_columns(started_cluster): execute_task(started_cluster, TaskTTL(started_cluster), []) -@pytest.mark.timeout(600) +@pytest.mark.skip(reason="Too flaky :(") def test_skip_index(started_cluster): execute_task(started_cluster, TaskSkipIndex(started_cluster), []) diff --git a/tests/integration/test_config_substitutions/configs/config_env.xml b/tests/integration/test_config_substitutions/configs/config_env.xml index 712855c47c0..2d63b9c688d 100644 --- a/tests/integration/test_config_substitutions/configs/config_env.xml +++ b/tests/integration/test_config_substitutions/configs/config_env.xml @@ -10,5 +10,8 @@ default default + + + diff --git a/tests/integration/test_config_substitutions/configs/config_incl.xml b/tests/integration/test_config_substitutions/configs/config_incl.xml index 383a23af1ff..43ec78ff8ef 100644 --- a/tests/integration/test_config_substitutions/configs/config_incl.xml +++ b/tests/integration/test_config_substitutions/configs/config_incl.xml @@ -1,5 +1,5 @@ - /etc/clickhouse-server/config.d/max_query_size.xml + /etc/clickhouse-server/config.d/include_from_source.xml @@ -11,5 +11,8 @@ default default + + + diff --git a/tests/integration/test_config_substitutions/configs/config_include_from_env.xml b/tests/integration/test_config_substitutions/configs/config_include_from_env.xml index 71e11235749..79b650f3d9e 100644 --- a/tests/integration/test_config_substitutions/configs/config_include_from_env.xml +++ b/tests/integration/test_config_substitutions/configs/config_include_from_env.xml @@ -11,5 +11,7 @@ default default + + diff --git a/tests/integration/test_config_substitutions/configs/config_zk.xml b/tests/integration/test_config_substitutions/configs/config_zk.xml index aa589e9f9d3..9fad5658445 100644 --- a/tests/integration/test_config_substitutions/configs/config_zk.xml +++ b/tests/integration/test_config_substitutions/configs/config_zk.xml @@ -10,5 +10,8 @@ default default + + + diff --git a/tests/integration/test_config_substitutions/configs/include_from_source.xml b/tests/integration/test_config_substitutions/configs/include_from_source.xml new file mode 100644 index 00000000000..6095180bb59 --- /dev/null +++ b/tests/integration/test_config_substitutions/configs/include_from_source.xml @@ -0,0 +1,17 @@ + + 99999 + + + + + default + + + + + + + default + + + diff --git 
a/tests/integration/test_config_substitutions/configs/max_query_size.xml b/tests/integration/test_config_substitutions/configs/max_query_size.xml deleted file mode 100644 index 9ec61368be9..00000000000 --- a/tests/integration/test_config_substitutions/configs/max_query_size.xml +++ /dev/null @@ -1,3 +0,0 @@ - - 99999 - diff --git a/tests/integration/test_config_substitutions/test.py b/tests/integration/test_config_substitutions/test.py index 565cd1c0e97..aec3f1d3635 100644 --- a/tests/integration/test_config_substitutions/test.py +++ b/tests/integration/test_config_substitutions/test.py @@ -8,11 +8,11 @@ node2 = cluster.add_instance('node2', user_configs=['configs/config_env.xml'], env_variables={"MAX_QUERY_SIZE": "55555"}) node3 = cluster.add_instance('node3', user_configs=['configs/config_zk.xml'], with_zookeeper=True) node4 = cluster.add_instance('node4', user_configs=['configs/config_incl.xml'], - main_configs=['configs/max_query_size.xml']) # include value 77777 + main_configs=['configs/include_from_source.xml']) # include value 77777 node5 = cluster.add_instance('node5', user_configs=['configs/config_allow_databases.xml']) node6 = cluster.add_instance('node6', user_configs=['configs/config_include_from_env.xml'], - env_variables={"INCLUDE_FROM_ENV": "/etc/clickhouse-server/config.d/max_query_size.xml"}, - main_configs=['configs/max_query_size.xml']) + env_variables={"INCLUDE_FROM_ENV": "/etc/clickhouse-server/config.d/include_from_source.xml"}, + main_configs=['configs/include_from_source.xml']) @pytest.fixture(scope="module") @@ -20,6 +20,8 @@ def start_cluster(): try: def create_zk_roots(zk): zk.create(path="/setting/max_query_size", value=b"77777", makepath=True) + zk.create(path="/users_from_zk_1", value=b"default", makepath=True) + zk.create(path="/users_from_zk_2", value=b"default", makepath=True) cluster.add_zookeeper_startup_command(create_zk_roots) @@ -37,6 +39,18 @@ def test_config(start_cluster): assert node6.query("select value from system.settings where name = 'max_query_size'") == "99999\n" +def test_include_config(start_cluster): + # + assert node4.query("select 1") + assert node4.query("select 1", user="user_1") + assert node4.query("select 1", user="user_2") + + # any anyLast count + groupBitOr + groupBitAnd + groupBitXor @@ -119,7 +122,7 @@ SELECT {function}(value_1), {function}(value_2), - groupBitAnd(value_3), + sum(toUInt256(value_3)), {function}(value_3) FROM {table} GROUP BY key @@ -140,7 +143,7 @@ SELECT {function}If(value_1, predicate), {function}If(value_2, predicate), - groupBitAndIf(value_3, predicate), + sumIf(toUInt256(value_3), predicate), {function}If(value_3, predicate) FROM {table} GROUP BY key @@ -163,7 +166,7 @@ SELECT {function}(value_1), {function}(value_2), - groupBitAnd(value_3), + sum(toUInt256(value_3)), {function}(value_3), {function}(value_4), {function}(value_5) @@ -188,7 +191,7 @@ SELECT {function}If(value_1, predicate), {function}If(value_2, predicate), - groupBitAndIf(value_3, predicate), + sumIf(toUInt256(value_3), predicate), {function}If(value_3, predicate), {function}If(value_4, predicate), {function}If(value_5, predicate) @@ -212,7 +215,7 @@ SELECT {function}(WatchID), {function}(CounterID), - groupBitAnd(ClientIP), + sum(toUInt256(ClientIP)), {function}(ClientIP) FROM hits_100m_single GROUP BY intHash32(UserID) % {group_scale} @@ -224,8 +227,8 @@ {function}(WatchID), {function}(CounterID), {function}(ClientIP), - {function}(GoodEvent), - {function}(CounterClass) + {function}(IPNetworkID), + {function}(SearchEngineID) FROM 
hits_100m_single GROUP BY intHash32(UserID) % {group_scale} FORMAT Null @@ -235,10 +238,10 @@ SELECT {function}(WatchID), {function}(CounterID), - groupBitAnd(ClientIP), + sum(toUInt256(ClientIP)), {function}(ClientIP), - {function}(GoodEvent), - {function}(CounterClass) + {function}(IPNetworkID), + {function}(SearchEngineID) FROM hits_100m_single GROUP BY intHash32(UserID) % {group_scale} FORMAT Null @@ -260,7 +263,7 @@ SELECT {function}If(WatchID, predicate), {function}If(CounterID, predicate), - groupBitAndIf(ClientIP, predicate), + sumIf(toUInt256(ClientIP), predicate), {function}If(ClientIP, predicate) FROM hits_100m_single GROUP BY intHash32(UserID) % {group_scale} @@ -273,8 +276,8 @@ {function}If(WatchID, predicate), {function}If(CounterID, predicate), {function}If(ClientIP, predicate), - {function}If(GoodEvent, predicate), - {function}If(CounterClass, predicate) + {function}If(IPNetworkID, predicate), + {function}If(SearchEngineID, predicate) FROM hits_100m_single GROUP BY intHash32(UserID) % {group_scale} FORMAT Null @@ -285,10 +288,10 @@ SELECT {function}If(WatchID, predicate), {function}If(CounterID, predicate), - groupBitAndIf(ClientIP, predicate), + sumIf(toUInt256(ClientIP), predicate), {function}If(ClientIP, predicate), - {function}If(GoodEvent, predicate), - {function}If(CounterClass, predicate) + {function}If(IPNetworkID, predicate), + {function}If(SearchEngineID, predicate) FROM hits_100m_single GROUP BY intHash32(UserID) % {group_scale} FORMAT Null diff --git a/tests/queries/0_stateless/00900_long_parquet_load.reference b/tests/queries/0_stateless/00900_long_parquet_load.reference index 621bca2ec0e..1a9c81ef686 100644 --- a/tests/queries/0_stateless/00900_long_parquet_load.reference +++ b/tests/queries/0_stateless/00900_long_parquet_load.reference @@ -91,6 +91,11 @@ idx10 ['This','is','a','test'] === Try load data from datapage_v2.snappy.parquet Code: 33. 
DB::ParsingEx---tion: Error while reading Parquet data: IOError: Not yet implemented: Unsupported encoding.: data for INSERT was parsed from stdin +=== Try load data from datatype-date32.parquet +1925-01-01 +1949-10-01 +2021-10-01 +2282-12-31 === Try load data from dict-page-offset-zero.parquet 1552 1552 diff --git a/tests/queries/0_stateless/00918_json_functions.reference b/tests/queries/0_stateless/00918_json_functions.reference index d9b34df8bf8..462449222b3 100644 --- a/tests/queries/0_stateless/00918_json_functions.reference +++ b/tests/queries/0_stateless/00918_json_functions.reference @@ -67,7 +67,7 @@ hello 1234567890.12345677879616925706 (1234567890.12345677879616925706,'test') 1234567890.123456695758468374595199311875 (1234567890.123456695758468374595199311875,'test') --JSONExtractKeysAndValues-- -[('a','hello')] +[('a','hello'),('b','[-100,200,300]')] [('b',[-100,200,300])] [('a','hello'),('b','world')] [('a',5),('b',7),('c',11)] @@ -170,7 +170,7 @@ Friday (3,5) (3,0) --JSONExtractKeysAndValues-- -[('a','hello')] +[('a','hello'),('b','[-100,200,300]')] [('b',[-100,200,300])] [('a','hello'),('b','world')] [('a',5),('b',7),('c',11)] diff --git a/tests/queries/0_stateless/01226_dist_on_dist_global_in.reference b/tests/queries/0_stateless/01226_dist_on_dist_global_in.reference index 3d8d7fb770d..e7d4ea81714 100644 --- a/tests/queries/0_stateless/01226_dist_on_dist_global_in.reference +++ b/tests/queries/0_stateless/01226_dist_on_dist_global_in.reference @@ -2,5 +2,4 @@ GLOBAL IN 0 0 0 -0 GLOBAL NOT IN diff --git a/tests/queries/0_stateless/01226_dist_on_dist_global_in.sql b/tests/queries/0_stateless/01226_dist_on_dist_global_in.sql index 588ea9c1048..ca9b28a14f4 100644 --- a/tests/queries/0_stateless/01226_dist_on_dist_global_in.sql +++ b/tests/queries/0_stateless/01226_dist_on_dist_global_in.sql @@ -1,10 +1,8 @@ SELECT 'GLOBAL IN'; select * from remote('localhost', system.one) where dummy global in (0); -select * from remote('localhost', system.one) where toUInt64(dummy) global in numbers(1); select * from remote('localhost', system.one) where dummy global in system.one; select * from remote('localhost', system.one) where dummy global in (select 0); SELECT 'GLOBAL NOT IN'; select * from remote('localhost', system.one) where dummy global not in (0); -select * from remote('localhost', system.one) where toUInt64(dummy) global not in numbers(1); select * from remote('localhost', system.one) where dummy global not in system.one; select * from remote('localhost', system.one) where dummy global not in (select 0); diff --git a/tests/queries/0_stateless/01232_untuple.reference b/tests/queries/0_stateless/01232_untuple.reference index 44f96e1decd..21fd0c4a8a5 100644 --- a/tests/queries/0_stateless/01232_untuple.reference +++ b/tests/queries/0_stateless/01232_untuple.reference @@ -2,7 +2,7 @@ hello 1 3 world 9 9 (0,1) -key v1 v2 v3 v4 v5 +key tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 1) tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 2) tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 3) tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 4) tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 5) 4 10 20 10 20 30 3 70 20 10 20 30 2 11 20 10 20 30 diff --git a/tests/queries/0_stateless/01601_custom_tld.reference b/tests/queries/0_stateless/01601_custom_tld.reference index e056505f273..04204ebf02a 100644 --- a/tests/queries/0_stateless/01601_custom_tld.reference +++ b/tests/queries/0_stateless/01601_custom_tld.reference @@ -22,3 +22,9 @@ foobar.com foobar.com foobar.com 
xx.blogspot.co.at +-- www +www.foo +foo +-- vector +xx.blogspot.co.at + diff --git a/tests/queries/0_stateless/01601_custom_tld.sql b/tests/queries/0_stateless/01601_custom_tld.sql index 688dd419858..ceb00d5ff19 100644 --- a/tests/queries/0_stateless/01601_custom_tld.sql +++ b/tests/queries/0_stateless/01601_custom_tld.sql @@ -29,3 +29,11 @@ select cutToFirstSignificantSubdomainCustom('http://foobar.com', 'public_suffix_ select cutToFirstSignificantSubdomainCustom('http://foobar.com/foo', 'public_suffix_list'); select cutToFirstSignificantSubdomainCustom('http://bar.foobar.com/foo', 'public_suffix_list'); select cutToFirstSignificantSubdomainCustom('http://xx.blogspot.co.at', 'public_suffix_list'); + +select '-- www'; +select cutToFirstSignificantSubdomainCustomWithWWW('http://www.foo', 'public_suffix_list'); +select cutToFirstSignificantSubdomainCustom('http://www.foo', 'public_suffix_list'); + +select '-- vector'; +select cutToFirstSignificantSubdomainCustom('http://xx.blogspot.co.at/' || toString(number), 'public_suffix_list') from numbers(1); +select cutToFirstSignificantSubdomainCustom('there-is-no-such-domain' || toString(number), 'public_suffix_list') from numbers(1); diff --git a/tests/queries/0_stateless/01616_untuple_access_field.reference b/tests/queries/0_stateless/01616_untuple_access_field.reference index d00491fd7e5..9874d6464ab 100644 --- a/tests/queries/0_stateless/01616_untuple_access_field.reference +++ b/tests/queries/0_stateless/01616_untuple_access_field.reference @@ -1 +1 @@ -1 +1 2 diff --git a/tests/queries/0_stateless/01616_untuple_access_field.sql b/tests/queries/0_stateless/01616_untuple_access_field.sql index 569efca5349..82cdf80c8bc 100644 --- a/tests/queries/0_stateless/01616_untuple_access_field.sql +++ b/tests/queries/0_stateless/01616_untuple_access_field.sql @@ -1 +1 @@ -select _ut_1 from (select untuple((1,2))); +select * from (select untuple((1,2))); diff --git a/tests/queries/0_stateless/01735_join_get_low_card_fix.reference b/tests/queries/0_stateless/01735_join_get_low_card_fix.reference index 0b20aead00e..a9e2f17562a 100644 --- a/tests/queries/0_stateless/01735_join_get_low_card_fix.reference +++ b/tests/queries/0_stateless/01735_join_get_low_card_fix.reference @@ -1 +1,6 @@ -yyy +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01735_join_get_low_card_fix.sql b/tests/queries/0_stateless/01735_join_get_low_card_fix.sql index bdc979bc11e..e2002112360 100644 --- a/tests/queries/0_stateless/01735_join_get_low_card_fix.sql +++ b/tests/queries/0_stateless/01735_join_get_low_card_fix.sql @@ -1,9 +1,14 @@ -drop table if exists join_tbl; +DROP TABLE IF EXISTS join_tbl; -create table join_tbl (`id` String, `name` String) engine Join(any, left, id); +CREATE TABLE join_tbl (`id` String, `name` String, lcname LowCardinality(String)) ENGINE = Join(any, left, id); -insert into join_tbl values ('xxx', 'yyy'); +INSERT INTO join_tbl VALUES ('xxx', 'yyy', 'yyy'); -select joinGet('join_tbl', 'name', toLowCardinality('xxx')); +SELECT joinGet('join_tbl', 'name', 'xxx') == 'yyy'; +SELECT joinGet('join_tbl', 'name', toLowCardinality('xxx')) == 'yyy'; +SELECT joinGet('join_tbl', 'name', toLowCardinality(materialize('xxx'))) == 'yyy'; +SELECT joinGet('join_tbl', 'lcname', 'xxx') == 'yyy'; +SELECT joinGet('join_tbl', 'lcname', toLowCardinality('xxx')) == 'yyy'; +SELECT joinGet('join_tbl', 'lcname', toLowCardinality(materialize('xxx'))) == 'yyy'; -drop table if exists join_tbl; +DROP TABLE IF EXISTS join_tbl; diff --git 
a/tests/queries/0_stateless/01763_support_map_lowcardinality_type.reference b/tests/queries/0_stateless/01763_support_map_lowcardinality_type.reference new file mode 100644 index 00000000000..8fdcdf3d8d5 --- /dev/null +++ b/tests/queries/0_stateless/01763_support_map_lowcardinality_type.reference @@ -0,0 +1,2 @@ +b +{'1':1} 1 0 diff --git a/tests/queries/0_stateless/01763_support_map_lowcardinality_type.sql b/tests/queries/0_stateless/01763_support_map_lowcardinality_type.sql new file mode 100644 index 00000000000..ccade153ca1 --- /dev/null +++ b/tests/queries/0_stateless/01763_support_map_lowcardinality_type.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS map_lc; +SET allow_experimental_map_type = 1; +CREATE TABLE map_lc +( + `kv` Map(LowCardinality(String), LowCardinality(String)) +) +ENGINE = Memory; + +INSERT INTO map_lc select map('a', 'b'); +SELECT kv['a'] FROM map_lc; +DROP TABLE map_lc; +SELECT map(toFixedString('1',1),1) AS m, m[toFixedString('1',1)],m[toFixedString('1',2)]; diff --git a/tests/queries/0_stateless/01915_json_extract_raw_string.reference b/tests/queries/0_stateless/01915_json_extract_raw_string.reference new file mode 100644 index 00000000000..839cb33f5f2 --- /dev/null +++ b/tests/queries/0_stateless/01915_json_extract_raw_string.reference @@ -0,0 +1 @@ +('123','456','[7,8,9]') diff --git a/tests/queries/0_stateless/01915_json_extract_raw_string.sql b/tests/queries/0_stateless/01915_json_extract_raw_string.sql new file mode 100644 index 00000000000..6ba94ac6dfd --- /dev/null +++ b/tests/queries/0_stateless/01915_json_extract_raw_string.sql @@ -0,0 +1 @@ +select JSONExtract('{"a": "123", "b": 456, "c": [7, 8, 9]}', 'Tuple(a String, b String, c String)'); diff --git a/tests/queries/0_stateless/01917_distinct_on.reference b/tests/queries/0_stateless/01917_distinct_on.reference new file mode 100644 index 00000000000..b5b231e5786 --- /dev/null +++ b/tests/queries/0_stateless/01917_distinct_on.reference @@ -0,0 +1,8 @@ +1 1 1 +2 2 2 +1 2 2 +1 1 1 +2 2 2 +1 2 2 +1 1 1 +2 2 2 diff --git a/tests/queries/0_stateless/01917_distinct_on.sql b/tests/queries/0_stateless/01917_distinct_on.sql new file mode 100644 index 00000000000..ae528b6e838 --- /dev/null +++ b/tests/queries/0_stateless/01917_distinct_on.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS t1; + +CREATE TABLE t1 (`a` UInt32, `b` UInt32, `c` UInt32 ) ENGINE = Memory; +INSERT INTO t1 VALUES (1, 1, 1), (1, 1, 2), (2, 2, 2), (1, 2, 2); + +SELECT DISTINCT ON (a, b) a, b, c FROM t1; +SELECT DISTINCT ON (a, b) * FROM t1; +SELECT DISTINCT ON (a) * FROM t1; + +-- fuzzer will fail, enable when fixed +-- SELECT DISTINCT ON (a, b) a, b, c FROM t1 LIMIT 1 BY a, b; -- { clientError 62 } + +-- SELECT DISTINCT ON a, b a, b FROM t1; -- { clientError 62 } +-- SELECT DISTINCT ON a a, b FROM t1; -- { clientError 62 } + +-- "Code: 47. 
DB::Exception: Missing columns: 'DISTINCT'" - error can be better +-- SELECT DISTINCT ON (a, b) DISTINCT a, b FROM t1; -- { serverError 47 } +-- SELECT DISTINCT DISTINCT ON (a, b) a, b FROM t1; -- { clientError 62 } + +-- SELECT ALL DISTINCT ON (a, b) a, b FROM t1; -- { clientError 62 } +-- SELECT DISTINCT ON (a, b) ALL a, b FROM t1; -- { clientError 62 } + +DROP TABLE IF EXISTS t1; diff --git a/tests/queries/0_stateless/01921_datatype_date32.reference b/tests/queries/0_stateless/01921_datatype_date32.reference new file mode 100644 index 00000000000..3efe9079cc2 --- /dev/null +++ b/tests/queries/0_stateless/01921_datatype_date32.reference @@ -0,0 +1,282 @@ +1925-01-01 +1925-01-01 +2282-12-31 +2283-11-11 +2021-06-22 +-------toYear--------- +1925 +1925 +2282 +2283 +2021 +-------toMonth--------- +1 +1 +12 +11 +6 +-------toQuarter--------- +1 +1 +4 +4 +2 +-------toDayOfMonth--------- +1 +1 +31 +11 +22 +-------toDayOfWeek--------- +4 +4 +7 +7 +2 +-------toDayOfYear--------- +1 +1 +365 +315 +173 +-------toHour--------- +-------toMinute--------- +-------toSecond--------- +-------toStartOfDay--------- +2061-02-06 07:28:16 +2061-02-06 07:28:16 +2010-10-17 12:03:28 +2011-08-28 12:03:28 +2021-06-22 00:00:00 +-------toMonday--------- +2104-06-04 +2104-06-04 +2103-07-21 +2104-05-31 +2021-06-21 +-------toISOWeek--------- +1 +1 +52 +45 +25 +-------toISOYear--------- +1925 +1925 +2282 +2283 +2021 +-------toWeek--------- +0 +0 +53 +45 +25 +-------toYearWeek--------- +192452 +192452 +228253 +228345 +202125 +-------toStartOfWeek--------- +2104-06-03 +2104-06-03 +2103-07-27 +2104-06-06 +2021-06-20 +-------toStartOfMonth--------- +2104-06-07 +2104-06-07 +2103-06-27 +2104-05-27 +2021-06-01 +-------toStartOfQuarter--------- +2104-06-07 +2104-06-07 +2103-04-27 +2104-04-26 +2021-04-01 +-------toStartOfYear--------- +2104-06-07 +2104-06-07 +2102-07-28 +2103-07-28 +2021-01-01 +-------toStartOfSecond--------- +-------toStartOfMinute--------- +-------toStartOfFiveMinute--------- +-------toStartOfTenMinutes--------- +-------toStartOfFifteenMinutes--------- +-------toStartOfHour--------- +-------toStartOfISOYear--------- +2104-06-04 +2104-06-04 +2102-07-29 +2103-07-28 +2021-01-04 +-------toRelativeYearNum--------- +1925 +1925 +2282 +2283 +2021 +-------toRelativeQuarterNum--------- +7700 +7700 +9131 +9135 +8085 +-------toRelativeMonthNum--------- +23101 +23101 +27396 +27407 +24258 +-------toRelativeWeekNum--------- +63189 +63189 +16331 +63188 +2686 +-------toRelativeDayNum--------- +49100 +49100 +48784 +49099 +18800 +-------toRelativeHourNum--------- +4294572851 +4294572851 +2743677 +2751237 +451197 +-------toRelativeMinuteNum--------- +4271299336 +4271299336 +164620620 +165074220 +27071820 +-------toRelativeSecondNum--------- +2874889696 +2874889696 +1287302608 +1314518608 +1624309200 +-------toTime--------- +-------toYYYYMM--------- +192501 +192501 +228212 +228311 +202106 +-------toYYYYMMDD--------- +19250101 +19250101 +22821231 +22831111 +20210622 +-------toYYYYMMDDhhmmss--------- +19250101000000 +19250101000000 +22821231000000 +22831111000000 +20210622000000 +-------addSeconds--------- +1925-01-01 01:00:00.000 +1925-01-01 01:00:00.000 +2282-12-31 01:00:00.000 +2283-11-11 01:00:00.000 +2021-06-22 01:00:00.000 +-------addMinutes--------- +1925-01-01 01:00:00.000 +1925-01-01 01:00:00.000 +2282-12-31 01:00:00.000 +2283-11-11 01:00:00.000 +2021-06-22 01:00:00.000 +-------addHours--------- +1925-01-01 12:00:00.000 +1925-01-01 12:00:00.000 +2282-12-31 12:00:00.000 +2283-11-11 12:00:00.000 +2021-06-22 12:00:00.000 
+-------addDays--------- +1925-01-08 +1925-01-08 +2283-01-07 +1925-01-07 +2021-06-29 +-------addWeeks--------- +1925-01-08 +1925-01-08 +2283-01-07 +1925-01-07 +2021-06-29 +-------addMonths--------- +1925-02-01 +1925-02-01 +2283-01-31 +2283-11-11 +2021-07-22 +-------addQuarters--------- +1925-04-01 +1925-04-01 +2283-03-31 +1925-01-01 +2021-09-22 +-------addYears--------- +1926-01-01 +1926-01-01 +2283-11-11 +1925-01-01 +2022-06-22 +-------subtractSeconds--------- +1925-01-01 00:00:00.000 +1925-01-01 00:00:00.000 +2282-12-30 23:00:00.000 +2283-11-10 23:00:00.000 +2021-06-21 23:00:00.000 +-------subtractMinutes--------- +1925-01-01 00:00:00.000 +1925-01-01 00:00:00.000 +2282-12-30 23:00:00.000 +2283-11-10 23:00:00.000 +2021-06-21 23:00:00.000 +-------subtractHours--------- +1925-01-01 00:00:00.000 +1925-01-01 00:00:00.000 +2282-12-30 23:00:00.000 +2283-11-10 23:00:00.000 +2021-06-21 23:00:00.000 +-------subtractDays--------- +2283-11-05 +2283-11-05 +2282-12-24 +2283-11-04 +2021-06-15 +-------subtractWeeks--------- +2283-11-05 +2283-11-05 +2282-12-24 +2283-11-04 +2021-06-15 +-------subtractMonths--------- +1925-01-01 +1925-01-01 +2282-11-30 +2283-10-11 +2021-05-22 +-------subtractQuarters--------- +1925-01-01 +1925-01-01 +2282-09-30 +2283-08-11 +2021-03-22 +-------subtractYears--------- +1925-01-01 +1925-01-01 +2281-12-31 +2282-11-11 +2020-06-22 +-------toDate32--------- +1925-01-01 2000-01-01 +1925-01-01 1925-01-01 diff --git a/tests/queries/0_stateless/01921_datatype_date32.sql b/tests/queries/0_stateless/01921_datatype_date32.sql new file mode 100644 index 00000000000..5431736fab3 --- /dev/null +++ b/tests/queries/0_stateless/01921_datatype_date32.sql @@ -0,0 +1,118 @@ +drop table if exists t1; +create table t1(x1 Date32) engine Memory; + +insert into t1 values ('1925-01-01'),('1924-01-01'),('2282-12-31'),('2283-12-31'),('2021-06-22'); + +select x1 from t1; +select '-------toYear---------'; +select toYear(x1) from t1; +select '-------toMonth---------'; +select toMonth(x1) from t1; +select '-------toQuarter---------'; +select toQuarter(x1) from t1; +select '-------toDayOfMonth---------'; +select toDayOfMonth(x1) from t1; +select '-------toDayOfWeek---------'; +select toDayOfWeek(x1) from t1; +select '-------toDayOfYear---------'; +select toDayOfYear(x1) from t1; +select '-------toHour---------'; +select toHour(x1) from t1; -- { serverError 43 } +select '-------toMinute---------'; +select toMinute(x1) from t1; -- { serverError 43 } +select '-------toSecond---------'; +select toSecond(x1) from t1; -- { serverError 43 } +select '-------toStartOfDay---------'; +select toStartOfDay(x1) from t1; +select '-------toMonday---------'; +select toMonday(x1) from t1; +select '-------toISOWeek---------'; +select toISOWeek(x1) from t1; +select '-------toISOYear---------'; +select toISOYear(x1) from t1; +select '-------toWeek---------'; +select toWeek(x1) from t1; +select '-------toYearWeek---------'; +select toYearWeek(x1) from t1; +select '-------toStartOfWeek---------'; +select toStartOfWeek(x1) from t1; +select '-------toStartOfMonth---------'; +select toStartOfMonth(x1) from t1; +select '-------toStartOfQuarter---------'; +select toStartOfQuarter(x1) from t1; +select '-------toStartOfYear---------'; +select toStartOfYear(x1) from t1; +select '-------toStartOfSecond---------'; +select toStartOfSecond(x1) from t1; -- { serverError 43 } +select '-------toStartOfMinute---------'; +select toStartOfMinute(x1) from t1; -- { serverError 43 } +select '-------toStartOfFiveMinute---------'; +select 
toStartOfFiveMinute(x1) from t1; -- { serverError 43 } +select '-------toStartOfTenMinutes---------'; +select toStartOfTenMinutes(x1) from t1; -- { serverError 43 } +select '-------toStartOfFifteenMinutes---------'; +select toStartOfFifteenMinutes(x1) from t1; -- { serverError 43 } +select '-------toStartOfHour---------'; +select toStartOfHour(x1) from t1; -- { serverError 43 } +select '-------toStartOfISOYear---------'; +select toStartOfISOYear(x1) from t1; +select '-------toRelativeYearNum---------'; +select toRelativeYearNum(x1) from t1; +select '-------toRelativeQuarterNum---------'; +select toRelativeQuarterNum(x1) from t1; +select '-------toRelativeMonthNum---------'; +select toRelativeMonthNum(x1) from t1; +select '-------toRelativeWeekNum---------'; +select toRelativeWeekNum(x1) from t1; +select '-------toRelativeDayNum---------'; +select toRelativeDayNum(x1) from t1; +select '-------toRelativeHourNum---------'; +select toRelativeHourNum(x1) from t1; +select '-------toRelativeMinuteNum---------'; +select toRelativeMinuteNum(x1) from t1; +select '-------toRelativeSecondNum---------'; +select toRelativeSecondNum(x1) from t1; +select '-------toTime---------'; +select toTime(x1) from t1; -- { serverError 43 } +select '-------toYYYYMM---------'; +select toYYYYMM(x1) from t1; +select '-------toYYYYMMDD---------'; +select toYYYYMMDD(x1) from t1; +select '-------toYYYYMMDDhhmmss---------'; +select toYYYYMMDDhhmmss(x1) from t1; +select '-------addSeconds---------'; +select addSeconds(x1, 3600) from t1; +select '-------addMinutes---------'; +select addMinutes(x1, 60) from t1; +select '-------addHours---------'; +select addHours(x1, 12) from t1; +select '-------addDays---------'; +select addDays(x1, 7) from t1; +select '-------addWeeks---------'; +select addWeeks(x1, 1) from t1; +select '-------addMonths---------'; +select addMonths(x1, 1) from t1; +select '-------addQuarters---------'; +select addQuarters(x1, 1) from t1; +select '-------addYears---------'; +select addYears(x1, 1) from t1; +select '-------subtractSeconds---------'; +select subtractSeconds(x1, 3600) from t1; +select '-------subtractMinutes---------'; +select subtractMinutes(x1, 60) from t1; +select '-------subtractHours---------'; +select subtractHours(x1, 1) from t1; +select '-------subtractDays---------'; +select subtractDays(x1, 7) from t1; +select '-------subtractWeeks---------'; +select subtractWeeks(x1, 1) from t1; +select '-------subtractMonths---------'; +select subtractMonths(x1, 1) from t1; +select '-------subtractQuarters---------'; +select subtractQuarters(x1, 1) from t1; +select '-------subtractYears---------'; +select subtractYears(x1, 1) from t1; +select '-------toDate32---------'; +select toDate32('1925-01-01'), toDate32(toDate('2000-01-01')); +select toDate32OrZero('1924-01-01'), toDate32OrNull('1924-01-01'); + diff --git a/tests/queries/0_stateless/01926_bin_unbin.reference b/tests/queries/0_stateless/01926_bin_unbin.reference index f84a858e449..731d0223bb9 100644 --- a/tests/queries/0_stateless/01926_bin_unbin.reference +++ b/tests/queries/0_stateless/01926_bin_unbin.reference @@ -33,3 +33,7 @@ 1 1 1 +1 +1 +2D000000000000000A +001011010000000000000000000000000000000000000000000000000000000000001010 diff --git a/tests/queries/0_stateless/01926_bin_unbin.sql b/tests/queries/0_stateless/01926_bin_unbin.sql index 555770d09c6..e112f8bd8a4 100644 --- a/tests/queries/0_stateless/01926_bin_unbin.sql +++ b/tests/queries/0_stateless/01926_bin_unbin.sql @@ -37,3 +37,9 @@ select bin(unbin('0')) == '00000000'; select 
hex('') == bin(''); select unhex('') == unbin(''); select unhex('0') == unbin('0'); + +-- hex and bin support AggregateFunction +select hex(sumState(number)) == hex(toString(sumState(number))) from numbers(10); +select hex(avgState(number)) == hex(toString(avgState(number))) from numbers(99); +select hex(avgState(number)) from numbers(10); +select bin(avgState(number)) from numbers(10); diff --git a/tests/queries/0_stateless/01932_global_in_function.reference b/tests/queries/0_stateless/01932_global_in_function.reference new file mode 100644 index 00000000000..44e0be8e356 --- /dev/null +++ b/tests/queries/0_stateless/01932_global_in_function.reference @@ -0,0 +1,4 @@ +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/01932_global_in_function.sql b/tests/queries/0_stateless/01932_global_in_function.sql new file mode 100644 index 00000000000..467bf6c3495 --- /dev/null +++ b/tests/queries/0_stateless/01932_global_in_function.sql @@ -0,0 +1,2 @@ +select number from cluster(test_cluster_two_shards_localhost, numbers(1)) where number global in tuple(0, 1, 2, 3); +select number from cluster(test_cluster_two_shards_localhost, numbers(1)) where number global in array(0, 1, 2, 3); diff --git a/tests/queries/0_stateless/01932_remote_sharding_key_column.reference b/tests/queries/0_stateless/01932_remote_sharding_key_column.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01932_remote_sharding_key_column.sql b/tests/queries/0_stateless/01932_remote_sharding_key_column.sql new file mode 100644 index 00000000000..ded2f187821 --- /dev/null +++ b/tests/queries/0_stateless/01932_remote_sharding_key_column.sql @@ -0,0 +1,15 @@ +-- regression test for the following query: +-- +-- select * from remote('127.1', system.one, dummy) +-- +-- that produce the following error before: +-- +-- Unknown column: dummy, there are only columns . +-- +-- NOTE: that wrapping column into any function works before. 
+select * from remote('127.1', system.one, dummy) format Null; +select * from remote('127.1', system.one, identity(dummy)) format Null; +select * from remote('127.1', view(select * from system.one), identity(dummy)) format Null; +select * from remote('127.{1,2}', view(select * from system.one), identity(dummy)) format Null; +select * from remote('127.1', view(select * from system.one), dummy) format Null; +select * from remote('127.{1,2}', view(select * from system.one), dummy) format Null; diff --git a/tests/queries/0_stateless/01940_custom_tld_sharding_key.reference b/tests/queries/0_stateless/01940_custom_tld_sharding_key.reference new file mode 100644 index 00000000000..0989a305613 --- /dev/null +++ b/tests/queries/0_stateless/01940_custom_tld_sharding_key.reference @@ -0,0 +1 @@ +foo.com diff --git a/tests/queries/0_stateless/01940_custom_tld_sharding_key.sql b/tests/queries/0_stateless/01940_custom_tld_sharding_key.sql new file mode 100644 index 00000000000..5d38cfb18dc --- /dev/null +++ b/tests/queries/0_stateless/01940_custom_tld_sharding_key.sql @@ -0,0 +1,2 @@ +select * from remote('127.{1,2}', view(select 'foo.com' key), cityHash64(key)) where key = cutToFirstSignificantSubdomainCustom('foo.com', 'public_suffix_list') settings optimize_skip_unused_shards=1, force_optimize_skip_unused_shards=1; +select * from remote('127.{1,2}', view(select 'foo.com' key), cityHash64(key)) where key = cutToFirstSignificantSubdomainCustom('bar.com', 'public_suffix_list') settings optimize_skip_unused_shards=1, force_optimize_skip_unused_shards=1; diff --git a/tests/queries/0_stateless/01940_pad_string.reference b/tests/queries/0_stateless/01940_pad_string.reference new file mode 100644 index 00000000000..22cd3f9be07 --- /dev/null +++ b/tests/queries/0_stateless/01940_pad_string.reference @@ -0,0 +1,54 @@ +leftPad + +a +ab +abc + abc + abc + abc +ab +*abc +**abc +*******abc +ab +*abc +*.abc +*.*.*.*abc +leftPadUTF8 +а +аб +аб +абвг +ЧАабвг +ЧАСЧАСЧАабвг +rightPad + +a +ab +abc +abc +abc +abc +ab +abc* +abc** +abc******* +ab +abc* +abc*. 
+abc*.*.*.* +rightPadUTF8 +а +аб +аб +абвг +абвгЧА +абвгЧАСЧАСЧА +numbers + +1^ +_2^^ +__3^^^ +___4^^^^ +____5^^^^^ +_____6^^^^^^ diff --git a/tests/queries/0_stateless/01940_pad_string.sql b/tests/queries/0_stateless/01940_pad_string.sql new file mode 100644 index 00000000000..e4ba0aec6d2 --- /dev/null +++ b/tests/queries/0_stateless/01940_pad_string.sql @@ -0,0 +1,54 @@ +SELECT 'leftPad'; +SELECT leftPad('abc', 0); +SELECT leftPad('abc', 1); +SELECT leftPad('abc', 2); +SELECT leftPad('abc', 3); +SELECT leftPad('abc', 4); +SELECT leftPad('abc', 5); +SELECT leftPad('abc', 10); + +SELECT leftPad('abc', 2, '*'); +SELECT leftPad('abc', 4, '*'); +SELECT leftPad('abc', 5, '*'); +SELECT leftPad('abc', 10, '*'); +SELECT leftPad('abc', 2, '*.'); +SELECT leftPad('abc', 4, '*.'); +SELECT leftPad('abc', 5, '*.'); +SELECT leftPad('abc', 10, '*.'); + +SELECT 'leftPadUTF8'; +SELECT leftPad('абвг', 2); +SELECT leftPadUTF8('абвг', 2); +SELECT leftPad('абвг', 4); +SELECT leftPadUTF8('абвг', 4); +SELECT leftPad('абвг', 12, 'ЧАС'); +SELECT leftPadUTF8('абвг', 12, 'ЧАС'); + +SELECT 'rightPad'; +SELECT rightPad('abc', 0); +SELECT rightPad('abc', 1); +SELECT rightPad('abc', 2); +SELECT rightPad('abc', 3); +SELECT rightPad('abc', 4); +SELECT rightPad('abc', 5); +SELECT rightPad('abc', 10); + +SELECT rightPad('abc', 2, '*'); +SELECT rightPad('abc', 4, '*'); +SELECT rightPad('abc', 5, '*'); +SELECT rightPad('abc', 10, '*'); +SELECT rightPad('abc', 2, '*.'); +SELECT rightPad('abc', 4, '*.'); +SELECT rightPad('abc', 5, '*.'); +SELECT rightPad('abc', 10, '*.'); + +SELECT 'rightPadUTF8'; +SELECT rightPad('абвг', 2); +SELECT rightPadUTF8('абвг', 2); +SELECT rightPad('абвг', 4); +SELECT rightPadUTF8('абвг', 4); +SELECT rightPad('абвг', 12, 'ЧАС'); +SELECT rightPadUTF8('абвг', 12, 'ЧАС'); + +SELECT 'numbers'; +SELECT rightPad(leftPad(toString(number), number, '_'), number*2, '^') FROM numbers(7); diff --git a/tests/queries/0_stateless/01940_point_in_polygon_ubsan.reference b/tests/queries/0_stateless/01940_point_in_polygon_ubsan.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/01940_point_in_polygon_ubsan.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/01940_point_in_polygon_ubsan.sql b/tests/queries/0_stateless/01940_point_in_polygon_ubsan.sql new file mode 100644 index 00000000000..d011725691f --- /dev/null +++ b/tests/queries/0_stateless/01940_point_in_polygon_ubsan.sql @@ -0,0 +1,2 @@ +SET validate_polygons = 0; +SELECT pointInPolygon((-inf, 1023), [(10.000100135803223, 10000000000.), (inf, 0.9998999834060669), (1.1920928955078125e-7, 100.0000991821289), (1.000100016593933, 100.0000991821289)]); diff --git a/tests/queries/0_stateless/01941_dict_get_has_complex_single_key.reference b/tests/queries/0_stateless/01941_dict_get_has_complex_single_key.reference new file mode 100644 index 00000000000..c7e9cb788cb --- /dev/null +++ b/tests/queries/0_stateless/01941_dict_get_has_complex_single_key.reference @@ -0,0 +1,10 @@ +dictGet +Value +Value +Value +Value +dictHas +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01941_dict_get_has_complex_single_key.sql b/tests/queries/0_stateless/01941_dict_get_has_complex_single_key.sql new file mode 100644 index 00000000000..a44107d6882 --- /dev/null +++ b/tests/queries/0_stateless/01941_dict_get_has_complex_single_key.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS test_dictionary_source; +CREATE TABLE test_dictionary_source (key String, value String) ENGINE=TinyLog; + +INSERT INTO test_dictionary_source VALUES 
('Key', 'Value'); + +DROP DICTIONARY IF EXISTS test_dictionary; +CREATE DICTIONARY test_dictionary(key String, value String) +PRIMARY KEY key +LAYOUT(COMPLEX_KEY_HASHED()) +SOURCE(CLICKHOUSE(TABLE 'test_dictionary_source')) +LIFETIME(0); + +SELECT 'dictGet'; +SELECT dictGet('test_dictionary', 'value', tuple('Key')); +SELECT dictGet('test_dictionary', 'value', tuple(materialize('Key'))); +SELECT dictGet('test_dictionary', 'value', 'Key'); +SELECT dictGet('test_dictionary', 'value', materialize('Key')); + +SELECT 'dictHas'; +SELECT dictHas('test_dictionary', tuple('Key')); +SELECT dictHas('test_dictionary', tuple(materialize('Key'))); +SELECT dictHas('test_dictionary', 'Key'); +SELECT dictHas('test_dictionary', materialize('Key')); + +DROP DICTIONARY test_dictionary; +DROP TABLE test_dictionary_source; diff --git a/tests/queries/0_stateless/01942_untuple_transformers_msan.reference b/tests/queries/0_stateless/01942_untuple_transformers_msan.reference new file mode 100644 index 00000000000..82dea36febd --- /dev/null +++ b/tests/queries/0_stateless/01942_untuple_transformers_msan.reference @@ -0,0 +1 @@ +100.0000991821289 \N \N 1 1024 \N diff --git a/tests/queries/0_stateless/01942_untuple_transformers_msan.sql b/tests/queries/0_stateless/01942_untuple_transformers_msan.sql new file mode 100644 index 00000000000..c1be25d34ac --- /dev/null +++ b/tests/queries/0_stateless/01942_untuple_transformers_msan.sql @@ -0,0 +1 @@ +SELECT untuple(tuple(100.0000991821289)), NULL, untuple((toDateTime(9223372036854775806, -1, NULL, NULL, toDateTime(NULL, NULL)), * EXCEPT b)), NULL FROM (SELECT 1 AS a, 1024, NULL AS b); diff --git a/tests/queries/0_stateless/01943_log_column_sizes.reference b/tests/queries/0_stateless/01943_log_column_sizes.reference new file mode 100644 index 00000000000..91ae12e38ce --- /dev/null +++ b/tests/queries/0_stateless/01943_log_column_sizes.reference @@ -0,0 +1,6 @@ +27 +33 +105 +27 +33 +105 diff --git a/tests/queries/0_stateless/01943_log_column_sizes.sql b/tests/queries/0_stateless/01943_log_column_sizes.sql new file mode 100644 index 00000000000..c6cd48c33d9 --- /dev/null +++ b/tests/queries/0_stateless/01943_log_column_sizes.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS test_log; +DROP TABLE IF EXISTS test_tiny_log; + +CREATE TABLE test_log (x UInt8, s String, a Array(Nullable(String))) ENGINE = Log; +CREATE TABLE test_tiny_log (x UInt8, s String, a Array(Nullable(String))) ENGINE = TinyLog; + +INSERT INTO test_log VALUES (64, 'Value1', ['Value2', 'Value3', NULL]); +INSERT INTO test_tiny_log VALUES (64, 'Value1', ['Value2', 'Value3', NULL]); + +SELECT data_compressed_bytes FROM system.columns WHERE table = 'test_log' AND database = currentDatabase(); +SELECT data_compressed_bytes FROM system.columns WHERE table = 'test_tiny_log' AND database = currentDatabase(); + +DROP TABLE test_log; +DROP TABLE test_tiny_log; \ No newline at end of file diff --git a/tests/queries/0_stateless/01943_non_deterministic_order_key.reference b/tests/queries/0_stateless/01943_non_deterministic_order_key.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01943_non_deterministic_order_key.sql b/tests/queries/0_stateless/01943_non_deterministic_order_key.sql new file mode 100644 index 00000000000..200a88ec677 --- /dev/null +++ b/tests/queries/0_stateless/01943_non_deterministic_order_key.sql @@ -0,0 +1,3 @@ +CREATE TABLE a (number UInt64) ENGINE = MergeTree ORDER BY if(now() > toDateTime('2020-06-01 13:31:40'), toInt64(number), -number); -- { serverError 
36 } +CREATE TABLE b (number UInt64) ENGINE = MergeTree ORDER BY now() > toDateTime(number); -- { serverError 36 } +CREATE TABLE c (number UInt64) ENGINE = MergeTree ORDER BY now(); -- { serverError 36 } diff --git a/tests/queries/0_stateless/01945_system_warnings.expect b/tests/queries/0_stateless/01945_system_warnings.expect new file mode 100755 index 00000000000..56d219e1040 --- /dev/null +++ b/tests/queries/0_stateless/01945_system_warnings.expect @@ -0,0 +1,40 @@ +#!/usr/bin/expect -f + +# This is a test for system.warnings. Testing in interactive mode is necessary, +# as we want to see certain warnings from client + +log_user 0 +set timeout 60 +match_max 100000 + +# A default timeout action is to do nothing, change it to fail +expect_after { + timeout { + exit 1 + } +} + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion" +expect ":) " + +#find out BUILD TYPE +send -- "SELECT value FROM system.build_options WHERE name='BUILD_TYPE'\r" +expect { + "Debug" { + # Check debug message in system.warnings + send -- "SELECT message FROM system.warnings WHERE message='Server was built in debug mode. It will work slowly.'\r" + expect "Server was built in debug mode. It will work slowly." + expect ":) " + } + "RelWithDebInfo" { + # Check empty to find out existence + send -- "SELECT message FROM system.warnings WHERE 0=1\r" + expect "Ok." + expect ":) " + } +} + +# Finish test +send -- "\4" +expect eof diff --git a/tests/queries/0_stateless/01945_system_warnings.reference b/tests/queries/0_stateless/01945_system_warnings.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/data_parquet/datatype-date32.parquet b/tests/queries/0_stateless/data_parquet/datatype-date32.parquet new file mode 100644 index 00000000000..e9ba92a6948 Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/datatype-date32.parquet differ diff --git a/tests/queries/0_stateless/data_parquet/datatype-date32.parquet.columns b/tests/queries/0_stateless/data_parquet/datatype-date32.parquet.columns new file mode 100644 index 00000000000..202a8a7087b --- /dev/null +++ b/tests/queries/0_stateless/data_parquet/datatype-date32.parquet.columns @@ -0,0 +1 @@ +`date32` Date32 \ No newline at end of file diff --git a/tests/queries/1_stateful/00165_jit_aggregate_functions.reference b/tests/queries/1_stateful/00165_jit_aggregate_functions.reference index 2d94ad190ca..451a676754c 100644 --- a/tests/queries/1_stateful/00165_jit_aggregate_functions.reference +++ b/tests/queries/1_stateful/00165_jit_aggregate_functions.reference @@ -1,128 +1,128 @@ Aggregation using JIT compilation Simple functions -1704509 4611700827100483880 9223360787015464643 10441337359398154812 19954243669348.844 9648741.579254271 523264 -732797 4611701940806302259 9223355550934604746 977192643464016658 2054229034942.3723 51998323.94457991 475698 -598875 4611701407242345792 9223362250391155632 9312163881623734456 27615161624211.875 12261797.824844675 337212 -792887 4611699550286611812 9223290551912005343 6930300520201292824 27479710385933.586 53095331.60360441 252197 -3807842 4611710821592843606 9223326163906184987 16710274896338005145 85240848090850.69 22373416.533275086 196036 -25703952 4611709443519524003 9223353913449113943 9946868158853570839 67568783303242.086 3154349.826950714 147211 -716829 4611852156092872082 9223361623076951140 15381015774917924786 170693446547158.72 201431892.4773785 90109 
-59183 4611730685242027332 9223354909338698162 8078812522502896568 94622946187035.42 1425270865.0901496 85379 -33010362 4611704682869732882 9223268545373999677 2064452191838585926 26532987929602.555 3695122.4062526934 77807 -800784 4611752907938305166 9223340418389788041 18082918611792817587 233352070043266.62 36535786.81446395 77492 -20810645 4611712185532639162 9223218900001937412 4996531385439292694 68246505203164.63 6316535.831023813 73213 -25843850 4611690025407720929 9223346023778617822 12755881190906812868 185015319325648.16 9962165.34831339 68945 -23447120 4611796031755620254 9223329309291309758 17231649548755339966 255019232629204.38 7937191.271698021 67570 -14739804 4611692230555590277 9223313509005166531 2458378896777063244 38308020331864.36 14590240.469105456 64174 -32077710 4611884228437061959 9223352444952988904 12965822147651192908 214467085941034.7 7257521.096258734 60456 -22446879 4611846229717089436 9223124373140579096 13530160492087688838 231724477077663.4 4737362.521046629 58389 -170282 4611833225706935900 9223371583739401906 8076893424988479310 141657635880324.8 1613795518.1065989 57017 -11482817 4611708000353743073 9223337838355779113 14841435427430843458 283531099960470.8 9938452.835998287 52345 -63469 4611695097019173921 9223353530156141191 6296784708578574520 120762239817777.88 579655378.4603049 52142 -29103473 4611744585914335132 9223333530281362537 5908285283932344933 123712996438970.34 867841.595541967 47758 +1704509 4611700827100483880 9223360787015464643 10441337359398154812 19954243669348.844 9648741.579254271 523264 9223372036854775807 4611686018427387904 4544239379628300646 +732797 4611701940806302259 9223355550934604746 977192643464016658 2054229034942.3723 51998323.94457991 475698 9223372036854775807 4611686018427387904 4091184823334377716 +598875 4611701407242345792 9223362250391155632 9312163881623734456 27615161624211.875 12261797.824844675 337212 9223372036854775807 4611686018427387904 3725992504798702670 +792887 4611699550286611812 9223290551912005343 6930300520201292824 27479710385933.586 53095331.60360441 252197 9223372036854775807 4611686018427387904 6536441508464694614 +3807842 4611710821592843606 9223326163906184987 16710274896338005145 85240848090850.69 22373416.533275086 196036 9223372036854775807 4611686018427387904 1797862753609257231 +25703952 4611709443519524003 9223353913449113943 9946868158853570839 67568783303242.086 3154349.826950714 147211 9223372036854775807 4611686018427387904 8737124378202300429 +716829 4611852156092872082 9223361623076951140 15381015774917924786 170693446547158.72 201431892.4773785 90109 9223372036854775807 4611686018427387904 8209915323001116338 +59183 4611730685242027332 9223354909338698162 8078812522502896568 94622946187035.42 1425270865.0901496 85379 9223372036854775807 4611686018427387904 8909082036598843562 +33010362 4611704682869732882 9223268545373999677 2064452191838585926 26532987929602.555 3695122.4062526934 77807 9223372036854775807 4611686018427387904 5411365383789552292 +800784 4611752907938305166 9223340418389788041 18082918611792817587 233352070043266.62 36535786.81446395 77492 9223372036854775807 4611686018427387904 2059255810151375435 +20810645 4611712185532639162 9223218900001937412 4996531385439292694 68246505203164.63 6316535.831023813 73213 9223372036854775807 4611686018427387904 8852740550386113674 +25843850 4611690025407720929 9223346023778617822 12755881190906812868 185015319325648.16 9962165.34831339 68945 9223372036854775807 4611686018427387904 7849665866595760148 +23447120 
4611796031755620254 9223329309291309758 17231649548755339966 255019232629204.38 7937191.271698021 67570 9223372036854775807 4611686018427387904 3435410911925610424 +14739804 4611692230555590277 9223313509005166531 2458378896777063244 38308020331864.36 14590240.469105456 64174 9223372036854775807 4611686018427387904 511910855240035342 +32077710 4611884228437061959 9223352444952988904 12965822147651192908 214467085941034.7 7257521.096258734 60456 9223372036854775807 4611686018427387904 2256071920672551964 +22446879 4611846229717089436 9223124373140579096 13530160492087688838 231724477077663.4 4737362.521046629 58389 9223372036854775807 4611686018427387904 6236276364886386410 +170282 4611833225706935900 9223371583739401906 8076893424988479310 141657635880324.8 1613795518.1065989 57017 9223372036854775807 4611686018427387904 4755775861151848768 +11482817 4611708000353743073 9223337838355779113 14841435427430843458 283531099960470.8 9938452.835998287 52345 9223372036854775807 4611686018427387904 5371586112642152558 +63469 4611695097019173921 9223353530156141191 6296784708578574520 120762239817777.88 579655378.4603049 52142 9223372036854775807 4611686018427387904 4150567963952988110 +29103473 4611744585914335132 9223333530281362537 5908285283932344933 123712996438970.34 867841.595541967 47758 9223372036854775807 4611686018427387904 3238284030821087319 Simple functions with non compilable function -1704509 4611700827100483880 9223360787015464643 10441337359398154812 4611686018427387904 19954243669348.844 9648741.579254271 523264 -732797 4611701940806302259 9223355550934604746 977192643464016658 4611686018427387904 2054229034942.3723 51998323.94457991 475698 -598875 4611701407242345792 9223362250391155632 9312163881623734456 4611686018427387904 27615161624211.875 12261797.824844675 337212 -792887 4611699550286611812 9223290551912005343 6930300520201292824 4611686018427387904 27479710385933.586 53095331.60360441 252197 -3807842 4611710821592843606 9223326163906184987 16710274896338005145 4611686018427387904 85240848090850.69 22373416.533275086 196036 -25703952 4611709443519524003 9223353913449113943 9946868158853570839 4611686018427387904 67568783303242.086 3154349.826950714 147211 -716829 4611852156092872082 9223361623076951140 15381015774917924786 4611686018427387904 170693446547158.72 201431892.4773785 90109 -59183 4611730685242027332 9223354909338698162 8078812522502896568 4611686018427387904 94622946187035.42 1425270865.0901496 85379 -33010362 4611704682869732882 9223268545373999677 2064452191838585926 4611686018427387904 26532987929602.555 3695122.4062526934 77807 -800784 4611752907938305166 9223340418389788041 18082918611792817587 4611686018427387904 233352070043266.62 36535786.81446395 77492 -20810645 4611712185532639162 9223218900001937412 4996531385439292694 4611686018427387904 68246505203164.63 6316535.831023813 73213 -25843850 4611690025407720929 9223346023778617822 12755881190906812868 4611686018427387904 185015319325648.16 9962165.34831339 68945 -23447120 4611796031755620254 9223329309291309758 17231649548755339966 4611686018427387904 255019232629204.38 7937191.271698021 67570 -14739804 4611692230555590277 9223313509005166531 2458378896777063244 4611686018427387904 38308020331864.36 14590240.469105456 64174 -32077710 4611884228437061959 9223352444952988904 12965822147651192908 4611686018427387904 214467085941034.7 7257521.096258734 60456 -22446879 4611846229717089436 9223124373140579096 13530160492087688838 4611686018427387904 231724477077663.4 4737362.521046629 58389 -170282 
4611833225706935900 9223371583739401906 8076893424988479310 4611686018427387904 141657635880324.8 1613795518.1065989 57017 -11482817 4611708000353743073 9223337838355779113 14841435427430843458 4611686018427387904 283531099960470.8 9938452.835998287 52345 -63469 4611695097019173921 9223353530156141191 6296784708578574520 4611686018427387904 120762239817777.88 579655378.4603049 52142 -29103473 4611744585914335132 9223333530281362537 5908285283932344933 4611686018427387904 123712996438970.34 867841.595541967 47758 +1704509 4611700827100483880 9223360787015464643 10441337359398154812 3620921835565807284859452 19954243669348.844 9648741.579254271 523264 9223372036854775807 4611686018427387904 4544239379628300646 +732797 4611701940806302259 9223355550934604746 977192643464016658 3289442827160604417733394 2054229034942.3723 51998323.94457991 475698 9223372036854775807 4611686018427387904 4091184823334377716 +598875 4611701407242345792 9223362250391155632 9312163881623734456 2330921446573746856380600 27615161624211.875 12261797.824844675 337212 9223372036854775807 4611686018427387904 3725992504798702670 +792887 4611699550286611812 9223290551912005343 6930300520201292824 1745179600137886041476120 27479710385933.586 53095331.60360441 252197 9223372036854775807 4611686018427387904 6536441508464694614 +3807842 4611710821592843606 9223326163906184987 16710274896338005145 1356295121550317411019929 85240848090850.69 22373416.533275086 196036 9223372036854775807 4611686018427387904 1797862753609257231 +25703952 4611709443519524003 9223353913449113943 9946868158853570839 1018731388338768841564439 67568783303242.086 3154349.826950714 147211 9223372036854775807 4611686018427387904 8737124378202300429 +716829 4611852156092872082 9223361623076951140 15381015774917924786 623810478612337115371442 170693446547158.72 201431892.4773785 90109 9223372036854775807 4611686018427387904 8209915323001116338 +59183 4611730685242027332 9223354909338698162 8078812522502896568 589916507545680254024632 94622946187035.42 1425270865.0901496 85379 9223372036854775807 4611686018427387904 8909082036598843562 +33010362 4611704682869732882 9223268545373999677 2064452191838585926 538517864195994778911814 26532987929602.555 3695122.4062526934 77807 9223372036854775807 4611686018427387904 5411365383789552292 +800784 4611752907938305166 9223340418389788041 18082918611792817587 535545510122473785781683 233352070043266.62 36535786.81446395 77492 9223372036854775807 4611686018427387904 2059255810151375435 +20810645 4611712185532639162 9223218900001937412 4996531385439292694 506405014842860050255126 68246505203164.63 6316535.831023813 73213 9223372036854775807 4611686018427387904 8852740550386113674 +25843850 4611690025407720929 9223346023778617822 12755881190906812868 476547495537329753708996 185015319325648.16 9962165.34831339 68945 9223372036854775807 4611686018427387904 7849665866595760148 +23447120 4611796031755620254 9223329309291309758 17231649548755339966 467236365548464278670014 255019232629204.38 7937191.271698021 67570 9223372036854775807 4611686018427387904 3435410911925610424 +14739804 4611692230555590277 9223313509005166531 2458378896777063244 444126268697527941770060 38308020331864.36 14590240.469105456 64174 9223372036854775807 4611686018427387904 511910855240035342 +32077710 4611884228437061959 9223352444952988904 12965822147651192908 417407443977973675608140 214467085941034.7 7257521.096258734 60456 9223372036854775807 4611686018427387904 2256071920672551964 +22446879 4611846229717089436 9223124373140579096 
13530160492087688838 403462269796593691082374 231724477077663.4 4737362.521046629 58389 9223372036854775807 4611686018427387904 6236276364886386410 +170282 4611833225706935900 9223371583739401906 8076893424988479310 394417911933408911581006 141657635880324.8 1613795518.1065989 57017 9223372036854775807 4611686018427387904 4755775861151848768 +11482817 4611708000353743073 9223337838355779113 14841435427430843458 361995300393829962204226 283531099960470.8 9938452.835998287 52345 9223372036854775807 4611686018427387904 5371586112642152558 +63469 4611695097019173921 9223353530156141191 6296784708578574520 360843057610541117735096 120762239817777.88 579655378.4603049 52142 9223372036854775807 4611686018427387904 4150567963952988110 +29103473 4611744585914335132 9223333530281362537 5908285283932344933 330534668598011678200421 123712996438970.34 867841.595541967 47758 9223372036854775807 4611686018427387904 3238284030821087319 Simple functions if combinator -1704509 4611700827100483880 9223310246721229500 16398241567152875142 62618822667209.71 2224726.7626273884 261874 -732797 4611721382223060002 9223355550934604746 16281585268876620522 68472164943295.68 5898616.931652982 237784 -598875 4611701407242345792 9223362250391155632 3577699408183553052 21300140553347.42 53771550.26565126 167966 -792887 4611699550286611812 9223164887726235740 7088177025760385824 56461952267903.89 92835869.96920013 125539 -3807842 4611710821592843606 9223283397553859544 5756765290752687660 58835559208469.4 39794091.419183925 97845 -25703952 4611784761593342388 9223241341744449690 4782279928971192568 65182094768443.91 9276773.708181158 73368 -716829 4611852156092872082 9223361623076951140 8613712481895484190 191445613359755.62 291083243.75407773 44993 -59183 4611730685242027332 9223354909338698162 18369075291092794110 429013599530392 5925109959.715378 42817 -33010362 4611704682869732882 9223092117352620518 9991152681891671022 257099731913529.5 12412830.045471078 38861 -800784 4611752907938305166 9223309994342931384 5251877538869750510 135472890315726.03 53535427.52018088 38767 -20810645 4611712185532639162 9223218900001937412 11803718472901310700 323593455407553 10496765.20741332 36477 -25843850 4611744529689964352 9223346023778617822 127137885677350808 3700925266420.715 18966925.191309396 34353 -23447120 4611796031755620254 9223329309291309758 1841522159325376278 54534534450526.42 6271211.193812284 33768 -14739804 4611762063154116632 9223007205463222212 16302703534054321116 506987919332451.8 6885575.861759452 32156 -32077710 4612033458080771112 9223352444952988904 421072759851674408 13955745719596.793 12220152.393889504 30172 -22446879 4611846229717089436 9223124373140579096 6577134317587565298 224866980668999.47 2482202.163802278 29249 -170282 4611833225706935900 9223371583739401906 15764226366913732386 551447384017691 2515144222.953728 28587 -11482817 4611990575414646848 9223302669582414438 9828522700609834800 378121905921203.2 34845264.2080656 25993 -63469 4612175339998036670 9222961628400798084 17239621485933250238 663164390134376.5 7825349797.6059 25996 -29103473 4611744585914335132 9223035551850347954 12590190375872647672 525927999326314.7 26049107.15514301 23939 +1704509 4611700827100483880 9223310246721229500 16398241567152875142 62618822667209.71 2224726.7626273884 261874 9223372036854775806 4611686018427387904 4518874482384062894 +732797 4611721382223060002 9223355550934604746 16281585268876620522 68472164943295.68 5898616.931652982 237784 9223372036854775806 4611686018427387904 3641900047478154650 +598875 
4611701407242345792 9223362250391155632 3577699408183553052 21300140553347.42 53771550.26565126 167966 9223372036854775806 4611686018427387904 1688477495230210408 +792887 4611699550286611812 9223164887726235740 7088177025760385824 56461952267903.89 92835869.96920013 125539 9223372036854775806 4611686018427387904 4850868151095058072 +3807842 4611710821592843606 9223283397553859544 5756765290752687660 58835559208469.4 39794091.419183925 97845 9223372036854775806 4611686018427387904 6845214684357194564 +25703952 4611784761593342388 9223241341744449690 4782279928971192568 65182094768443.91 9276773.708181158 73368 9223372036854775806 4611686018427387904 1384302533387727316 +716829 4611852156092872082 9223361623076951140 8613712481895484190 191445613359755.62 291083243.75407773 44993 9223372036854775806 4611686018427387904 6344483471397203854 +59183 4611730685242027332 9223354909338698162 18369075291092794110 429013599530392 5925109959.715378 42817 9223372036854775806 4611686018427387904 5909305558020042898 +33010362 4611704682869732882 9223092117352620518 9991152681891671022 257099731913529.5 12412830.045471078 38861 9223372036854775806 4611686018427387904 4672855013852508626 +800784 4611752907938305166 9223309994342931384 5251877538869750510 135472890315726.03 53535427.52018088 38767 9223372036854775806 4611686018427387904 7801864489649220514 +20810645 4611712185532639162 9223218900001937412 11803718472901310700 323593455407553 10496765.20741332 36477 9223372036854775806 4611686018427387904 5941995311893397960 +25843850 4611744529689964352 9223346023778617822 127137885677350808 3700925266420.715 18966925.191309396 34353 9223372036854775806 4611686018427387904 6700111718676827412 +23447120 4611796031755620254 9223329309291309758 1841522159325376278 54534534450526.42 6271211.193812284 33768 9223372036854775806 4611686018427387904 2325654077031843898 +14739804 4611762063154116632 9223007205463222212 16302703534054321116 506987919332451.8 6885575.861759452 32156 9223372036854775806 4611686018427387904 2114922310535979832 +32077710 4612033458080771112 9223352444952988904 421072759851674408 13955745719596.793 12220152.393889504 30172 9223372036854775806 4611686018427387904 4399934528735249092 +22446879 4611846229717089436 9223124373140579096 6577134317587565298 224866980668999.47 2482202.163802278 29249 9223372036854775806 4611686018427387904 8763910740678180498 +170282 4611833225706935900 9223371583739401906 15764226366913732386 551447384017691 2515144222.953728 28587 9223372036854775806 4611686018427387904 8217388408377809010 +11482817 4611990575414646848 9223302669582414438 9828522700609834800 378121905921203.2 34845264.2080656 25993 9223372036854775806 4611686018427387904 4689180182672571856 +63469 4612175339998036670 9222961628400798084 17239621485933250238 663164390134376.5 7825349797.6059 25996 9223372036854775806 4611686018427387904 2067736879306995526 +29103473 4611744585914335132 9223035551850347954 12590190375872647672 525927999326314.7 26049107.15514301 23939 9223372036854775806 4611686018427387904 8318055464870862444 Aggregation without JIT compilation Simple functions -1704509 4611700827100483880 9223360787015464643 10441337359398154812 19954243669348.844 9648741.579254271 523264 -732797 4611701940806302259 9223355550934604746 977192643464016658 2054229034942.3723 51998323.94457991 475698 -598875 4611701407242345792 9223362250391155632 9312163881623734456 27615161624211.875 12261797.824844675 337212 -792887 4611699550286611812 9223290551912005343 6930300520201292824 27479710385933.586 
53095331.60360441 252197 -3807842 4611710821592843606 9223326163906184987 16710274896338005145 85240848090850.69 22373416.533275086 196036 -25703952 4611709443519524003 9223353913449113943 9946868158853570839 67568783303242.086 3154349.826950714 147211 -716829 4611852156092872082 9223361623076951140 15381015774917924786 170693446547158.72 201431892.4773785 90109 -59183 4611730685242027332 9223354909338698162 8078812522502896568 94622946187035.42 1425270865.0901496 85379 -33010362 4611704682869732882 9223268545373999677 2064452191838585926 26532987929602.555 3695122.4062526934 77807 -800784 4611752907938305166 9223340418389788041 18082918611792817587 233352070043266.62 36535786.81446395 77492 -20810645 4611712185532639162 9223218900001937412 4996531385439292694 68246505203164.63 6316535.831023813 73213 -25843850 4611690025407720929 9223346023778617822 12755881190906812868 185015319325648.16 9962165.34831339 68945 -23447120 4611796031755620254 9223329309291309758 17231649548755339966 255019232629204.38 7937191.271698021 67570 -14739804 4611692230555590277 9223313509005166531 2458378896777063244 38308020331864.36 14590240.469105456 64174 -32077710 4611884228437061959 9223352444952988904 12965822147651192908 214467085941034.7 7257521.096258734 60456 -22446879 4611846229717089436 9223124373140579096 13530160492087688838 231724477077663.4 4737362.521046629 58389 -170282 4611833225706935900 9223371583739401906 8076893424988479310 141657635880324.8 1613795518.1065989 57017 -11482817 4611708000353743073 9223337838355779113 14841435427430843458 283531099960470.8 9938452.835998287 52345 -63469 4611695097019173921 9223353530156141191 6296784708578574520 120762239817777.88 579655378.4603049 52142 -29103473 4611744585914335132 9223333530281362537 5908285283932344933 123712996438970.34 867841.595541967 47758 +1704509 4611700827100483880 9223360787015464643 10441337359398154812 19954243669348.844 9648741.579254271 523264 9223372036854775807 4611686018427387904 4544239379628300646 +732797 4611701940806302259 9223355550934604746 977192643464016658 2054229034942.3723 51998323.94457991 475698 9223372036854775807 4611686018427387904 4091184823334377716 +598875 4611701407242345792 9223362250391155632 9312163881623734456 27615161624211.875 12261797.824844675 337212 9223372036854775807 4611686018427387904 3725992504798702670 +792887 4611699550286611812 9223290551912005343 6930300520201292824 27479710385933.586 53095331.60360441 252197 9223372036854775807 4611686018427387904 6536441508464694614 +3807842 4611710821592843606 9223326163906184987 16710274896338005145 85240848090850.69 22373416.533275086 196036 9223372036854775807 4611686018427387904 1797862753609257231 +25703952 4611709443519524003 9223353913449113943 9946868158853570839 67568783303242.086 3154349.826950714 147211 9223372036854775807 4611686018427387904 8737124378202300429 +716829 4611852156092872082 9223361623076951140 15381015774917924786 170693446547158.72 201431892.4773785 90109 9223372036854775807 4611686018427387904 8209915323001116338 +59183 4611730685242027332 9223354909338698162 8078812522502896568 94622946187035.42 1425270865.0901496 85379 9223372036854775807 4611686018427387904 8909082036598843562 +33010362 4611704682869732882 9223268545373999677 2064452191838585926 26532987929602.555 3695122.4062526934 77807 9223372036854775807 4611686018427387904 5411365383789552292 +800784 4611752907938305166 9223340418389788041 18082918611792817587 233352070043266.62 36535786.81446395 77492 9223372036854775807 4611686018427387904 2059255810151375435 
+20810645 4611712185532639162 9223218900001937412 4996531385439292694 68246505203164.63 6316535.831023813 73213 9223372036854775807 4611686018427387904 8852740550386113674 +25843850 4611690025407720929 9223346023778617822 12755881190906812868 185015319325648.16 9962165.34831339 68945 9223372036854775807 4611686018427387904 7849665866595760148 +23447120 4611796031755620254 9223329309291309758 17231649548755339966 255019232629204.38 7937191.271698021 67570 9223372036854775807 4611686018427387904 3435410911925610424 +14739804 4611692230555590277 9223313509005166531 2458378896777063244 38308020331864.36 14590240.469105456 64174 9223372036854775807 4611686018427387904 511910855240035342 +32077710 4611884228437061959 9223352444952988904 12965822147651192908 214467085941034.7 7257521.096258734 60456 9223372036854775807 4611686018427387904 2256071920672551964 +22446879 4611846229717089436 9223124373140579096 13530160492087688838 231724477077663.4 4737362.521046629 58389 9223372036854775807 4611686018427387904 6236276364886386410 +170282 4611833225706935900 9223371583739401906 8076893424988479310 141657635880324.8 1613795518.1065989 57017 9223372036854775807 4611686018427387904 4755775861151848768 +11482817 4611708000353743073 9223337838355779113 14841435427430843458 283531099960470.8 9938452.835998287 52345 9223372036854775807 4611686018427387904 5371586112642152558 +63469 4611695097019173921 9223353530156141191 6296784708578574520 120762239817777.88 579655378.4603049 52142 9223372036854775807 4611686018427387904 4150567963952988110 +29103473 4611744585914335132 9223333530281362537 5908285283932344933 123712996438970.34 867841.595541967 47758 9223372036854775807 4611686018427387904 3238284030821087319 Simple functions with non compilable function -1704509 4611700827100483880 9223360787015464643 10441337359398154812 4611686018427387904 19954243669348.844 9648741.579254271 523264 -732797 4611701940806302259 9223355550934604746 977192643464016658 4611686018427387904 2054229034942.3723 51998323.94457991 475698 -598875 4611701407242345792 9223362250391155632 9312163881623734456 4611686018427387904 27615161624211.875 12261797.824844675 337212 -792887 4611699550286611812 9223290551912005343 6930300520201292824 4611686018427387904 27479710385933.586 53095331.60360441 252197 -3807842 4611710821592843606 9223326163906184987 16710274896338005145 4611686018427387904 85240848090850.69 22373416.533275086 196036 -25703952 4611709443519524003 9223353913449113943 9946868158853570839 4611686018427387904 67568783303242.086 3154349.826950714 147211 -716829 4611852156092872082 9223361623076951140 15381015774917924786 4611686018427387904 170693446547158.72 201431892.4773785 90109 -59183 4611730685242027332 9223354909338698162 8078812522502896568 4611686018427387904 94622946187035.42 1425270865.0901496 85379 -33010362 4611704682869732882 9223268545373999677 2064452191838585926 4611686018427387904 26532987929602.555 3695122.4062526934 77807 -800784 4611752907938305166 9223340418389788041 18082918611792817587 4611686018427387904 233352070043266.62 36535786.81446395 77492 -20810645 4611712185532639162 9223218900001937412 4996531385439292694 4611686018427387904 68246505203164.63 6316535.831023813 73213 -25843850 4611690025407720929 9223346023778617822 12755881190906812868 4611686018427387904 185015319325648.16 9962165.34831339 68945 -23447120 4611796031755620254 9223329309291309758 17231649548755339966 4611686018427387904 255019232629204.38 7937191.271698021 67570 -14739804 4611692230555590277 9223313509005166531 
2458378896777063244 4611686018427387904 38308020331864.36 14590240.469105456 64174 -32077710 4611884228437061959 9223352444952988904 12965822147651192908 4611686018427387904 214467085941034.7 7257521.096258734 60456 -22446879 4611846229717089436 9223124373140579096 13530160492087688838 4611686018427387904 231724477077663.4 4737362.521046629 58389 -170282 4611833225706935900 9223371583739401906 8076893424988479310 4611686018427387904 141657635880324.8 1613795518.1065989 57017 -11482817 4611708000353743073 9223337838355779113 14841435427430843458 4611686018427387904 283531099960470.8 9938452.835998287 52345 -63469 4611695097019173921 9223353530156141191 6296784708578574520 4611686018427387904 120762239817777.88 579655378.4603049 52142 -29103473 4611744585914335132 9223333530281362537 5908285283932344933 4611686018427387904 123712996438970.34 867841.595541967 47758 +1704509 4611700827100483880 9223360787015464643 10441337359398154812 3620921835565807284859452 19954243669348.844 9648741.579254271 523264 9223372036854775807 4611686018427387904 4544239379628300646 +732797 4611701940806302259 9223355550934604746 977192643464016658 3289442827160604417733394 2054229034942.3723 51998323.94457991 475698 9223372036854775807 4611686018427387904 4091184823334377716 +598875 4611701407242345792 9223362250391155632 9312163881623734456 2330921446573746856380600 27615161624211.875 12261797.824844675 337212 9223372036854775807 4611686018427387904 3725992504798702670 +792887 4611699550286611812 9223290551912005343 6930300520201292824 1745179600137886041476120 27479710385933.586 53095331.60360441 252197 9223372036854775807 4611686018427387904 6536441508464694614 +3807842 4611710821592843606 9223326163906184987 16710274896338005145 1356295121550317411019929 85240848090850.69 22373416.533275086 196036 9223372036854775807 4611686018427387904 1797862753609257231 +25703952 4611709443519524003 9223353913449113943 9946868158853570839 1018731388338768841564439 67568783303242.086 3154349.826950714 147211 9223372036854775807 4611686018427387904 8737124378202300429 +716829 4611852156092872082 9223361623076951140 15381015774917924786 623810478612337115371442 170693446547158.72 201431892.4773785 90109 9223372036854775807 4611686018427387904 8209915323001116338 +59183 4611730685242027332 9223354909338698162 8078812522502896568 589916507545680254024632 94622946187035.42 1425270865.0901496 85379 9223372036854775807 4611686018427387904 8909082036598843562 +33010362 4611704682869732882 9223268545373999677 2064452191838585926 538517864195994778911814 26532987929602.555 3695122.4062526934 77807 9223372036854775807 4611686018427387904 5411365383789552292 +800784 4611752907938305166 9223340418389788041 18082918611792817587 535545510122473785781683 233352070043266.62 36535786.81446395 77492 9223372036854775807 4611686018427387904 2059255810151375435 +20810645 4611712185532639162 9223218900001937412 4996531385439292694 506405014842860050255126 68246505203164.63 6316535.831023813 73213 9223372036854775807 4611686018427387904 8852740550386113674 +25843850 4611690025407720929 9223346023778617822 12755881190906812868 476547495537329753708996 185015319325648.16 9962165.34831339 68945 9223372036854775807 4611686018427387904 7849665866595760148 +23447120 4611796031755620254 9223329309291309758 17231649548755339966 467236365548464278670014 255019232629204.38 7937191.271698021 67570 9223372036854775807 4611686018427387904 3435410911925610424 +14739804 4611692230555590277 9223313509005166531 2458378896777063244 444126268697527941770060 
38308020331864.36 14590240.469105456 64174 9223372036854775807 4611686018427387904 511910855240035342 +32077710 4611884228437061959 9223352444952988904 12965822147651192908 417407443977973675608140 214467085941034.7 7257521.096258734 60456 9223372036854775807 4611686018427387904 2256071920672551964 +22446879 4611846229717089436 9223124373140579096 13530160492087688838 403462269796593691082374 231724477077663.4 4737362.521046629 58389 9223372036854775807 4611686018427387904 6236276364886386410 +170282 4611833225706935900 9223371583739401906 8076893424988479310 394417911933408911581006 141657635880324.8 1613795518.1065989 57017 9223372036854775807 4611686018427387904 4755775861151848768 +11482817 4611708000353743073 9223337838355779113 14841435427430843458 361995300393829962204226 283531099960470.8 9938452.835998287 52345 9223372036854775807 4611686018427387904 5371586112642152558 +63469 4611695097019173921 9223353530156141191 6296784708578574520 360843057610541117735096 120762239817777.88 579655378.4603049 52142 9223372036854775807 4611686018427387904 4150567963952988110 +29103473 4611744585914335132 9223333530281362537 5908285283932344933 330534668598011678200421 123712996438970.34 867841.595541967 47758 9223372036854775807 4611686018427387904 3238284030821087319 Simple functions if combinator -1704509 4611700827100483880 9223310246721229500 16398241567152875142 2224726.7626273884 261874 -732797 4611721382223060002 9223355550934604746 16281585268876620522 5898616.931652982 237784 -598875 4611701407242345792 9223362250391155632 3577699408183553052 53771550.26565126 167966 -792887 4611699550286611812 9223164887726235740 7088177025760385824 92835869.96920013 125539 -3807842 4611710821592843606 9223283397553859544 5756765290752687660 39794091.419183925 97845 -25703952 4611784761593342388 9223241341744449690 4782279928971192568 9276773.708181158 73368 -716829 4611852156092872082 9223361623076951140 8613712481895484190 291083243.75407773 44993 -59183 4611730685242027332 9223354909338698162 18369075291092794110 5925109959.715378 42817 -33010362 4611704682869732882 9223092117352620518 9991152681891671022 12412830.045471078 38861 -800784 4611752907938305166 9223309994342931384 5251877538869750510 53535427.52018088 38767 -20810645 4611712185532639162 9223218900001937412 11803718472901310700 10496765.20741332 36477 -25843850 4611744529689964352 9223346023778617822 127137885677350808 18966925.191309396 34353 -23447120 4611796031755620254 9223329309291309758 1841522159325376278 6271211.193812284 33768 -14739804 4611762063154116632 9223007205463222212 16302703534054321116 6885575.861759452 32156 -32077710 4612033458080771112 9223352444952988904 421072759851674408 12220152.393889504 30172 -22446879 4611846229717089436 9223124373140579096 6577134317587565298 2482202.163802278 29249 -170282 4611833225706935900 9223371583739401906 15764226366913732386 2515144222.953728 28587 -11482817 4611990575414646848 9223302669582414438 9828522700609834800 34845264.2080656 25993 -63469 4612175339998036670 9222961628400798084 17239621485933250238 7825349797.6059 25996 -29103473 4611744585914335132 9223035551850347954 12590190375872647672 26049107.15514301 23939 +1704509 4611700827100483880 9223310246721229500 16398241567152875142 62618822667209.71 2224726.7626273884 261874 9223372036854775806 4611686018427387904 4518874482384062894 +732797 4611721382223060002 9223355550934604746 16281585268876620522 68472164943295.68 5898616.931652982 237784 9223372036854775806 4611686018427387904 3641900047478154650 +598875 
4611701407242345792 9223362250391155632 3577699408183553052 21300140553347.42 53771550.26565126 167966 9223372036854775806 4611686018427387904 1688477495230210408 +792887 4611699550286611812 9223164887726235740 7088177025760385824 56461952267903.89 92835869.96920013 125539 9223372036854775806 4611686018427387904 4850868151095058072 +3807842 4611710821592843606 9223283397553859544 5756765290752687660 58835559208469.4 39794091.419183925 97845 9223372036854775806 4611686018427387904 6845214684357194564 +25703952 4611784761593342388 9223241341744449690 4782279928971192568 65182094768443.91 9276773.708181158 73368 9223372036854775806 4611686018427387904 1384302533387727316 +716829 4611852156092872082 9223361623076951140 8613712481895484190 191445613359755.62 291083243.75407773 44993 9223372036854775806 4611686018427387904 6344483471397203854 +59183 4611730685242027332 9223354909338698162 18369075291092794110 429013599530392 5925109959.715378 42817 9223372036854775806 4611686018427387904 5909305558020042898 +33010362 4611704682869732882 9223092117352620518 9991152681891671022 257099731913529.5 12412830.045471078 38861 9223372036854775806 4611686018427387904 4672855013852508626 +800784 4611752907938305166 9223309994342931384 5251877538869750510 135472890315726.03 53535427.52018088 38767 9223372036854775806 4611686018427387904 7801864489649220514 +20810645 4611712185532639162 9223218900001937412 11803718472901310700 323593455407553 10496765.20741332 36477 9223372036854775806 4611686018427387904 5941995311893397960 +25843850 4611744529689964352 9223346023778617822 127137885677350808 3700925266420.715 18966925.191309396 34353 9223372036854775806 4611686018427387904 6700111718676827412 +23447120 4611796031755620254 9223329309291309758 1841522159325376278 54534534450526.42 6271211.193812284 33768 9223372036854775806 4611686018427387904 2325654077031843898 +14739804 4611762063154116632 9223007205463222212 16302703534054321116 506987919332451.8 6885575.861759452 32156 9223372036854775806 4611686018427387904 2114922310535979832 +32077710 4612033458080771112 9223352444952988904 421072759851674408 13955745719596.793 12220152.393889504 30172 9223372036854775806 4611686018427387904 4399934528735249092 +22446879 4611846229717089436 9223124373140579096 6577134317587565298 224866980668999.47 2482202.163802278 29249 9223372036854775806 4611686018427387904 8763910740678180498 +170282 4611833225706935900 9223371583739401906 15764226366913732386 551447384017691 2515144222.953728 28587 9223372036854775806 4611686018427387904 8217388408377809010 +11482817 4611990575414646848 9223302669582414438 9828522700609834800 378121905921203.2 34845264.2080656 25993 9223372036854775806 4611686018427387904 4689180182672571856 +63469 4612175339998036670 9222961628400798084 17239621485933250238 663164390134376.5 7825349797.6059 25996 9223372036854775806 4611686018427387904 2067736879306995526 +29103473 4611744585914335132 9223035551850347954 12590190375872647672 525927999326314.7 26049107.15514301 23939 9223372036854775806 4611686018427387904 8318055464870862444 diff --git a/tests/queries/1_stateful/00165_jit_aggregate_functions.sql b/tests/queries/1_stateful/00165_jit_aggregate_functions.sql index 90917209d1b..6c13c6e4d42 100644 --- a/tests/queries/1_stateful/00165_jit_aggregate_functions.sql +++ b/tests/queries/1_stateful/00165_jit_aggregate_functions.sql @@ -5,18 +5,52 @@ SELECT 'Aggregation using JIT compilation'; SELECT 'Simple functions'; -SELECT CounterID, min(WatchID), max(WatchID), sum(WatchID), avg(WatchID), 
avgWeighted(WatchID, CounterID), count(WatchID) FROM test.hits +SELECT + CounterID, + min(WatchID), + max(WatchID), + sum(WatchID), + avg(WatchID), + avgWeighted(WatchID, CounterID), + count(WatchID), + groupBitOr(WatchID), + groupBitAnd(WatchID), + groupBitXor(WatchID) +FROM test.hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20; SELECT 'Simple functions with non compilable function'; -SELECT CounterID, min(WatchID), max(WatchID), sum(WatchID), groupBitAnd(WatchID), avg(WatchID), avgWeighted(WatchID, CounterID), count(WatchID) FROM test.hits +SELECT + CounterID, + min(WatchID), + max(WatchID), + sum(WatchID), + sum(toUInt128(WatchID)), + avg(WatchID), + avgWeighted(WatchID, CounterID), + count(WatchID), + groupBitOr(WatchID), + groupBitAnd(WatchID), + groupBitXor(WatchID) +FROM test.hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20; SELECT 'Simple functions if combinator'; WITH (WatchID % 2 == 0) AS predicate -SELECT CounterID, minIf(WatchID,predicate), maxIf(WatchID, predicate), sumIf(WatchID, predicate), avgIf(WatchID, predicate), avgWeightedIf(WatchID, CounterID, predicate), countIf(WatchID, predicate) FROM test.hits +SELECT + CounterID, + minIf(WatchID,predicate), + maxIf(WatchID, predicate), + sumIf(WatchID, predicate), + avgIf(WatchID, predicate), + avgWeightedIf(WatchID, CounterID, predicate), + countIf(WatchID, predicate), + groupBitOrIf(WatchID, predicate), + groupBitAndIf(WatchID, predicate), + groupBitXorIf(WatchID, predicate) +FROM test.hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20; SET compile_aggregate_expressions = 0; @@ -25,15 +59,49 @@ SELECT 'Aggregation without JIT compilation'; SELECT 'Simple functions'; -SELECT CounterID, min(WatchID), max(WatchID), sum(WatchID), avg(WatchID), avgWeighted(WatchID, CounterID), count(WatchID) FROM test.hits +SELECT + CounterID, + min(WatchID), + max(WatchID), + sum(WatchID), + avg(WatchID), + avgWeighted(WatchID, CounterID), + count(WatchID), + groupBitOr(WatchID), + groupBitAnd(WatchID), + groupBitXor(WatchID) +FROM test.hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20; SELECT 'Simple functions with non compilable function'; -SELECT CounterID, min(WatchID), max(WatchID), sum(WatchID), groupBitAnd(WatchID), avg(WatchID), avgWeighted(WatchID, CounterID), count(WatchID) FROM test.hits +SELECT + CounterID, + min(WatchID), + max(WatchID), + sum(WatchID), + sum(toUInt128(WatchID)), + avg(WatchID), + avgWeighted(WatchID, CounterID), + count(WatchID), + groupBitOr(WatchID), + groupBitAnd(WatchID), + groupBitXor(WatchID) +FROM test.hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20; SELECT 'Simple functions if combinator'; WITH (WatchID % 2 == 0) AS predicate -SELECT CounterID, minIf(WatchID,predicate), maxIf(WatchID, predicate), sumIf(WatchID, predicate), avgWeightedIf(WatchID, CounterID, predicate), countIf(WatchID, predicate) FROM test.hits +SELECT + CounterID, + minIf(WatchID,predicate), + maxIf(WatchID, predicate), + sumIf(WatchID, predicate), + avgIf(WatchID, predicate), + avgWeightedIf(WatchID, CounterID, predicate), + countIf(WatchID, predicate), + groupBitOrIf(WatchID, predicate), + groupBitAndIf(WatchID, predicate), + groupBitXorIf(WatchID, predicate) +FROM test.hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20; diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 541dea23698..afa6b9c8a25 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,7 +1,10 @@ +v21.7.2.7-stable 2021-07-09 +v21.6.7.57-stable 2021-07-09 
v21.6.6.51-stable 2021-07-02 v21.6.5.37-stable 2021-06-19 v21.6.4.26-stable 2021-06-11 v21.6.3.14-stable 2021-06-04 +v21.5.9.4-stable 2021-07-10 v21.5.8.21-stable 2021-07-02 v21.5.7.9-stable 2021-06-22 v21.5.6.6-stable 2021-05-29 @@ -11,6 +14,7 @@ v21.4.6.55-stable 2021-04-30 v21.4.5.46-stable 2021-04-24 v21.4.4.30-stable 2021-04-16 v21.4.3.21-stable 2021-04-12 +v21.3.15.4-stable 2021-07-10 v21.3.14.1-lts 2021-07-01 v21.3.13.9-lts 2021-06-22 v21.3.12.2-lts 2021-05-25 @@ -67,6 +71,7 @@ v20.9.5.5-stable 2020-11-13 v20.9.4.76-stable 2020-10-29 v20.9.3.45-stable 2020-10-09 v20.9.2.20-stable 2020-09-22 +v20.8.19.4-stable 2021-07-10 v20.8.18.32-lts 2021-04-16 v20.8.17.25-lts 2021-04-08 v20.8.16.20-lts 2021-04-06
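For readers following the 01921_datatype_date32 test above, a minimal illustration of the Date32 range clamping that its reference file records; the input values and expected outputs are copied from that test and reference, and the stated bounds are assumptions of this sketch rather than an authoritative specification of the type:

-- Out-of-range inputs are clamped to the supported Date32 range,
-- matching the reference output of 01921_datatype_date32.sql above.
SELECT toDate32('1924-01-01');  -- expected: 1925-01-01 (below the lower bound)
SELECT toDate32('2283-12-31');  -- expected: 2283-11-11 (above the upper bound)
SELECT toDate32('2021-06-22');  -- expected: 2021-06-22 (in range, unchanged)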