Merge remote-tracking branch 'origin' into integration-2

Merge commit fb3078a863 by Yatsishin Ilya, 2021-03-26 13:08:39 +03:00
166 changed files with 1982 additions and 792 deletions

.gitmodules vendored

@ -93,7 +93,7 @@
url = https://github.com/ClickHouse-Extras/libunwind.git
[submodule "contrib/simdjson"]
path = contrib/simdjson
url = https://github.com/ClickHouse-Extras/simdjson.git
url = https://github.com/simdjson/simdjson.git
[submodule "contrib/rapidjson"]
path = contrib/rapidjson
url = https://github.com/ClickHouse-Extras/rapidjson


@ -1069,11 +1069,11 @@ public:
}
template <typename DateOrTime>
inline LUTIndex addMonthsIndex(DateOrTime v, Int64 delta) const
inline LUTIndex NO_SANITIZE_UNDEFINED addMonthsIndex(DateOrTime v, Int64 delta) const
{
const Values & values = lut[toLUTIndex(v)];
Int64 month = static_cast<Int64>(values.month) + delta;
Int64 month = values.month + delta;
if (month > 0)
{
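The hunk above adds NO_SANITIZE_UNDEFINED and drops the explicit cast: for extreme `delta` values the `Int64` addition can overflow, which is undefined behavior that UBSan builds would otherwise trap. A minimal sketch of the pattern, assuming clang's `no_sanitize` attribute; the helper below is a hypothetical illustration, not the LUT code itself:

``` cpp
#include <cstdint>

// ClickHouse's NO_SANITIZE_UNDEFINED macro expands to roughly this under clang.
#define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined")))

// For delta near INT64_MAX the addition wraps instead of aborting under UBSan;
// callers are expected to reject the out-of-range month afterwards.
NO_SANITIZE_UNDEFINED int64_t addMonthsSketch(uint8_t month, int64_t delta)
{
    return static_cast<int64_t>(month) + delta;
}
```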

contrib/replxx vendored

@ -1 +1 @@
Subproject commit cdb6e3f2ce4464225daf9c8beeae7db98d590bdc
Subproject commit 2b24f14594d7606792b92544bb112a6322ba34d7

contrib/simdjson vendored

@ -1 +1 @@
Subproject commit 3190d66a49059092a1753dc35595923debfc1698
Subproject commit 95b4870e20be5f97d9dcf63b23b1c6f520c366c1


@ -18,6 +18,7 @@ RUN apt-get update \
clickhouse-client=$version \
clickhouse-common-static=$version \
locales \
tzdata \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf \
&& apt-get clean


@ -32,6 +32,7 @@ RUN groupadd -r clickhouse --gid=101 \
clickhouse-server=$version \
locales \
wget \
tzdata \
&& rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \


@ -21,7 +21,9 @@ RUN addgroup -S -g 101 clickhouse \
&& chown clickhouse:clickhouse /var/lib/clickhouse \
&& chown root:clickhouse /var/log/clickhouse-server \
&& chmod +x /entrypoint.sh \
&& apk add --no-cache su-exec bash \
&& apk add --no-cache su-exec bash tzdata \
&& cp /usr/share/zoneinfo/UTC /etc/localtime \
&& echo "UTC" > /etc/timezone \
&& chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client
# we need to allow "others" access to clickhouse folder, because docker container


@ -292,6 +292,7 @@ function run_tests
01318_decrypt # Depends on OpenSSL
01663_aes_msan # Depends on OpenSSL
01667_aes_args_check # Depends on OpenSSL
01776_decrypt_aead_size_check # Depends on OpenSSL
01281_unsucceeded_insert_select_queries_counter
01292_create_user
01294_lazy_database_concurrent


@ -266,14 +266,13 @@ for query_index in queries_to_run:
try:
# Will also detect too long queries during warmup stage
res = c.execute(q, query_id = prewarm_id, settings = {'max_execution_time': 10})
res = c.execute(q, query_id = prewarm_id, settings = {'max_execution_time': args.max_query_seconds})
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (prewarm_id, *e.args)
e.message = prewarm_id + ': ' + e.message
raise
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
except KeyboardInterrupt:
raise
@ -320,7 +319,7 @@ for query_index in queries_to_run:
for conn_index, c in enumerate(this_query_connections):
try:
res = c.execute(q, query_id = run_id)
res = c.execute(q, query_id = run_id, settings = {'max_execution_time': args.max_query_seconds})
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args)


@ -26,6 +26,7 @@ def process_result(result_folder):
with open(err_path, 'r') as f:
if 'AssertionError' in f.read():
summary.append((test, "FAIL"))
status = 'failure'
else:
summary.append((test, "OK"))


@ -11,7 +11,7 @@ service clickhouse-server start && sleep 5
cd /sqlancer/sqlancer-master
export TIMEOUT=60
export TIMEOUT=300
export NUM_QUERIES=1000
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPWhere | tee /test_output/TLPWhere.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPWhere.err


@ -19,7 +19,7 @@ $ echo '{"foo":"bar"}' | curl 'http://localhost:8123/?query=INSERT%20INTO%20test
Using [CLI interface](../../interfaces/cli.md):
``` bash
$ echo '{"foo":"bar"}' | clickhouse-client ---query="INSERT INTO test FORMAT JSONEachRow"
$ echo '{"foo":"bar"}' | clickhouse-client --query="INSERT INTO test FORMAT JSONEachRow"
```
Instead of inserting data manually, you might consider using one of the [client libraries](../../interfaces/index.md).


@ -50,7 +50,7 @@ The supported formats are:
| [Parquet](#data-format-parquet) | ✔ | ✔ |
| [Arrow](#data-format-arrow) | ✔ | ✔ |
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
| [ORC](#data-format-orc) | ✔ | |
| [ORC](#data-format-orc) | ✔ | ✔ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ |
@ -1284,32 +1284,33 @@ To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-e
## ORC {#data-format-orc}
[Apache ORC](https://orc.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. You can only insert data in this format to ClickHouse.
[Apache ORC](https://orc.apache.org/) is a columnar storage format widespread in the [Hadoop](https://hadoop.apache.org/) ecosystem.
### Data Types Matching {#data_types-matching-3}
The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` queries.
The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
| ORC data type (`INSERT`) | ClickHouse data type |
|--------------------------|-----------------------------------------------------|
| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) |
| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) |
| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) |
| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) |
| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) |
| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) |
| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) |
| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) |
| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) |
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) |
| `DATE32` | [Date](../sql-reference/data-types/date.md) |
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) |
| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
|--------------------------|-----------------------------------------------------|--------------------------|
| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` |
| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` |
| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | `UINT16` |
| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` |
| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | `UINT32` |
| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` |
| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` |
| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | `INT64` |
| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` |
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` |
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` |
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `-` | [Array](../sql-reference/data-types/array.md) | `LIST` |
ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the ORC `DECIMAL` type as the ClickHouse `Decimal128` type.
Unsupported ORC data types: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
Unsupported ORC data types: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
The data types of ClickHouse table columns don't have to match the corresponding ORC data fields. When inserting data, ClickHouse interprets data types according to the table above and then [casts](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) the data to the data type set for the ClickHouse table column.
@ -1321,6 +1322,14 @@ You can insert ORC data from a file into ClickHouse table by the following comma
$ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
```
### Selecting Data {#selecting-data-2}
You can select data from a ClickHouse table and save it to a file in the ORC format with the following command:
``` bash
$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.orc}
```
To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-engines/integrations/hdfs.md).
## LineAsString {#lineasstring}


@ -9,7 +9,7 @@ Columns:
- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — the number of times this error happened.
- `last_error_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — time when the last error happened.
- `last_error_message` ([String](../../sql-reference/data-types/string.md)) — message for the last error.
- `last_error_stacktrace` ([String](../../sql-reference/data-types/string.md)) — stacktrace for the last error.
- `last_error_trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) which represents a list of physical addresses where the called methods are stored.
- `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — remote exception (i.e. received during a distributed query).
**Example**
@ -25,3 +25,12 @@ LIMIT 1
│ CANNOT_OPEN_FILE │ 76 │ 1 │
└──────────────────┴──────┴───────┘
```
``` sql
WITH arrayMap(x -> demangle(addressToSymbol(x)), last_error_trace) AS all
SELECT name, arrayStringConcat(all, '\n') AS res
FROM system.errors
LIMIT 1
SETTINGS allow_introspection_functions=1\G
```


@ -320,8 +320,6 @@ Similar to `cache`, but stores data on SSD and index in RAM.
<write_buffer_size>1048576</write_buffer_size>
<!-- Path where cache file will be stored. -->
<path>/var/lib/clickhouse/clickhouse_dictionaries/test_dict</path>
<!-- Max number of stored keys in the cache. Rounded up to a power of two. -->
<max_stored_keys>1048576</max_stored_keys>
</ssd_cache>
</layout>
```
@ -329,8 +327,8 @@ Similar to `cache`, but stores data on SSD and index in RAM.
or
``` sql
LAYOUT(CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict MAX_STORED_KEYS 1048576))
LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict))
```
### complex_key_ssd_cache {#complex-key-ssd-cache}


@ -23,7 +23,9 @@ ClickHouse supports the standard grammar for defining windows and window functio
| `GROUPS` frame | not supported |
| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported |
| `rank()`, `dense_rank()`, `row_number()` | supported |
| `lag/lead(value, offset)` | not supported, replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`|
| `lag/lead(value, offset)` | Not supported. Workarounds: |
| | 1) replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`|
| | 2) use `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
## References


@ -49,7 +49,7 @@ ClickHouse can accept (`INSERT`) and return (`SELECT
| [Parquet](#data-format-parquet) | ✔ | ✔ |
| [Arrow](#data-format-arrow) | ✔ | ✔ |
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
| [ORC](#data-format-orc) | ✔ | |
| [ORC](#data-format-orc) | ✔ | ✔ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ |
@ -1203,45 +1203,53 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_
## ORC {#data-format-orc}
[Apache ORC](https://orc.apache.org/) is a column-oriented data format widespread in the Hadoop ecosystem. You can only insert data in this format into ClickHouse.
[Apache ORC](https://orc.apache.org/) is a columnar data format widespread in the [Hadoop](https://hadoop.apache.org/) ecosystem.
### Data Types Matching {#sootvetstvie-tipov-dannykh-1}
The table shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` queries.
The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
| ORC data type (`INSERT`) | ClickHouse data type |
|---------------------------|-----------------------------------------------------|
| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) |
| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) |
| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) |
| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) |
| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) |
| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) |
| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) |
| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) |
| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) |
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) |
| `DATE32` | [Date](../sql-reference/data-types/date.md) |
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) |
| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
|---------------------------|-----------------------------------------------------|---------------------------|
| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` |
| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` |
| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | `UINT16` |
| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` |
| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | `UINT32` |
| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` |
| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` |
| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | `INT64` |
| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT` |
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` |
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `DATE32` |
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `-` | [Array](../sql-reference/data-types/array.md) | `LIST` |
ClickHouse supports configurable precision for the `Decimal` type. When processing an `INSERT` query, ClickHouse treats the Parquet `DECIMAL` data type as `Decimal128`.
ClickHouse supports configurable precision for the `Decimal` type. When processing an `INSERT` query, ClickHouse treats the ORC `DECIMAL` data type as `Decimal128`.
Unsupported ORC data types: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
Unsupported ORC data types: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
The data types of ClickHouse table columns can differ from the corresponding ORC data fields. When inserting data, ClickHouse interprets ORC data types according to the matching table and then [casts](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) the data to the type set for the ClickHouse table column.
### Inserting Data {#vstavka-dannykh-1}
ORC data can be inserted into a ClickHouse table with the following command:
To insert data from a file in the ORC format into ClickHouse, use a command of the following form:
``` bash
$ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
```
To exchange data with Hadoop, you can use the [HDFS table engine](../engines/table-engines/integrations/hdfs.md).
### Selecting Data {#vyvod-dannykh-1}
To retrieve data from a ClickHouse table and save it to a file in the ORC format, use a command of the following form:
``` bash
$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.orc}
```
To exchange data with the Hadoop ecosystem, you can use the [HDFS table engine](../engines/table-engines/integrations/hdfs.md).
## LineAsString {#lineasstring}


@ -318,8 +318,6 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000))
<write_buffer_size>1048576</write_buffer_size>
<!-- Path where cache file will be stored. -->
<path>/var/lib/clickhouse/clickhouse_dictionaries/test_dict</path>
<!-- Max number of stored keys in the cache. Rounded up to a power of two. -->
<max_stored_keys>1048576</max_stored_keys>
</ssd_cache>
</layout>
```
@ -327,8 +325,8 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000))
or
``` sql
LAYOUT(CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict MAX_STORED_KEYS 1048576))
LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict))
```
### complex_key_ssd_cache {#complex-key-ssd-cache}


@ -26,7 +26,7 @@ numpy==1.19.2
Pygments==2.5.2
pymdown-extensions==8.0
python-slugify==4.0.1
PyYAML==5.3.1
PyYAML==5.4.1
repackage==0.7.3
requests==2.24.0
singledispatch==3.4.0.3


@ -8,10 +8,10 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/copyData.h>
#include <IO/TimeoutSetter.h>
#include <DataStreams/NativeBlockInputStream.h>
#include <DataStreams/NativeBlockOutputStream.h>
#include <Client/Connection.h>
#include <Client/TimeoutSetter.h>
#include <Common/ClickHouseRevision.h>
#include <Common/Exception.h>
#include <Common/NetException.h>


@ -16,7 +16,6 @@ SRCS(
HedgedConnections.cpp
HedgedConnectionsFactory.cpp
MultiplexedConnections.cpp
TimeoutSetter.cpp
)


@ -560,7 +560,7 @@ namespace DB
{
namespace ErrorCodes
{
#define M(VALUE, NAME) extern const Value NAME = VALUE;
#define M(VALUE, NAME) extern const ErrorCode NAME = VALUE;
APPLY_FOR_ERROR_CODES(M)
#undef M
@ -587,7 +587,7 @@ namespace ErrorCodes
ErrorCode end() { return END + 1; }
void increment(ErrorCode error_code, bool remote, const std::string & message, const std::string & stacktrace)
void increment(ErrorCode error_code, bool remote, const std::string & message, const FramePointers & trace)
{
if (error_code >= end())
{
@ -596,10 +596,10 @@ namespace ErrorCodes
error_code = end() - 1;
}
values[error_code].increment(remote, message, stacktrace);
values[error_code].increment(remote, message, trace);
}
void ErrorPairHolder::increment(bool remote, const std::string & message, const std::string & stacktrace)
void ErrorPairHolder::increment(bool remote, const std::string & message, const FramePointers & trace)
{
const auto now = std::chrono::system_clock::now();
@ -609,7 +609,7 @@ namespace ErrorCodes
++error.count;
error.message = message;
error.stacktrace = stacktrace;
error.trace = trace;
error.error_time_ms = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
}
ErrorPair ErrorPairHolder::get()


@ -1,11 +1,12 @@
#pragma once
#include <stddef.h>
#include <cstddef>
#include <cstdint>
#include <utility>
#include <mutex>
#include <common/types.h>
#include <string_view>
#include <vector>
#include <common/types.h>
/** Allows counting the number of simultaneously happening error codes.
* See also Exception.cpp for incrementing part.
@ -19,6 +20,7 @@ namespace ErrorCodes
/// ErrorCode identifier (index in array).
using ErrorCode = int;
using Value = size_t;
using FramePointers = std::vector<void *>;
/// Get name of error_code by identifier.
/// Returns statically allocated string.
@ -33,7 +35,7 @@ namespace ErrorCodes
/// Message for the last error.
std::string message;
/// Stacktrace for the last error.
std::string stacktrace;
FramePointers trace;
};
struct ErrorPair
{
@ -46,7 +48,7 @@ namespace ErrorCodes
{
public:
ErrorPair get();
void increment(bool remote, const std::string & message, const std::string & stacktrace);
void increment(bool remote, const std::string & message, const FramePointers & trace);
private:
ErrorPair value;
@ -60,7 +62,7 @@ namespace ErrorCodes
ErrorCode end();
/// Add value for specified error_code.
void increment(ErrorCode error_code, bool remote, const std::string & message, const std::string & stacktrace);
void increment(ErrorCode error_code, bool remote, const std::string & message, const FramePointers & trace);
}
}


@ -36,7 +36,7 @@ namespace ErrorCodes
/// - Aborts the process if error code is LOGICAL_ERROR.
/// - Increments error codes statistics.
void handle_error_code([[maybe_unused]] const std::string & msg, const std::string & stacktrace, int code, bool remote)
void handle_error_code([[maybe_unused]] const std::string & msg, int code, bool remote, const Exception::FramePointers & trace)
{
// In debug builds and builds with sanitizers, treat LOGICAL_ERROR as an assertion failure.
// Log the message before we fail.
@ -47,20 +47,21 @@ void handle_error_code([[maybe_unused]] const std::string & msg, const std::stri
abort();
}
#endif
ErrorCodes::increment(code, remote, msg, stacktrace);
ErrorCodes::increment(code, remote, msg, trace);
}
Exception::Exception(const std::string & msg, int code, bool remote_)
: Poco::Exception(msg, code)
, remote(remote_)
{
handle_error_code(msg, getStackTraceString(), code, remote);
handle_error_code(msg, code, remote, getStackFramePointers());
}
Exception::Exception(const std::string & msg, const Exception & nested, int code)
: Poco::Exception(msg, nested, code)
{
handle_error_code(msg, getStackTraceString(), code, remote);
handle_error_code(msg, code, remote, getStackFramePointers());
}
Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc)
@ -101,6 +102,31 @@ std::string Exception::getStackTraceString() const
#endif
}
Exception::FramePointers Exception::getStackFramePointers() const
{
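/// Collect raw frame pointers only; symbolization is deferred (e.g. via addressToSymbol, as in the system.errors example above).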
FramePointers frame_pointers;
#ifdef STD_EXCEPTION_HAS_STACK_TRACE
{
frame_pointers.resize(get_stack_trace_size());
for (size_t i = 0; i < frame_pointers.size(); ++i)
{
frame_pointers[i] = get_stack_trace_frames()[i];
}
}
#else
{
size_t stack_trace_size = trace.getSize();
size_t stack_trace_offset = trace.getOffset();
frame_pointers.reserve(stack_trace_size - stack_trace_offset);
for (size_t i = stack_trace_offset; i < stack_trace_size; ++i)
{
frame_pointers.push_back(trace.getFramePointers()[i]);
}
}
#endif
return frame_pointers;
}
void throwFromErrno(const std::string & s, int code, int the_errno)
{


@ -24,6 +24,8 @@ namespace DB
class Exception : public Poco::Exception
{
public:
using FramePointers = std::vector<void *>;
Exception() = default;
Exception(const std::string & msg, int code, bool remote_ = false);
Exception(const std::string & msg, const Exception & nested, int code);
@ -66,6 +68,8 @@ public:
bool isRemoteException() const { return remote; }
std::string getStackTraceString() const;
/// Used for system.errors
FramePointers getStackFramePointers() const;
private:
#ifndef STD_EXCEPTION_HAS_STACK_TRACE


@ -271,13 +271,13 @@ private:
};
template <typename Key, typename Mapped>
struct DefaultCellDisposer
struct DefaultLRUHashMapCellDisposer
{
void operator()(const Key &, const Mapped &) const {}
};
template <typename Key, typename Value, typename Disposer = DefaultCellDisposer<Key, Value>, typename Hash = DefaultHash<Key>>
template <typename Key, typename Value, typename Disposer = DefaultLRUHashMapCellDisposer<Key, Value>, typename Hash = DefaultHash<Key>>
using LRUHashMap = LRUHashMapImpl<Key, Value, Disposer, Hash, false>;
template <typename Key, typename Value, typename Disposer = DefaultCellDisposer<Key, Value>, typename Hash = DefaultHash<Key>>
template <typename Key, typename Value, typename Disposer = DefaultLRUHashMapCellDisposer<Key, Value>, typename Hash = DefaultHash<Key>>
using LRUHashMapWithSavedHash = LRUHashMapImpl<Key, Value, Disposer, Hash, true>;


@ -692,6 +692,30 @@ public:
assign(from.begin(), from.end());
}
void erase(const_iterator first, const_iterator last)
{
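/// Shift the tail [last, end()) left over the erased range, then move the end pointer back.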
iterator first_no_const = const_cast<iterator>(first);
iterator last_no_const = const_cast<iterator>(last);
size_t items_to_move = end() - last;
while (items_to_move != 0)
{
*first_no_const = *last_no_const;
++first_no_const;
++last_no_const;
--items_to_move;
}
this->c_end = reinterpret_cast<char *>(first_no_const);
}
void erase(const_iterator pos)
{
this->erase(pos, pos + 1);
}
bool operator== (const PODArray & rhs) const
{


@ -92,3 +92,57 @@ TEST(Common, PODInsertElementSizeNotMultipleOfLeftPadding)
EXPECT_EQ(arr1_initially_empty.size(), items_to_insert_size);
}
TEST(Common, PODErase)
{
{
PaddedPODArray<UInt64> items {0,1,2,3,4,5,6,7,8,9};
PaddedPODArray<UInt64> expected;
expected = {0,1,2,3,4,5,6,7,8,9};
items.erase(items.begin(), items.begin());
EXPECT_EQ(items, expected);
items.erase(items.end(), items.end());
EXPECT_EQ(items, expected);
}
{
PaddedPODArray<UInt64> actual {0,1,2,3,4,5,6,7,8,9};
PaddedPODArray<UInt64> expected;
expected = {0,1,4,5,6,7,8,9};
actual.erase(actual.begin() + 2, actual.begin() + 4);
EXPECT_EQ(actual, expected);
expected = {0,1,4};
actual.erase(actual.begin() + 3, actual.end());
EXPECT_EQ(actual, expected);
expected = {};
actual.erase(actual.begin(), actual.end());
EXPECT_EQ(actual, expected);
for (size_t i = 0; i < 10; ++i)
actual.emplace_back(static_cast<UInt64>(i));
expected = {0,1,4,5,6,7,8,9};
actual.erase(actual.begin() + 2, actual.begin() + 4);
EXPECT_EQ(actual, expected);
expected = {0,1,4};
actual.erase(actual.begin() + 3, actual.end());
EXPECT_EQ(actual, expected);
expected = {};
actual.erase(actual.begin(), actual.end());
EXPECT_EQ(actual, expected);
}
{
PaddedPODArray<UInt64> actual {0,1,2,3,4,5,6,7,8,9};
PaddedPODArray<UInt64> expected;
expected = {1,2,3,4,5,6,7,8,9};
actual.erase(actual.begin());
EXPECT_EQ(actual, expected);
}
}


@ -14,7 +14,7 @@ PEERDIR(
clickhouse/base/common
clickhouse/base/pcg-random
clickhouse/base/widechar_width
contrib/libs/libcpuid/libcpuid
contrib/libs/libcpuid
contrib/libs/openssl
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/re2


@ -13,7 +13,7 @@ PEERDIR(
clickhouse/base/common
clickhouse/base/pcg-random
clickhouse/base/widechar_width
contrib/libs/libcpuid/libcpuid
contrib/libs/libcpuid
contrib/libs/openssl
contrib/libs/poco/NetSSL_OpenSSL
contrib/libs/re2


@ -953,3 +953,26 @@ void writeFieldText(const Field & x, WriteBuffer & buf);
String toString(const Field & x);
}
template <>
struct fmt::formatter<DB::Field>
{
constexpr auto parse(format_parse_context & ctx)
{
auto it = ctx.begin();
auto end = ctx.end();
/// Only support {}.
if (it != end && *it != '}')
throw format_error("invalid format");
return it;
}
template <typename FormatContext>
auto format(const DB::Field & x, FormatContext & ctx)
{
return format_to(ctx.out(), "{}", toString(x));
}
};
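A minimal usage sketch of the formatter above; `describeField` is a hypothetical helper, and fmt renders the `Field` through `toString()`:

``` cpp
#include <fmt/format.h>
#include <Core/Field.h>  // defines DB::Field and the formatter specialization above

// Any DB::Field can now be passed directly as a "{}" argument.
std::string describeField(const DB::Field & value)
{
    return fmt::format("field = {}", value);
}
```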


@ -101,7 +101,7 @@ template <DictionaryKeyType dictionary_key_type>
double CacheDictionary<dictionary_key_type>::getLoadFactor() const
{
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
return static_cast<double>(cache_storage_ptr->getSize()) / cache_storage_ptr->getMaxSize();
return cache_storage_ptr->getLoadFactor();
}
template <DictionaryKeyType dictionary_key_type>
@ -333,9 +333,7 @@ Columns CacheDictionary<dictionary_key_type>::getColumnsImpl(
FetchResult result_of_fetch_from_storage;
{
/// Read lock on storage
const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
result_of_fetch_from_storage = cache_storage_ptr->fetchColumnsForKeys(keys, request);
}


@ -1,6 +1,7 @@
#pragma once
#include <chrono>
#include <variant>
#include <pcg_random.hpp>
@ -30,28 +31,31 @@ struct CacheDictionaryStorageConfiguration
const DictionaryLifetime lifetime;
};
/** Keys are stored in LRUCache and column values are serialized into arena.
Cell in LRUCache consists of the allocated size and a place in the arena where the columns' serialized data is stored.
Columns are serialized by rows.
When a cell is removed from LRUCache, the data associated with it is also removed from the arena.
In case of a complex key, we also store the key data in the arena, and it is likewise removed from the arena.
*/
/** ICacheDictionaryStorage implementation that keeps keys in a hash table with fixed collision length.
* Values in the hash table point to indexes in the attribute arrays.
*/
template <DictionaryKeyType dictionary_key_type>
class CacheDictionaryStorage final : public ICacheDictionaryStorage
{
static constexpr size_t max_collision_length = 10;
public:
using KeyType = std::conditional_t<dictionary_key_type == DictionaryKeyType::simple, UInt64, StringRef>;
static_assert(dictionary_key_type != DictionaryKeyType::range, "Range key type is not supported by CacheDictionaryStorage");
explicit CacheDictionaryStorage(CacheDictionaryStorageConfiguration & configuration_)
explicit CacheDictionaryStorage(
const DictionaryStructure & dictionary_structure,
CacheDictionaryStorageConfiguration & configuration_)
: configuration(configuration_)
, rnd_engine(randomSeed())
, cache(configuration.max_size_in_cells, false, { arena })
{
size_t cells_size = roundUpToPowerOfTwoOrZero(std::max(configuration.max_size_in_cells, max_collision_length));
cells.resize_fill(cells_size);
size_overlap_mask = cells_size - 1;
setup(dictionary_structure);
}
bool returnsFetchedColumnsInOrderOfRequestedKeys() const override { return true; }
@ -71,9 +75,7 @@ public:
const DictionaryStorageFetchRequest & fetch_request) override
{
if constexpr (dictionary_key_type == DictionaryKeyType::simple)
{
return fetchColumnsForKeysImpl<SimpleKeysStorageFetchResult>(keys, fetch_request);
}
else
throw Exception("Method fetchColumnsForKeys is not supported for complex key storage", ErrorCodes::NOT_IMPLEMENTED);
}
@ -109,9 +111,7 @@ public:
const DictionaryStorageFetchRequest & column_fetch_requests) override
{
if constexpr (dictionary_key_type == DictionaryKeyType::complex)
{
return fetchColumnsForKeysImpl<ComplexKeysStorageFetchResult>(keys, column_fetch_requests);
}
else
throw Exception("Method fetchColumnsForKeys is not supported for simple key storage", ErrorCodes::NOT_IMPLEMENTED);
}
@ -140,79 +140,162 @@ public:
throw Exception("Method getCachedComplexKeys is not supported for simple key storage", ErrorCodes::NOT_IMPLEMENTED);
}
size_t getSize() const override { return cache.size(); }
size_t getSize() const override { return size; }
size_t getMaxSize() const override { return cache.getMaxSize(); }
double getLoadFactor() const override { return static_cast<double>(size) / configuration.max_size_in_cells; }
size_t getBytesAllocated() const override { return arena.size() + cache.getSizeInBytes(); }
size_t getBytesAllocated() const override
{
size_t attributes_size_in_bytes = 0;
size_t attributes_size = attributes.size();
for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index)
{
getAttributeContainer(attribute_index, [&](const auto & container)
{
attributes_size_in_bytes += container.capacity() * sizeof(container[0]);
});
}
return arena.size() + sizeof(Cell) * configuration.max_size_in_cells + attributes_size_in_bytes;
}
private:
struct FetchedKey
{
FetchedKey(size_t element_index_, bool is_default_)
: element_index(element_index_)
, is_default(is_default_)
{}
size_t element_index;
bool is_default;
};
template <typename KeysStorageFetchResult>
ALWAYS_INLINE KeysStorageFetchResult fetchColumnsForKeysImpl(
KeysStorageFetchResult fetchColumnsForKeysImpl(
const PaddedPODArray<KeyType> & keys,
const DictionaryStorageFetchRequest & fetch_request)
{
KeysStorageFetchResult result;
result.fetched_columns = fetch_request.makeAttributesResultColumns();
result.key_index_to_state.resize_fill(keys.size(), {KeyState::not_found});
result.key_index_to_state.resize_fill(keys.size());
const auto now = std::chrono::system_clock::now();
const time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
size_t fetched_columns_index = 0;
size_t keys_size = keys.size();
std::chrono::seconds max_lifetime_seconds(configuration.strict_max_lifetime_seconds);
size_t keys_size = keys.size();
PaddedPODArray<FetchedKey> fetched_keys;
fetched_keys.resize_fill(keys_size);
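/// First pass: resolve each key's state and remember which cell and default flag it maps to.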
for (size_t key_index = 0; key_index < keys_size; ++key_index)
{
auto key = keys[key_index];
auto * it = cache.find(key);
auto [key_state, cell_index] = getKeyStateAndCellIndex(key, now);
if (it)
if (unlikely(key_state == KeyState::not_found))
{
/// Columns values for key are serialized in cache now deserialize them
const auto & cell = it->getMapped();
result.key_index_to_state[key_index] = {KeyState::not_found};
++result.not_found_keys_size;
continue;
}
bool has_deadline = cellHasDeadline(cell);
auto & cell = cells[cell_index];
if (has_deadline && now > cell.deadline + max_lifetime_seconds)
{
result.key_index_to_state[key_index] = {KeyState::not_found};
++result.not_found_keys_size;
continue;
}
else if (has_deadline && now > cell.deadline)
{
result.key_index_to_state[key_index] = {KeyState::expired, fetched_columns_index};
++result.expired_keys_size;
}
else
{
result.key_index_to_state[key_index] = {KeyState::found, fetched_columns_index};
++result.found_keys_size;
}
result.expired_keys_size += static_cast<size_t>(key_state == KeyState::expired);
++fetched_columns_index;
result.key_index_to_state[key_index] = {key_state, fetched_columns_index};
fetched_keys[fetched_columns_index] = FetchedKey(cell.element_index, cell.is_default);
if (cell.isDefault())
++fetched_columns_index;
result.key_index_to_state[key_index].setDefaultValue(cell.is_default);
result.default_keys_size += cell.is_default;
}
result.found_keys_size = keys_size - (result.expired_keys_size + result.not_found_keys_size);
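/// Second pass: materialize each requested attribute column from the fetched cells.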
for (size_t attribute_index = 0; attribute_index < fetch_request.attributesSize(); ++attribute_index)
{
if (!fetch_request.shouldFillResultColumnWithIndex(attribute_index))
continue;
auto & attribute = attributes[attribute_index];
const auto & default_value_provider = fetch_request.defaultValueProviderAtIndex(attribute_index);
size_t fetched_keys_size = fetched_keys.size();
auto & fetched_column = *result.fetched_columns[attribute_index];
fetched_column.reserve(fetched_keys_size);
if (unlikely(attribute.is_complex_type))
{
auto & container = std::get<std::vector<Field>>(attribute.attribute_container);
for (size_t fetched_key_index = 0; fetched_key_index < fetched_columns_index; ++fetched_key_index)
{
result.key_index_to_state[key_index].setDefault();
++result.default_keys_size;
insertDefaultValuesIntoColumns(result.fetched_columns, fetch_request, key_index);
}
else
{
const char * place_for_serialized_columns = cell.place_for_serialized_columns;
deserializeAndInsertIntoColumns(result.fetched_columns, fetch_request, place_for_serialized_columns);
auto fetched_key = fetched_keys[fetched_key_index];
if (unlikely(fetched_key.is_default))
fetched_column.insert(default_value_provider.getDefaultValue(fetched_key_index));
else
fetched_column.insert(container[fetched_key.element_index]);
}
}
else
{
result.key_index_to_state[key_index] = {KeyState::not_found};
++result.not_found_keys_size;
auto type_call = [&](const auto & dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnType =
std::conditional_t<std::is_same_v<AttributeType, String>, ColumnString,
std::conditional_t<IsDecimalNumber<AttributeType>, ColumnDecimal<ValueType>,
ColumnVector<AttributeType>>>;
auto & container = std::get<PaddedPODArray<ValueType>>(attribute.attribute_container);
ColumnType & column_typed = static_cast<ColumnType &>(fetched_column);
if constexpr (std::is_same_v<ColumnType, ColumnString>)
{
for (size_t fetched_key_index = 0; fetched_key_index < fetched_columns_index; ++fetched_key_index)
{
auto fetched_key = fetched_keys[fetched_key_index];
if (unlikely(fetched_key.is_default))
column_typed.insert(default_value_provider.getDefaultValue(fetched_key_index));
else
{
auto item = container[fetched_key.element_index];
column_typed.insertData(item.data, item.size);
}
}
}
else
{
auto & data = column_typed.getData();
for (size_t fetched_key_index = 0; fetched_key_index < fetched_columns_index; ++fetched_key_index)
{
auto fetched_key = fetched_keys[fetched_key_index];
if (unlikely(fetched_key.is_default))
column_typed.insert(default_value_provider.getDefaultValue(fetched_key_index));
else
{
auto item = container[fetched_key.element_index];
data.push_back(item);
}
}
}
};
callOnDictionaryAttributeType(attribute.type, type_call);
}
}
@ -221,58 +304,108 @@ private:
void insertColumnsForKeysImpl(const PaddedPODArray<KeyType> & keys, Columns columns)
{
Arena temporary_values_pool;
size_t columns_to_serialize_size = columns.size();
PaddedPODArray<StringRef> temporary_column_data(columns_to_serialize_size);
const auto now = std::chrono::system_clock::now();
size_t keys_size = keys.size();
Field column_value;
for (size_t key_index = 0; key_index < keys_size; ++key_index)
for (size_t key_index = 0; key_index < keys.size(); ++key_index)
{
size_t allocated_size_for_columns = 0;
const char * block_start = nullptr;
auto key = keys[key_index];
auto * it = cache.find(key);
for (size_t column_index = 0; column_index < columns_to_serialize_size; ++column_index)
size_t cell_index = getCellIndexForInsert(key);
auto & cell = cells[cell_index];
bool cell_was_default = cell.is_default;
cell.is_default = false;
bool was_inserted = cell.deadline == 0;
if (was_inserted)
{
auto & column = columns[column_index];
temporary_column_data[column_index] = column->serializeValueIntoArena(key_index, temporary_values_pool, block_start);
allocated_size_for_columns += temporary_column_data[column_index].size;
}
if constexpr (std::is_same_v<KeyType, StringRef>)
cell.key = copyStringInArena(key);
else
cell.key = key;
char * place_for_serialized_columns = arena.alloc(allocated_size_for_columns);
memcpy(reinterpret_cast<void*>(place_for_serialized_columns), reinterpret_cast<const void*>(block_start), allocated_size_for_columns);
for (size_t attribute_index = 0; attribute_index < columns.size(); ++attribute_index)
{
auto & column = columns[attribute_index];
if (it)
{
/// Cell exists need to free previous serialized place and update deadline
auto & cell = it->getMapped();
getAttributeContainer(attribute_index, [&](auto & container)
{
container.emplace_back();
cell.element_index = container.size() - 1;
if (cell.place_for_serialized_columns)
arena.free(cell.place_for_serialized_columns, cell.allocated_size_for_columns);
using ElementType = std::decay_t<decltype(container[0])>;
setCellDeadline(cell, now);
cell.allocated_size_for_columns = allocated_size_for_columns;
cell.place_for_serialized_columns = place_for_serialized_columns;
column->get(key_index, column_value);
if constexpr (std::is_same_v<ElementType, Field>)
container.back() = column_value;
else if constexpr (std::is_same_v<ElementType, StringRef>)
{
const String & string_value = column_value.get<String>();
StringRef string_value_ref = StringRef {string_value.data(), string_value.size()};
StringRef inserted_value = copyStringInArena(string_value_ref);
container.back() = inserted_value;
}
else
container.back() = column_value.get<NearestFieldType<ElementType>>();
});
}
++size;
}
else
{
/// No cell exists so create and put in cache
Cell cell;
if (cell.key != key)
{
if constexpr (std::is_same_v<KeyType, StringRef>)
{
char * data = const_cast<char *>(cell.key.data);
arena.free(data, cell.key.size);
cell.key = copyStringInArena(key);
}
else
cell.key = key;
}
setCellDeadline(cell, now);
cell.allocated_size_for_columns = allocated_size_for_columns;
cell.place_for_serialized_columns = place_for_serialized_columns;
/// Put values into existing index
size_t index_to_use = cell.element_index;
insertCellInCache(key, cell);
for (size_t attribute_index = 0; attribute_index < columns.size(); ++attribute_index)
{
auto & column = columns[attribute_index];
getAttributeContainer(attribute_index, [&](auto & container)
{
using ElementType = std::decay_t<decltype(container[0])>;
column->get(key_index, column_value);
if constexpr (std::is_same_v<ElementType, Field>)
container[index_to_use] = column_value;
else if constexpr (std::is_same_v<ElementType, StringRef>)
{
const String & string_value = column_value.get<String>();
StringRef string_ref_value = StringRef {string_value.data(), string_value.size()};
StringRef inserted_value = copyStringInArena(string_ref_value);
if (!cell_was_default)
{
StringRef previous_value = container[index_to_use];
arena.free(const_cast<char *>(previous_value.data), previous_value.size);
}
container[index_to_use] = inserted_value;
}
else
container[index_to_use] = column_value.get<NearestFieldType<ElementType>>();
});
}
}
temporary_values_pool.rollback(allocated_size_for_columns);
setCellDeadline(cell, now);
}
}
@ -280,94 +413,224 @@ private:
{
const auto now = std::chrono::system_clock::now();
for (auto key : keys)
size_t keys_size = keys.size();
for (size_t key_index = 0; key_index < keys_size; ++key_index)
{
auto * it = cache.find(key);
auto key = keys[key_index];
if (it)
size_t cell_index = getCellIndexForInsert(key);
auto & cell = cells[cell_index];
bool was_inserted = cell.deadline == 0;
bool cell_was_default = cell.is_default;
cell.is_default = true;
if (was_inserted)
{
auto & cell = it->getMapped();
if constexpr (std::is_same_v<KeyType, StringRef>)
cell.key = copyStringInArena(key);
else
cell.key = key;
setCellDeadline(cell, now);
for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index)
{
getAttributeContainer(attribute_index, [&](auto & container)
{
container.emplace_back();
cell.element_index = container.size() - 1;
});
}
if (cell.place_for_serialized_columns)
arena.free(cell.place_for_serialized_columns, cell.allocated_size_for_columns);
cell.allocated_size_for_columns = 0;
cell.place_for_serialized_columns = nullptr;
++size;
}
else
{
Cell cell;
for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index)
{
getAttributeContainer(attribute_index, [&](const auto & container)
{
using ElementType = std::decay_t<decltype(container[0])>;
setCellDeadline(cell, now);
cell.allocated_size_for_columns = 0;
cell.place_for_serialized_columns = nullptr;
if constexpr (std::is_same_v<ElementType, StringRef>)
{
if (!cell_was_default)
{
StringRef previous_value = container[cell.element_index];
arena.free(const_cast<char *>(previous_value.data), previous_value.size);
}
}
});
}
insertCellInCache(key, cell);
if (cell.key != key)
{
if constexpr (std::is_same_v<KeyType, StringRef>)
{
char * data = const_cast<char *>(cell.key.data);
arena.free(data, cell.key.size);
cell.key = copyStringInArena(key);
}
else
cell.key = key;
}
}
setCellDeadline(cell, now);
}
}
PaddedPODArray<KeyType> getCachedKeysImpl() const
{
PaddedPODArray<KeyType> result;
result.reserve(cache.size());
result.reserve(size);
for (auto & node : cache)
for (auto & cell : cells)
{
auto & cell = node.getMapped();
if (cell.isDefault())
if (cell.deadline == 0)
continue;
result.emplace_back(node.getKey());
if (cell.is_default)
continue;
result.emplace_back(cell.key);
}
return result;
}
template <typename GetContainerFunc>
void getAttributeContainer(size_t attribute_index, GetContainerFunc && func)
{
auto & attribute = attributes[attribute_index];
auto & attribute_type = attribute.type;
if (unlikely(attribute.is_complex_type))
{
auto & container = std::get<std::vector<Field>>(attribute.attribute_container);
std::forward<GetContainerFunc>(func)(container);
}
else
{
auto type_call = [&](const auto & dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
auto & container = std::get<PaddedPODArray<ValueType>>(attribute.attribute_container);
std::forward<GetContainerFunc>(func)(container);
};
callOnDictionaryAttributeType(attribute_type, type_call);
}
}
template <typename GetContainerFunc>
void getAttributeContainer(size_t attribute_index, GetContainerFunc && func) const
{
return const_cast<std::decay_t<decltype(*this)> *>(this)->template getAttributeContainer(attribute_index, std::forward<GetContainerFunc>(func));
}
StringRef copyStringInArena(StringRef value_to_copy)
{
size_t value_to_copy_size = value_to_copy.size;
char * place_for_key = arena.alloc(value_to_copy_size);
memcpy(reinterpret_cast<void *>(place_for_key), reinterpret_cast<const void *>(value_to_copy.data), value_to_copy_size);
StringRef updated_value{place_for_key, value_to_copy_size};
return updated_value;
}
void setup(const DictionaryStructure & dictionary_structure)
{
/// For each dictionary attribute create storage attribute
/// For simple attributes create PODArray, for complex vector of Fields
attributes.reserve(dictionary_structure.attributes.size());
for (const auto & dictionary_attribute : dictionary_structure.attributes)
{
auto attribute_type = dictionary_attribute.underlying_type;
auto type_call = [&](const auto & dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
attributes.emplace_back();
auto & last_attribute = attributes.back();
last_attribute.type = attribute_type;
last_attribute.is_complex_type = dictionary_attribute.is_nullable || dictionary_attribute.is_array;
if (dictionary_attribute.is_nullable)
last_attribute.attribute_container = std::vector<Field>();
else
last_attribute.attribute_container = PaddedPODArray<ValueType>();
};
callOnDictionaryAttributeType(attribute_type, type_call);
}
}
using TimePoint = std::chrono::system_clock::time_point;
struct Cell
{
TimePoint deadline;
size_t allocated_size_for_columns;
char * place_for_serialized_columns;
inline bool isDefault() const { return place_for_serialized_columns == nullptr; }
inline void setDefault()
{
place_for_serialized_columns = nullptr;
allocated_size_for_columns = 0;
}
KeyType key;
size_t element_index;
bool is_default;
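/// A deadline of 0 marks a cell that was never used (see the was_inserted checks above).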
time_t deadline;
};
void insertCellInCache(KeyType & key, const Cell & cell)
struct Attribute
{
if constexpr (dictionary_key_type == DictionaryKeyType::complex)
{
/// Copy complex key into arena and put in cache
size_t key_size = key.size;
char * place_for_key = arena.alloc(key_size);
memcpy(reinterpret_cast<void *>(place_for_key), reinterpret_cast<const void *>(key.data), key_size);
KeyType updated_key{place_for_key, key_size};
key = updated_key;
}
AttributeUnderlyingType type;
bool is_complex_type;
cache.insert(key, cell);
}
std::variant<
PaddedPODArray<UInt8>,
PaddedPODArray<UInt16>,
PaddedPODArray<UInt32>,
PaddedPODArray<UInt64>,
PaddedPODArray<UInt128>,
PaddedPODArray<Int8>,
PaddedPODArray<Int16>,
PaddedPODArray<Int32>,
PaddedPODArray<Int64>,
PaddedPODArray<Decimal32>,
PaddedPODArray<Decimal64>,
PaddedPODArray<Decimal128>,
PaddedPODArray<Float32>,
PaddedPODArray<Float64>,
PaddedPODArray<StringRef>,
std::vector<Field>> attribute_container;
};
inline static bool cellHasDeadline(const Cell & cell)
{
return cell.deadline != std::chrono::system_clock::from_time_t(0);
}
CacheDictionaryStorageConfiguration configuration;
pcg64 rnd_engine;
size_t size_overlap_mask = 0;
size_t size = 0;
PaddedPODArray<Cell> cells;
ArenaWithFreeLists arena;
std::vector<Attribute> attributes;
inline void setCellDeadline(Cell & cell, TimePoint now)
{
if (configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0)
{
cell.deadline = std::chrono::system_clock::from_time_t(0);
/// This may not be obvious, but when we determine whether this cell is expired or expired permanently, we add strict_max_lifetime_seconds
/// to the expiration time, and that addition can overflow quite easily.
auto deadline = std::chrono::time_point<std::chrono::system_clock>::max() - 2 * std::chrono::seconds(configuration.strict_max_lifetime_seconds);
cell.deadline = std::chrono::system_clock::to_time_t(deadline);
return;
}
@ -375,44 +638,75 @@ private:
size_t max_sec_lifetime = configuration.lifetime.max_sec;
std::uniform_int_distribution<UInt64> distribution{min_sec_lifetime, max_sec_lifetime};
cell.deadline = now + std::chrono::seconds(distribution(rnd_engine));
auto deadline = now + std::chrono::seconds(distribution(rnd_engine));
cell.deadline = std::chrono::system_clock::to_time_t(deadline);
}
template <typename>
friend class ArenaCellDisposer;
CacheDictionaryStorageConfiguration configuration;
ArenaWithFreeLists arena;
pcg64 rnd_engine;
class ArenaCellDisposer
inline size_t getCellIndex(const KeyType key) const
{
public:
ArenaWithFreeLists & arena;
const size_t hash = DefaultHash<KeyType>()(key);
const size_t index = hash & size_overlap_mask;
return index;
}
template <typename Key, typename Value>
void operator()(const Key & key, const Value & value) const
using KeyStateAndCellIndex = std::pair<KeyState::State, size_t>;
inline KeyStateAndCellIndex getKeyStateAndCellIndex(const KeyType key, const time_t now) const
{
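/// Linearly probe at most max_collision_length cells starting from the key's hash slot.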
size_t place_value = getCellIndex(key);
const size_t place_value_end = place_value + max_collision_length;
time_t max_lifetime_seconds = static_cast<time_t>(configuration.strict_max_lifetime_seconds);
for (; place_value < place_value_end; ++place_value)
{
/// In case of complex key we keep it in arena
if constexpr (std::is_same_v<Key, StringRef>)
arena.free(const_cast<char *>(key.data), key.size);
const auto cell_place_value = place_value & size_overlap_mask;
const auto & cell = cells[cell_place_value];
if (value.place_for_serialized_columns)
arena.free(value.place_for_serialized_columns, value.allocated_size_for_columns);
if (cell.key != key)
continue;
if (unlikely(now > cell.deadline + max_lifetime_seconds))
return std::make_pair(KeyState::not_found, cell_place_value);
if (unlikely(now > cell.deadline))
return std::make_pair(KeyState::expired, cell_place_value);
return std::make_pair(KeyState::found, cell_place_value);
}
};
using SimpleKeyLRUHashMap = LRUHashMap<UInt64, Cell, ArenaCellDisposer>;
using ComplexKeyLRUHashMap = LRUHashMapWithSavedHash<StringRef, Cell, ArenaCellDisposer>;
return std::make_pair(KeyState::not_found, place_value & size_overlap_mask);
}
using CacheLRUHashMap = std::conditional_t<
dictionary_key_type == DictionaryKeyType::simple,
SimpleKeyLRUHashMap,
ComplexKeyLRUHashMap>;
inline size_t getCellIndexForInsert(const KeyType & key) const
{
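/// Reuse an empty cell or the key's own cell within the probe window; otherwise evict the cell with the oldest deadline.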
size_t place_value = getCellIndex(key);
const size_t place_value_end = place_value + max_collision_length;
size_t oldest_place_value = place_value;
CacheLRUHashMap cache;
time_t oldest_time = std::numeric_limits<time_t>::max();
for (; place_value < place_value_end; ++place_value)
{
const size_t cell_place_value = place_value & size_overlap_mask;
const Cell cell = cells[cell_place_value];
if (cell.deadline == 0)
return cell_place_value;
if (cell.key == key)
return cell_place_value;
if (cell.deadline < oldest_time)
{
oldest_time = cell.deadline;
oldest_place_value = cell_place_value;
}
}
return oldest_place_value;
}
};
}


@ -12,9 +12,9 @@ struct KeyState
{
enum State: uint8_t
{
not_found = 2,
expired = 4,
found = 8,
not_found = 0,
expired = 1,
found = 2,
};
KeyState(State state_, size_t fetched_column_index_)
@ -31,9 +31,10 @@ struct KeyState
inline bool isNotFound() const { return state == State::not_found; }
inline bool isDefault() const { return is_default; }
inline void setDefault() { is_default = true; }
inline void setDefaultValue(bool is_default_value) { is_default = is_default_value; }
/// Valid only if keyState is found or expired
inline size_t getFetchedColumnIndex() const { return fetched_column_index; }
inline void setFetchedColumnIndex(size_t fetched_column_index_value) { fetched_column_index = fetched_column_index_value; }
private:
State state = not_found;
size_t fetched_column_index = 0;
@ -111,8 +112,8 @@ public:
/// Return size of keys in storage
virtual size_t getSize() const = 0;
/// Return maximum size of keys in storage
virtual size_t getMaxSize() const = 0;
/// Returns storage load factor
virtual double getLoadFactor() const = 0;
/// Return bytes allocated in storage
virtual size_t getBytesAllocated() const = 0;


@ -17,7 +17,7 @@
#include <Common/Arena.h>
#include <Common/ArenaWithFreeLists.h>
#include <Common/MemorySanitizer.h>
#include <Common/HashTable/LRUHashMap.h>
#include <Common/HashTable/HashMap.h>
#include <IO/AIO.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Dictionaries/ICacheDictionaryStorage.h>
@ -56,7 +56,6 @@ struct SSDCacheDictionaryStorageConfiguration
const std::string file_path;
const size_t max_partitions_count;
const size_t max_stored_keys;
const size_t block_size;
const size_t file_blocks_size;
const size_t read_buffer_blocks_size;
@ -127,7 +126,7 @@ public:
/// Reset block with new block_data
/// block_data must be filled with zeroes if it is new block
ALWAYS_INLINE inline void reset(char * new_block_data)
inline void reset(char * new_block_data)
{
block_data = new_block_data;
current_block_offset = block_header_size;
@ -135,13 +134,13 @@ public:
}
/// Check if there is enough place to write the key in the block
ALWAYS_INLINE inline bool enoughtPlaceToWriteKey(const SSDCacheSimpleKey & cache_key) const
inline bool enoughtPlaceToWriteKey(const SSDCacheSimpleKey & cache_key) const
{
return (current_block_offset + (sizeof(cache_key.key) + sizeof(cache_key.size) + cache_key.size)) <= block_size;
}
/// Check if there is enough place to write the key in the block
ALWAYS_INLINE inline bool enoughtPlaceToWriteKey(const SSDCacheComplexKey & cache_key) const
inline bool enoughtPlaceToWriteKey(const SSDCacheComplexKey & cache_key) const
{
const StringRef & key = cache_key.key;
size_t complex_key_size = sizeof(key.size) + key.size;
@ -152,7 +151,7 @@ public:
/// Writes the key and returns the offset in the SSD cache block where the data was written.
/// It is the client's responsibility to check that there is enough place in the block to write the key.
/// Returns true if the key was written and false if there was not enough place to write it.
ALWAYS_INLINE inline bool writeKey(const SSDCacheSimpleKey & cache_key, size_t & offset_in_block)
inline bool writeKey(const SSDCacheSimpleKey & cache_key, size_t & offset_in_block)
{
assert(cache_key.size > 0);
@ -181,7 +180,7 @@ public:
return true;
}
ALWAYS_INLINE inline bool writeKey(const SSDCacheComplexKey & cache_key, size_t & offset_in_block)
inline bool writeKey(const SSDCacheComplexKey & cache_key, size_t & offset_in_block)
{
assert(cache_key.size > 0);
@ -216,20 +215,20 @@ public:
return true;
}
ALWAYS_INLINE inline size_t getKeysSize() const { return keys_size; }
inline size_t getKeysSize() const { return keys_size; }
/// Write keys size into block header
ALWAYS_INLINE inline void writeKeysSize()
inline void writeKeysSize()
{
char * keys_size_offset_data = block_data + block_header_check_sum_size;
std::memcpy(keys_size_offset_data, &keys_size, sizeof(size_t));
}
/// Get check sum from block header
ALWAYS_INLINE inline size_t getCheckSum() const { return unalignedLoad<size_t>(block_data); }
inline size_t getCheckSum() const { return unalignedLoad<size_t>(block_data); }
/// Calculate check sum in block
ALWAYS_INLINE inline size_t calculateCheckSum() const
inline size_t calculateCheckSum() const
{
size_t calculated_check_sum = static_cast<size_t>(CityHash_v1_0_2::CityHash64(block_data + block_header_check_sum_size, block_size - block_header_check_sum_size));
@ -237,7 +236,7 @@ public:
}
/// Check if check sum from block header matched calculated check sum in block
ALWAYS_INLINE inline bool checkCheckSum() const
inline bool checkCheckSum() const
{
size_t calculated_check_sum = calculateCheckSum();
size_t check_sum = getCheckSum();
@ -246,16 +245,16 @@ public:
}
/// Write check sum in block header
ALWAYS_INLINE inline void writeCheckSum()
inline void writeCheckSum()
{
size_t check_sum = static_cast<size_t>(CityHash_v1_0_2::CityHash64(block_data + block_header_check_sum_size, block_size - block_header_check_sum_size));
std::memcpy(block_data, &check_sum, sizeof(size_t));
}
ALWAYS_INLINE inline size_t getBlockSize() const { return block_size; }
inline size_t getBlockSize() const { return block_size; }
/// Returns block data
ALWAYS_INLINE inline char * getBlockData() const { return block_data; }
inline char * getBlockData() const { return block_data; }
/// Reads keys that were serialized in the block
/// It is the client's responsibility to ensure that simple or complex keys were written in the block
@ -337,9 +336,7 @@ inline bool operator==(const SSDCacheIndex & lhs, const SSDCacheIndex & rhs)
return lhs.block_index == rhs.block_index && lhs.offset_in_block == rhs.offset_in_block;
}
/** SSDCacheMemoryBuffer initialized with block size and memory buffer blocks size.
* Allocate block_size * memory_buffer_blocks_size bytes with page alignment.
* Logically represents multiple memory_buffer_blocks_size blocks and current write block.
/** Logically represents multiple memory_buffer_blocks_size SSDCacheBlocks and current write block.
* If a key cannot be written into current_write_block, the current block's keys size and check sum are written
* and the buffer advances current_write_block_index.
* If current_write_block_index == memory_buffer_blocks_size, writeKey will always return false.
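A minimal standalone sketch of this write discipline, assuming fixed-size blocks backed by a std::vector (the class and method names are illustrative, not the actual ClickHouse API):

#include <cstddef>
#include <cstring>
#include <vector>

// Sketch: a fixed number of fixed-size blocks; writes go to the current block,
// advancing to the next block on overflow and failing once all blocks are used.
class MemoryBufferSketch
{
public:
    MemoryBufferSketch(size_t block_size_, size_t blocks_count_)
        : block_size(block_size_), storage(block_size_ * blocks_count_), blocks_count(blocks_count_) {}

    bool write(const char * data, size_t size)
    {
        if (size > block_size)
            return false; // entry never fits in any block

        if (current_block == blocks_count)
            return false; // buffer exhausted, caller must flush to disk

        if (offset_in_block + size > block_size)
        {
            // Finalize the current block (real code writes keys count and check sum here).
            ++current_block;
            offset_in_block = 0;
            if (current_block == blocks_count)
                return false;
        }

        std::memcpy(storage.data() + current_block * block_size + offset_in_block, data, size);
        offset_in_block += size;
        return true;
    }

private:
    size_t block_size = 0;
    std::vector<char> storage;
    size_t blocks_count = 0;
    size_t current_block = 0;
    size_t offset_in_block = 0;
};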
@ -444,7 +441,7 @@ private:
size_t current_block_index = 0;
};
/// TODO: Add documentation
/// Logically represents multiple memory_buffer_blocks_size SSDCacheBlocks on file system
template <typename SSDCacheKeyType>
class SSDCacheFileBuffer : private boost::noncopyable
{
@ -614,11 +611,13 @@ public:
}
template <typename FetchBlockFunc>
ALWAYS_INLINE void fetchBlocks(char * read_buffer, size_t read_from_file_buffer_blocks_size, const PaddedPODArray<size_t> & blocks_to_fetch, FetchBlockFunc && func) const
void fetchBlocks(size_t read_from_file_buffer_blocks_size, const PaddedPODArray<size_t> & blocks_to_fetch, FetchBlockFunc && func) const
{
if (blocks_to_fetch.empty())
return;
Memory<Allocator<true>> read_buffer(read_from_file_buffer_blocks_size * block_size, 4096);
size_t blocks_to_fetch_size = blocks_to_fetch.size();
PaddedPODArray<iocb> requests;
@ -631,7 +630,7 @@ public:
{
iocb request{};
char * buffer_place = read_buffer + block_size * (block_to_fetch_index % read_from_file_buffer_blocks_size);
char * buffer_place = read_buffer.data() + block_size * (block_to_fetch_index % read_from_file_buffer_blocks_size);
#if defined(__FreeBSD__)
request.aio.aio_lio_opcode = LIO_READ;
@ -751,7 +750,7 @@ private:
int fd = -1;
};
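fetchBlocks above now allocates its read buffer with 4096-byte alignment, since direct asynchronous reads generally require page-aligned buffers. A hedged sketch of such an allocation using POSIX posix_memalign (the helper name and the 4096 constant are illustrative):

#include <cstdlib>
#include <new>

// Allocate a page-aligned buffer suitable for O_DIRECT/AIO reads of
// `blocks` blocks of `block_size` bytes. The caller frees it with std::free().
inline char * allocateAlignedReadBuffer(std::size_t blocks, std::size_t block_size)
{
    void * ptr = nullptr;
    if (posix_memalign(&ptr, 4096, blocks * block_size) != 0)
        throw std::bad_alloc();
    return static_cast<char *>(ptr);
}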
ALWAYS_INLINE inline static int preallocateDiskSpace(int fd, size_t offset, size_t len)
inline static int preallocateDiskSpace(int fd, size_t offset, size_t len)
{
#if defined(__FreeBSD__)
return posix_fallocate(fd, offset, len);
@ -760,7 +759,7 @@ private:
#endif
}
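Only the FreeBSD branch is visible in this hunk; on Linux the function presumably falls through to fallocate with FALLOC_FL_KEEP_SIZE, which reserves blocks without growing the visible file size. A sketch of the two-way dispatch (the Linux branch and headers are assumptions, not confirmed by the diff):

#include <fcntl.h>

// Reserve len bytes at offset. posix_fallocate is portable POSIX;
// fallocate with FALLOC_FL_KEEP_SIZE is Linux-specific and leaves
// st_size unchanged while reserving the blocks.
static int preallocateSketch(int fd, off_t offset, off_t len)
{
#if defined(__FreeBSD__)
    return posix_fallocate(fd, offset, len);
#else
    return fallocate(fd, FALLOC_FL_KEEP_SIZE, offset, len);
#endif
}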
ALWAYS_INLINE inline static char * getRequestBuffer(const iocb & request)
inline static char * getRequestBuffer(const iocb & request)
{
char * result = nullptr;
@ -773,7 +772,7 @@ private:
return result;
}
ALWAYS_INLINE inline static ssize_t eventResult(io_event & event)
inline static ssize_t eventResult(io_event & event)
{
ssize_t bytes_written;
@ -795,7 +794,13 @@ private:
size_t current_blocks_size = 0;
};
/// TODO: Add documentation
/** ICacheDictionaryStorage implementation that keeps column data serialized in memory and in disk partitions.
  * Data is first written into the memory buffer.
  * When the memory buffer is full, it is flushed to the disk partition.
  * If the memory buffer cannot be flushed to the associated disk partition, then, if a new partition
  * can be allocated (current partition index < max_partitions_count), the storage allocates a new partition; otherwise old partitions are reused.
  * The index maps a key to a partition block and offset.
  */
template <DictionaryKeyType dictionary_key_type>
class SSDCacheDictionaryStorage final : public ICacheDictionaryStorage
{
@ -806,9 +811,7 @@ public:
explicit SSDCacheDictionaryStorage(const SSDCacheDictionaryStorageConfiguration & configuration_)
: configuration(configuration_)
, file_buffer(configuration_.file_path, configuration.block_size, configuration.file_blocks_size)
, read_from_file_buffer(configuration_.block_size * configuration_.read_buffer_blocks_size, 4096)
, rnd_engine(randomSeed())
, index(configuration.max_stored_keys, false, { complex_key_arena })
{
memory_buffer_partitions.emplace_back(configuration.block_size, configuration.write_buffer_blocks_size);
}
@ -897,14 +900,31 @@ public:
size_t getSize() const override { return index.size(); }
size_t getMaxSize() const override { return index.getMaxSize(); }
double getLoadFactor() const override
{
size_t partitions_size = memory_buffer_partitions.size();
if (partitions_size == configuration.max_partitions_count)
return 1.0;
auto & current_memory_partition = memory_buffer_partitions[current_partition_index];
size_t full_partitions = partitions_size - 1;
size_t blocks_in_memory = (full_partitions * configuration.write_buffer_blocks_size) + current_memory_partition.getCurrentBlockIndex();
size_t blocks_on_disk = file_buffer.getCurrentBlockIndex();
size_t max_blocks_size = (configuration.file_blocks_size + configuration.write_buffer_blocks_size) * configuration.max_partitions_count;
double load_factor = static_cast<double>(blocks_in_memory + blocks_on_disk) / max_blocks_size;
return load_factor;
}
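For intuition, a worked instance of the load factor formula with made-up settings (all numbers below are assumptions, not defaults from this diff):

#include <cstdio>

int main()
{
    // Assumed configuration: 16 max partitions, 80 write-buffer blocks,
    // 4096 file blocks per partition.
    const double write_buffer_blocks = 80;
    const double file_blocks = 4096;
    const double max_partitions = 16;

    // Assume 3 full memory partitions, the 4th on its 20th block,
    // and 12000 blocks already flushed to disk.
    const double blocks_in_memory = 3 * write_buffer_blocks + 20;
    const double blocks_on_disk = 12000;
    const double max_blocks = (file_blocks + write_buffer_blocks) * max_partitions;

    // (260 + 12000) / 66816 ~= 0.1835
    std::printf("load factor: %.4f\n", (blocks_in_memory + blocks_on_disk) / max_blocks);
}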
size_t getBytesAllocated() const override
{
size_t memory_partitions_bytes_size = memory_buffer_partitions.size() * configuration.write_buffer_blocks_size * configuration.block_size;
size_t file_partitions_bytes_size = memory_buffer_partitions.size() * configuration.file_blocks_size * configuration.block_size;
return index.getSizeInBytes() + memory_partitions_bytes_size + file_partitions_bytes_size;
return index.getBufferSizeInBytes() + memory_partitions_bytes_size + file_partitions_bytes_size;
}
private:
@ -920,8 +940,7 @@ private:
default_value
};
TimePoint deadline;
time_t deadline;
SSDCacheIndex index;
size_t in_memory_partition_index;
CellState state;
@ -933,13 +952,12 @@ private:
struct KeyToBlockOffset
{
KeyToBlockOffset(size_t key_index_, size_t offset_in_block_, bool is_expired_)
: key_index(key_index_), offset_in_block(offset_in_block_), is_expired(is_expired_)
KeyToBlockOffset(size_t key_index_, size_t offset_in_block_)
: key_index(key_index_), offset_in_block(offset_in_block_)
{}
size_t key_index = 0;
size_t offset_in_block = 0;
bool is_expired = false;
};
template <typename Result>
@ -950,20 +968,24 @@ private:
Result result;
result.fetched_columns = fetch_request.makeAttributesResultColumns();
result.key_index_to_state.resize_fill(keys.size(), {KeyState::not_found});
result.key_index_to_state.resize_fill(keys.size());
const auto now = std::chrono::system_clock::now();
const time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
size_t fetched_columns_index = 0;
using BlockIndexToKeysMap = std::unordered_map<size_t, std::vector<KeyToBlockOffset>, DefaultHash<size_t>>;
using BlockIndexToKeysMap = absl::flat_hash_map<size_t, PaddedPODArray<KeyToBlockOffset>, DefaultHash<size_t>>;
BlockIndexToKeysMap block_to_keys_map;
absl::flat_hash_set<size_t, DefaultHash<size_t>> unique_blocks_to_request;
PaddedPODArray<size_t> blocks_to_request;
std::chrono::seconds strict_max_lifetime_seconds(configuration.strict_max_lifetime_seconds);
time_t strict_max_lifetime_seconds = static_cast<time_t>(configuration.strict_max_lifetime_seconds);
size_t keys_size = keys.size();
for (size_t attribute_size = 0; attribute_size < fetch_request.attributesSize(); ++attribute_size)
if (fetch_request.shouldFillResultColumnWithIndex(attribute_size))
result.fetched_columns[attribute_size]->reserve(keys_size);
for (size_t key_index = 0; key_index < keys_size; ++key_index)
{
auto key = keys[key_index];
@ -978,9 +1000,7 @@ private:
const auto & cell = it->getMapped();
bool has_deadline = cellHasDeadline(cell);
if (has_deadline && now > cell.deadline + strict_max_lifetime_seconds)
if (unlikely(now > cell.deadline + strict_max_lifetime_seconds))
{
++result.not_found_keys_size;
continue;
@ -989,14 +1009,14 @@ private:
bool cell_is_expired = false;
KeyState::State key_state = KeyState::found;
if (has_deadline && now > cell.deadline)
if (now > cell.deadline)
{
cell_is_expired = true;
key_state = KeyState::expired;
}
result.expired_keys_size += cell_is_expired;
result.found_keys_size += !cell_is_expired;
result.expired_keys_size += static_cast<size_t>(cell_is_expired);
result.found_keys_size += static_cast<size_t>(!cell_is_expired);
switch (cell.state)
{
@ -1012,13 +1032,20 @@ private:
}
case Cell::on_disk:
{
block_to_keys_map[cell.index.block_index].emplace_back(key_index, cell.index.offset_in_block, cell_is_expired);
PaddedPODArray<KeyToBlockOffset> & keys_block = block_to_keys_map[cell.index.block_index];
keys_block.emplace_back(key_index, cell.index.offset_in_block);
if (!unique_blocks_to_request.contains(cell.index.block_index))
{
KeyState::State state = cell_is_expired ? KeyState::expired : KeyState::found;
/// Fetched column index will be set later during fetch blocks
result.key_index_to_state[key_index] = {state, 0};
auto insert_result = unique_blocks_to_request.insert(cell.index.block_index);
bool was_inserted = insert_result.second;
if (was_inserted)
blocks_to_request.emplace_back(cell.index.block_index);
unique_blocks_to_request.insert(cell.index.block_index);
}
break;
}
case Cell::default_value:
@ -1037,7 +1064,7 @@ private:
/// Sort blocks by offset before start async io requests
std::sort(blocks_to_request.begin(), blocks_to_request.end());
file_buffer.fetchBlocks(read_from_file_buffer.m_data, configuration.read_buffer_blocks_size, blocks_to_request, [&](size_t block_index, char * block_data)
file_buffer.fetchBlocks(configuration.read_buffer_blocks_size, blocks_to_request, [&](size_t block_index, char * block_data)
{
auto & keys_in_block = block_to_keys_map[block_index];
@ -1046,10 +1073,7 @@ private:
char * key_data = block_data + key_in_block.offset_in_block;
deserializeAndInsertIntoColumns(result.fetched_columns, fetch_request, key_data);
if (key_in_block.is_expired)
result.key_index_to_state[key_in_block.key_index] = {KeyState::expired, fetched_columns_index};
else
result.key_index_to_state[key_in_block.key_index] = {KeyState::found, fetched_columns_index};
result.key_index_to_state[key_in_block.key_index].setFetchedColumnIndex(fetched_columns_index);
++fetched_columns_index;
}
@ -1087,7 +1111,7 @@ private:
throw Exception("Serialized columns size is greater than allowed block size and metadata", ErrorCodes::UNSUPPORTED_METHOD);
/// We cannot reuse a place that is already allocated in the file or memory cache, so we erase the key from the index
index.erase(key);
eraseKeyFromIndex(key);
Cell cell;
setCellDeadline(cell, now);
@ -1114,8 +1138,7 @@ private:
for (auto key : keys)
{
/// We cannot reuse a place that is already allocated in the file or memory cache, so we erase the key from the index
index.erase(key);
eraseKeyFromIndex(key);
Cell cell;
@ -1135,7 +1158,7 @@ private:
key = updated_key;
}
index.insert(key, cell);
index[key] = cell;
}
}
@ -1188,7 +1211,7 @@ private:
cell.index = cache_index;
cell.in_memory_partition_index = current_partition_index;
index.insert(ssd_cache_key.key, cell);
index[ssd_cache_key.key] = cell;
break;
}
else
@ -1218,7 +1241,7 @@ private:
if (old_key_cell.isOnDisk() &&
old_key_block >= block_index_in_file_before_write &&
old_key_block < file_read_end_block_index)
index.erase(old_key);
eraseKeyFromIndex(old_key);
}
}
}
@ -1271,7 +1294,7 @@ private:
cell.index = cache_index;
cell.in_memory_partition_index = current_partition_index;
index.insert(ssd_cache_key.key, cell);
index[ssd_cache_key.key] = cell;
break;
}
else
@ -1296,16 +1319,12 @@ private:
}
}
inline static bool cellHasDeadline(const Cell & cell)
{
return cell.deadline != std::chrono::system_clock::from_time_t(0);
}
inline void setCellDeadline(Cell & cell, TimePoint now)
{
if (configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0)
{
cell.deadline = std::chrono::system_clock::from_time_t(0);
auto deadline = std::chrono::time_point<std::chrono::system_clock>::max() - 2 * std::chrono::seconds(configuration.strict_max_lifetime_seconds);
cell.deadline = std::chrono::system_clock::to_time_t(deadline);
return;
}
@ -1313,47 +1332,45 @@ private:
size_t max_sec_lifetime = configuration.lifetime.max_sec;
std::uniform_int_distribution<UInt64> distribution{min_sec_lifetime, max_sec_lifetime};
cell.deadline = now + std::chrono::seconds{distribution(rnd_engine)};
auto deadline = now + std::chrono::seconds(distribution(rnd_engine));
cell.deadline = std::chrono::system_clock::to_time_t(deadline);
}
template <typename>
friend class ArenaCellKeyDisposer;
inline void eraseKeyFromIndex(KeyType key)
{
auto it = index.find(key);
if (it == nullptr)
return;
/// In case of a complex key, the key stored in the hash table points into the arena, so copy it before erasing
KeyType key_copy = it->getKey();
index.erase(key);
if constexpr (std::is_same_v<KeyType, StringRef>)
complex_key_arena.free(const_cast<char *>(key_copy.data), key_copy.size);
}
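The key copy above matters because, for complex keys, the StringRef held by the hash table points into the arena, and that exact allocation must be freed after the erase. A toy illustration of the pattern with std::map and a stand-in arena (all names here are illustrative):

#include <cassert>
#include <cstring>
#include <map>
#include <string_view>

// Toy arena: map keys (string_view) point into memory it owns.
struct ToyArena
{
    char * alloc(std::string_view s)
    {
        char * data = new char[s.size()];
        std::memcpy(data, s.data(), s.size());
        return data;
    }
    void free(const char * data) { delete[] data; }
};

int main()
{
    ToyArena arena;
    std::map<std::string_view, int> index;

    char * stored = arena.alloc("key");
    index.emplace(std::string_view(stored, 3), 42);

    // Lookups may use a different (equal but distinct) buffer.
    std::string_view lookup_key("key");
    auto it = index.find(lookup_key);
    assert(it != index.end());

    std::string_view key_copy = it->first; // the arena-backed view
    index.erase(it);
    arena.free(key_copy.data());           // free the arena allocation, not lookup_key
}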
SSDCacheDictionaryStorageConfiguration configuration;
SSDCacheFileBuffer<SSDCacheKeyType> file_buffer;
Memory<Allocator<true>> read_from_file_buffer;
std::vector<SSDCacheMemoryBuffer<SSDCacheKeyType>> memory_buffer_partitions;
pcg64 rnd_engine;
class ArenaCellKeyDisposer
{
public:
ArenaWithFreeLists & arena;
using SimpleKeyHashMap = HashMap<UInt64, Cell>;
using ComplexKeyHashMap = HashMapWithSavedHash<StringRef, Cell>;
template <typename Key, typename Value>
void operator()(const Key & key, const Value &) const
{
/// In case of complex key we keep it in arena
if constexpr (std::is_same_v<Key, StringRef>)
arena.free(const_cast<char *>(key.data), key.size);
}
};
using SimpleKeyLRUHashMap = LRUHashMap<UInt64, Cell, ArenaCellKeyDisposer>;
using ComplexKeyLRUHashMap = LRUHashMapWithSavedHash<StringRef, Cell, ArenaCellKeyDisposer>;
using CacheLRUHashMap = std::conditional_t<
using CacheMap = std::conditional_t<
dictionary_key_type == DictionaryKeyType::simple,
SimpleKeyLRUHashMap,
ComplexKeyLRUHashMap>;
SimpleKeyHashMap,
ComplexKeyHashMap>;
ArenaWithFreeLists complex_key_arena;
CacheLRUHashMap index;
CacheMap index;
size_t current_partition_index = 0;

View File

@ -1,154 +0,0 @@
clickhouse-client --query="DROP TABLE IF EXISTS simple_cache_dictionary_table_source";
clickhouse-client --query="CREATE TABLE simple_cache_dictionary_table_source (id UInt64, value1 String, value2 UInt64, value3 String, value4 Float64, value5 Decimal64(4)) ENGINE=TinyLog;"
clickhouse-client --query="INSERT INTO simple_cache_dictionary_table_source SELECT number, concat('Value1 ', toString(number)), number, concat('Value3 ', toString(number)), toFloat64(number), cast(number, 'Decimal64(4)') FROM system.numbers LIMIT 1000000;"
clickhouse-client --multiquery --query="CREATE DICTIONARY clickhouse_simple_cache_dictionary (
id UInt64,
value1 String,
value2 UInt64,
value3 String,
value4 Float64,
value5 Decimal64(4)
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_cache_dictionary_table_source' PASSWORD '' DB 'default'))
LIFETIME(MIN 300 MAX 300)
LAYOUT(CACHE(SIZE_IN_CELLS 100000));"
clickhouse-client --multiquery --query="CREATE DICTIONARY clickhouse_ssd_simple_cache_dictionary (
id UInt64,
value1 String,
value2 UInt64,
value3 String,
value4 Float64,
value5 Decimal64(4)
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_cache_dictionary_table_source' PASSWORD '' DB 'default'))
LIFETIME(MIN 300 MAX 300)
LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576 WRITE_BUFFER_SIZE 327680 MAX_STORED_KEYS 1048576 PATH '/opt/mkita/ClickHouse/build_release/programs/ssd_cache'));"
clickhouse-client --multiquery --query="CREATE DICTIONARY clickhouse_dummy_simple_cache_dictionary (
id UInt64,
value1 String,
value2 UInt64,
value3 String,
value4 Float64,
value5 Decimal64(4)
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_cache_dictionary_table_source' PASSWORD '' DB 'default'))
LIFETIME(MIN 300 MAX 300)
LAYOUT(DUMMY_SIMPLE());"
./clickhouse-benchmark --query="SELECT
dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value1', number),
dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value2', number),
dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value3', number),
dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value4', number),
dictGet('default.clickhouse_dummy_simple_cache_dictionary', 'value5', number)
FROM system.numbers
LIMIT 10000
FORMAT Null"
./clickhouse-benchmark --query="SELECT
dictGet('default.clickhouse_simple_cache_dictionary', ('value1', 'value2', 'value3', 'value4', 'value5'), number)
FROM system.numbers
LIMIT 10000
FORMAT Null"
./clickhouse-benchmark --query="SELECT dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value1', number) FROM system.numbers_mt LIMIT 10000 FORMAT Null"
./clickhouse-benchmark --query="SELECT
dictGet('default.clickhouse_simple_cache_dictionary', 'value1', number),
dictGet('default.clickhouse_simple_cache_dictionary', 'value2', number),
dictGet('default.clickhouse_simple_cache_dictionary', 'value3', number),
dictGet('default.clickhouse_simple_cache_dictionary', 'value4', number),
dictGet('default.clickhouse_simple_cache_dictionary', 'value5', number)
FROM system.numbers
LIMIT 10000
FORMAT Null"
./clickhouse-benchmark --query="SELECT dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value1', number) FROM system.numbers_mt LIMIT 10000 FORMAT Null"
SELECT
dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value1', number),
dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value2', number),
dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value3', number),
dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value4', number),
dictGet('default.clickhouse_ssd_simple_cache_dictionary', 'value5', number)
FROM system.numbers
LIMIT 10000
FORMAT Null
SELECT dictGet('default.clickhouse_simple_cache_dictionary', ('value1', 'value2', 'value3', 'value4', 'value5'), number) FROM system.numbers LIMIT 10000 FORMAT Null
SELECT dictGet('default.clickhouse_ssd_simple_cache_dictionary', ('value1', 'value2', 'value3', 'value4', 'value5'), number) FROM system.numbers LIMIT 10000
FORMAT Null
SELECT
dictGet('default.clickhouse_simple_cache_dictionary', ('value1', 'value2', 'value3', 'value4', 'value5'), number)
FROM system.numbers
LIMIT 10000
FORMAT
Null
SELECT
dictGet('default.clickhouse_simple_cache_dictionary', 'value1', number),
dictGet('default.clickhouse_simple_cache_dictionary', 'value2', number),
dictGet('default.clickhouse_simple_cache_dictionary', 'value3', number),
dictGet('default.clickhouse_simple_cache_dictionary', 'value4', number),
dictGet('default.clickhouse_simple_cache_dictionary', 'value5', number)
FROM system.numbers
LIMIT 10000
FORMAT
Null
SELECT
dictGet('default.clickhouse_simple_cache_dictionary', 'value1', number),
dictGet('default.clickhouse_simple_cache_dictionary', 'value2', number)
FROM system.numbers
LIMIT 10000
FORMAT Null
SELECT
dictGet('clickhouse_simple_cache_dictionary', 'value1', number)
FROM system.numbers
LIMIT 100000
FORMAT Null
SELECT
dictGet('clickhouse_simple_cache_dictionary', 'value2', number)
FROM system.numbers
LIMIT 100000
FORMAT Null
SELECT
dictGet('clickhouse_simple_cache_dictionary', 'value3', number)
FROM system.numbers
LIMIT 100000
FORMAT Null
SELECT
dictGet('clickhouse_simple_cache_dictionary', 'value4', number)
FROM system.numbers
LIMIT 100000
FORMAT Null
SELECT
dictGet('clickhouse_simple_cache_dictionary', 'value5', number)
FROM system.numbers
LIMIT 100000
FORMAT Null
SELECT
dictGet('clickhouse_simple_cache_dictionary', 'value1', number),
dictGet('clickhouse_simple_cache_dictionary', 'value2', number),
dictGet('clickhouse_simple_cache_dictionary', 'value3', number),
dictGet('clickhouse_simple_cache_dictionary', 'value4', number),
dictGet('clickhouse_simple_cache_dictionary', 'value5', number)
FROM system.numbers
LIMIT 100000
FORMAT Null
SELECT * FROM clickhouse_simple_cache_dictionary_table;

View File

@ -1,6 +1,6 @@
#include "CacheDictionary.h"
#include "SSDCacheDictionaryStorage.h"
#include "CacheDictionaryStorage.h"
#include "SSDCacheDictionaryStorage.h"
#include <Dictionaries/DictionaryFactory.h>
namespace DB
@ -20,13 +20,13 @@ CacheDictionaryStorageConfiguration parseCacheStorageConfiguration(
const DictionaryLifetime & dict_lifetime,
DictionaryKeyType dictionary_key_type)
{
String dictionary_type_prefix = dictionary_key_type == DictionaryKeyType::complex ? ".complex_key_cache." : ".cache.";
String dictionary_type_prefix = (dictionary_key_type == DictionaryKeyType::complex) ? ".complex_key_cache." : ".cache.";
String dictionary_configuration_prefix = layout_prefix + dictionary_type_prefix;
const size_t size = config.getUInt64(dictionary_configuration_prefix + "size_in_cells");
if (size == 0)
throw Exception(ErrorCodes::TOO_SMALL_BUFFER_SIZE,
"({}: cache dictionary cannot have 0 cells",
"({}): cache dictionary cannot have 0 cells",
full_name);
size_t dict_lifetime_seconds = static_cast<size_t>(dict_lifetime.max_sec);
@ -59,7 +59,6 @@ SSDCacheDictionaryStorageConfiguration parseSSDCacheStorageConfiguration(
static constexpr size_t DEFAULT_READ_BUFFER_SIZE_BYTES = 16 * DEFAULT_SSD_BLOCK_SIZE_BYTES;
static constexpr size_t DEFAULT_WRITE_BUFFER_SIZE_BYTES = DEFAULT_SSD_BLOCK_SIZE_BYTES;
static constexpr size_t DEFAULT_MAX_STORED_KEYS = 100000;
static constexpr size_t DEFAULT_PARTITIONS_COUNT = 16;
const size_t max_partitions_count
@ -94,16 +93,11 @@ SSDCacheDictionaryStorageConfiguration parseSSDCacheStorageConfiguration(
if (directory_path.at(0) != '/')
directory_path = std::filesystem::path{config.getString("path")}.concat(directory_path).string();
const size_t max_stored_keys_in_partition
= config.getInt64(dictionary_configuration_prefix + "max_stored_keys", DEFAULT_MAX_STORED_KEYS);
const size_t rounded_size = roundUpToPowerOfTwoOrZero(max_stored_keys_in_partition);
SSDCacheDictionaryStorageConfiguration configuration{
strict_max_lifetime_seconds,
dict_lifetime,
directory_path,
max_partitions_count,
rounded_size,
block_size,
file_size / block_size,
read_buffer_size / block_size,
@ -194,7 +188,8 @@ DictionaryPtr createCacheDictionaryLayout(
const bool allow_read_expired_keys = config.getBool(layout_prefix + ".cache.allow_read_expired_keys", false);
auto storage_configuration = parseCacheStorageConfiguration(full_name, config, layout_prefix, dict_lifetime, dictionary_key_type);
auto storage = std::make_shared<CacheDictionaryStorage<dictionary_key_type>>(storage_configuration);
std::shared_ptr<ICacheDictionaryStorage> storage = std::make_shared<CacheDictionaryStorage<dictionary_key_type>>(dict_struct, storage_configuration);
auto update_queue_configuration = parseCacheDictionaryUpdateQueueConfiguration(full_name, config, layout_prefix, dictionary_key_type);

View File

@ -209,7 +209,13 @@ void DiskCacheWrapper::clearDirectory(const String & path)
void DiskCacheWrapper::moveDirectory(const String & from_path, const String & to_path)
{
if (cache_disk->exists(from_path))
{
/// Destination directory may not be empty if a previous directory move attempt failed.
if (cache_disk->exists(to_path) && cache_disk->isDirectory(to_path))
cache_disk->clearDirectory(to_path);
cache_disk->moveDirectory(from_path, to_path);
}
DiskDecorator::moveDirectory(from_path, to_path);
}
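A standalone sketch of the same retry-tolerant directory move with std::filesystem (the helper name is hypothetical; on POSIX, rename succeeds over an empty destination directory, which is why clearing it first is enough):

#include <filesystem>

namespace fs = std::filesystem;

// Move a directory, tolerating leftovers from a previously failed move:
// if the destination already exists as a directory, empty it first so the
// rename does not fail with "directory not empty".
void moveDirectoryIdempotent(const fs::path & from, const fs::path & to)
{
    if (!fs::exists(from))
        return;

    if (fs::exists(to) && fs::is_directory(to))
        for (const auto & entry : fs::directory_iterator(to))
            fs::remove_all(entry.path());

    fs::rename(from, to);
}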

View File

@ -538,8 +538,9 @@ private:
[[maybe_unused]] const auto block_size = static_cast<size_t>(EVP_CIPHER_block_size(evp_cipher));
[[maybe_unused]] const auto iv_size = static_cast<size_t>(EVP_CIPHER_iv_length(evp_cipher));
const auto key_size = static_cast<size_t>(EVP_CIPHER_key_length(evp_cipher));
const auto tag_size = 16; // https://tools.ietf.org/html/rfc5116#section-5.1
const size_t key_size = static_cast<size_t>(EVP_CIPHER_key_length(evp_cipher));
static constexpr size_t tag_size = 16; // https://tools.ietf.org/html/rfc5116#section-5.1
auto decrypted_result_column = ColumnString::create();
auto & decrypted_result_column_data = decrypted_result_column->getChars();
@ -549,9 +550,17 @@ private:
size_t resulting_size = 0;
for (size_t r = 0; r < input_rows_count; ++r)
{
resulting_size += input_column->getDataAt(r).size + 1;
size_t string_size = input_column->getDataAt(r).size;
resulting_size += string_size + 1; /// With terminating zero.
if constexpr (mode == CipherMode::RFC5116_AEAD_AES_GCM)
{
if (string_size < tag_size)
throw Exception("Encrypted data is smaller than the size of additional data for AEAD mode, cannot decrypt.",
ErrorCodes::BAD_ARGUMENTS);
resulting_size -= tag_size;
}
}
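The size arithmetic here: in RFC 5116 AES-GCM the ciphertext carries a 16-byte authentication tag, so each decrypted row shrinks by 16 bytes and any input shorter than the tag is malformed. A tiny standalone check mirroring the loop (no OpenSSL involved):

#include <cstddef>
#include <stdexcept>

// Result buffer size contributed by one AEAD row: strip the 16-byte GCM tag,
// keep +1 for the terminating zero of the string column.
std::size_t aeadRowResultSize(std::size_t input_size)
{
    constexpr std::size_t tag_size = 16; // RFC 5116, section 5.1
    if (input_size < tag_size)
        throw std::invalid_argument("encrypted data shorter than the AEAD tag");
    return input_size - tag_size + 1;
}
// Example: a 20-byte input contributes 4 plaintext bytes plus the terminating zero.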
#if defined(MEMORY_SANITIZER)
@ -565,6 +574,7 @@ private:
decrypted_result_column_data.resize(resulting_size);
#endif
}
auto * decrypted = decrypted_result_column_data.data();
KeyHolder<mode> key_holder;
@ -631,7 +641,7 @@ private:
// 1.a.2: Set AAD if present
if (aad_column)
{
const auto aad_data = aad_column->getDataAt(r);
StringRef aad_data = aad_column->getDataAt(r);
int tmp_len = 0;
if (aad_data.size != 0 && EVP_DecryptUpdate(evp_ctx, nullptr, &tmp_len,
reinterpret_cast<const unsigned char *>(aad_data.data), aad_data.size) != 1)

View File

@ -42,11 +42,11 @@ struct SimdJSONParser
ALWAYS_INLINE bool isBool() const { return element.type() == simdjson::dom::element_type::BOOL; }
ALWAYS_INLINE bool isNull() const { return element.type() == simdjson::dom::element_type::NULL_VALUE; }
ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().first; }
ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().first; }
ALWAYS_INLINE double getDouble() const { return element.get_double().first; }
ALWAYS_INLINE bool getBool() const { return element.get_bool().first; }
ALWAYS_INLINE std::string_view getString() const { return element.get_string().first; }
ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().value_unsafe(); }
ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().value_unsafe(); }
ALWAYS_INLINE double getDouble() const { return element.get_double().value_unsafe(); }
ALWAYS_INLINE bool getBool() const { return element.get_bool().value_unsafe(); }
ALWAYS_INLINE std::string_view getString() const { return element.get_string().value_unsafe(); }
ALWAYS_INLINE Array getArray() const;
ALWAYS_INLINE Object getObject() const;
@ -75,7 +75,7 @@ struct SimdJSONParser
ALWAYS_INLINE Iterator begin() const { return array.begin(); }
ALWAYS_INLINE Iterator end() const { return array.end(); }
ALWAYS_INLINE size_t size() const { return array.size(); }
ALWAYS_INLINE Element operator[](size_t index) const { assert(index < size()); return array.at(index).first; }
ALWAYS_INLINE Element operator[](size_t index) const { assert(index < size()); return array.at(index).value_unsafe(); }
private:
simdjson::dom::array array;
@ -111,7 +111,7 @@ struct SimdJSONParser
if (x.error())
return false;
result = x.first;
result = x.value_unsafe();
return true;
}
@ -137,7 +137,7 @@ struct SimdJSONParser
if (document.error())
return false;
result = document.first;
result = document.value_unsafe();
return true;
}
@ -155,12 +155,12 @@ private:
inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const
{
return element.get_array().first;
return element.get_array().value_unsafe();
}
inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const
{
return element.get_object().first;
return element.get_object().value_unsafe();
}
}
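These replacements track the simdjson error-handling API: simdjson_result<T>::first became value_unsafe(), which skips the error check and must only be called after error() has been inspected, exactly as the wrappers above do. A short usage sketch against the simdjson DOM API (the function name is illustrative):

#include <cstdint>
#include <simdjson.h>

// Read an int64 value, checking error() before value_unsafe().
bool readInt64(const simdjson::dom::element & element, int64_t & out)
{
    auto result = element.get_int64();
    if (result.error())
        return false;

    out = result.value_unsafe(); // safe: error() was checked first
    return true;
}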

View File

@ -49,8 +49,11 @@ public:
{}
template <typename ... Args>
inline auto execute(const DateTime64 & t, Args && ... args) const
inline auto NO_SANITIZE_UNDEFINED execute(const DateTime64 & t, Args && ... args) const
{
/// Type conversion from float to integer may be required.
/// We are OK with an implementation-specific result for out-of-range and denormal conversions.
if constexpr (TransformHasExecuteOverload_v<DateTime64, decltype(scale_multiplier), Args...>)
{
return wrapped_transform.execute(t, scale_multiplier, std::forward<Args>(args)...);

View File

@ -190,7 +190,7 @@ private:
}
static constexpr size_t MAX_ARRAY_SIZE = 1ULL << 30;
if (static_cast<size_t>(max_key - min_key) > MAX_ARRAY_SIZE)
if (static_cast<size_t>(max_key) - static_cast<size_t>(min_key) > MAX_ARRAY_SIZE)
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size in the result of function {}", getName());
/* fill the result arrays */

View File

@ -16,6 +16,7 @@ namespace ErrorCodes
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
}
namespace
@ -110,6 +111,9 @@ public:
arguments[2].column->getFloat64(i),
max_width);
if (!isFinite(width))
throw Exception("Value of width must not be NaN and Inf", ErrorCodes::BAD_ARGUMENTS);
size_t next_size = current_offset + UnicodeBar::getWidthInBytes(width) + 1;
dst_chars.resize(next_size);
UnicodeBar::render(width, reinterpret_cast<char *>(&dst_chars[current_offset]));

View File

@ -41,7 +41,8 @@ void registerFunctionThrowIf(FunctionFactory &);
void registerFunctionVersion(FunctionFactory &);
void registerFunctionBuildId(FunctionFactory &);
void registerFunctionUptime(FunctionFactory &);
void registerFunctionTimeZone(FunctionFactory &);
void registerFunctionTimezone(FunctionFactory &);
void registerFunctionTimezoneOf(FunctionFactory &);
void registerFunctionRunningAccumulate(FunctionFactory &);
void registerFunctionRunningDifference(FunctionFactory &);
void registerFunctionRunningDifferenceStartingWithFirstValue(FunctionFactory &);
@ -111,7 +112,8 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
registerFunctionVersion(factory);
registerFunctionBuildId(factory);
registerFunctionUptime(factory);
registerFunctionTimeZone(factory);
registerFunctionTimezone(factory);
registerFunctionTimezoneOf(factory);
registerFunctionRunningAccumulate(factory);
registerFunctionRunningDifference(factory);
registerFunctionRunningDifferenceStartingWithFirstValue(factory);

View File

@ -12,13 +12,13 @@ namespace
/** Returns the server time zone.
*/
class FunctionTimeZone : public IFunction
class FunctionTimezone : public IFunction
{
public:
static constexpr auto name = "timezone";
static FunctionPtr create(const Context &)
{
return std::make_shared<FunctionTimeZone>();
return std::make_shared<FunctionTimezone>();
}
String getName() const override
@ -45,9 +45,10 @@ public:
}
void registerFunctionTimeZone(FunctionFactory & factory)
void registerFunctionTimezone(FunctionFactory & factory)
{
factory.registerFunction<FunctionTimeZone>();
factory.registerFunction<FunctionTimezone>();
factory.registerAlias("timeZone", "timezone");
}
}

View File

@ -0,0 +1,118 @@
#include <Functions/IFunctionImpl.h>
#include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeDateTime.h>
#include <common/DateLUTImpl.h>
#include <Core/Field.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
namespace
{
/** timezoneOf(x) - get the name of the timezone of DateTime data type.
* Example: Europe/Moscow.
*/
class ExecutableFunctionTimezoneOf : public IExecutableFunctionImpl
{
public:
static constexpr auto name = "timezoneOf";
String getName() const override { return name; }
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
/// Execute the function on the columns.
ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
DataTypePtr type_no_nullable = removeNullable(arguments[0].type);
return DataTypeString().createColumnConst(input_rows_count,
dynamic_cast<const TimezoneMixin &>(*type_no_nullable).getTimeZone().getTimeZone());
}
};
class BaseFunctionTimezoneOf : public IFunctionBaseImpl
{
public:
BaseFunctionTimezoneOf(DataTypes argument_types_, DataTypePtr return_type_)
: argument_types(std::move(argument_types_)), return_type(std::move(return_type_)) {}
static constexpr auto name = "timezoneOf";
String getName() const override { return name; }
bool isDeterministic() const override { return true; }
bool isDeterministicInScopeOfQuery() const override { return true; }
const DataTypes & getArgumentTypes() const override { return argument_types; }
const DataTypePtr & getResultType() const override { return return_type; }
ExecutableFunctionImplPtr prepare(const ColumnsWithTypeAndName &) const override
{
return std::make_unique<ExecutableFunctionTimezoneOf>();
}
ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const ColumnsWithTypeAndName & arguments) const override
{
DataTypePtr type_no_nullable = removeNullable(arguments[0].type);
return DataTypeString().createColumnConst(1,
dynamic_cast<const TimezoneMixin &>(*type_no_nullable).getTimeZone().getTimeZone());
}
private:
DataTypes argument_types;
DataTypePtr return_type;
};
class FunctionTimezoneOfBuilder : public IFunctionOverloadResolverImpl
{
public:
static constexpr auto name = "timezoneOf";
String getName() const override { return name; }
static FunctionOverloadResolverImplPtr create(const Context &) { return std::make_unique<FunctionTimezoneOfBuilder>(); }
size_t getNumberOfArguments() const override { return 1; }
DataTypePtr getReturnType(const DataTypes & types) const override
{
DataTypePtr type_no_nullable = removeNullable(types[0]);
if (isDateTime(type_no_nullable) || isDateTime64(type_no_nullable))
return std::make_shared<DataTypeString>();
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad argument for function {}, should be DateTime or DateTime64", name);
}
FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
{
return std::make_unique<BaseFunctionTimezoneOf>(DataTypes{arguments[0].type}, return_type);
}
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t /*number_of_arguments*/) const override { return {0}; }
};
}
void registerFunctionTimezoneOf(FunctionFactory & factory)
{
factory.registerFunction<FunctionTimezoneOfBuilder>();
factory.registerAlias("timeZoneOf", "timezoneOf");
}
}

View File

@ -21,11 +21,11 @@ namespace
{
/// Just changes time zone information for data type. The calculation is free.
class FunctionToTimeZone : public IFunction
class FunctionToTimezone : public IFunction
{
public:
static constexpr auto name = "toTimeZone";
static FunctionPtr create(const Context &) { return std::make_shared<FunctionToTimeZone>(); }
static constexpr auto name = "toTimezone";
static FunctionPtr create(const Context &) { return std::make_shared<FunctionToTimezone>(); }
String getName() const override
{
@ -64,7 +64,8 @@ public:
void registerFunctionToTimeZone(FunctionFactory & factory)
{
factory.registerFunction<FunctionToTimeZone>();
factory.registerFunction<FunctionToTimezone>();
factory.registerAlias("toTimeZone", "toTimezone");
}
}

View File

@ -467,6 +467,7 @@ SRCS(
timeSlot.cpp
timeSlots.cpp
timezone.cpp
timezoneOf.cpp
timezoneOffset.cpp
toColumnTypeName.cpp
toCustomWeek.cpp
@ -506,7 +507,7 @@ SRCS(
toStartOfTenMinutes.cpp
toStartOfYear.cpp
toTime.cpp
toTimeZone.cpp
toTimezone.cpp
toTypeName.cpp
toUnixTimestamp64Micro.cpp
toUnixTimestamp64Milli.cpp

View File

@ -1,6 +1,7 @@
#include <Poco/Net/NetException.h>
#include <IO/ReadBufferFromPocoSocket.h>
#include <IO/TimeoutSetter.h>
#include <Common/Exception.h>
#include <Common/NetException.h>
#include <Common/Stopwatch.h>
@ -27,23 +28,23 @@ bool ReadBufferFromPocoSocket::nextImpl()
ssize_t bytes_read = 0;
Stopwatch watch;
int flags = 0;
if (async_callback)
flags |= MSG_DONTWAIT;
/// Add more details to exceptions.
try
{
bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size(), flags);
/// If async_callback is specified, and read is blocking, run async_callback and try again later.
/// If async_callback is specified, and read will block, run async_callback and try again later.
/// It is expected that file descriptor may be polled externally.
/// Note that receive timeout is not checked here. External code should check it while polling.
while (bytes_read < 0 && async_callback && errno == EAGAIN)
{
while (async_callback && !socket.poll(0, Poco::Net::Socket::SELECT_READ))
async_callback(socket.impl()->sockfd(), socket.getReceiveTimeout(), socket_description);
bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size(), flags);
}
/// receiveBytes in SecureStreamSocket throws TimeoutException after max(receive_timeout, send_timeout),
/// but we want to get this exception exactly after receive_timeout. So, set send_timeout = receive_timeout
/// before receiveBytes.
std::unique_ptr<TimeoutSetter> timeout_setter = nullptr;
if (socket.secure())
timeout_setter = std::make_unique<TimeoutSetter>(dynamic_cast<Poco::Net::StreamSocket &>(socket), socket.getReceiveTimeout(), socket.getReceiveTimeout());
bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size());
}
catch (const Poco::Net::NetException & e)
{

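TimeoutSetter here acts as an RAII guard that temporarily overrides the socket timeouts and restores them when the scope ends, including on exceptions. A simplified sketch of the idea over a generic socket type (the interface is assumed, not the actual Poco or ClickHouse class):

#include <chrono>

// Minimal RAII timeout guard: set send/receive timeouts on construction,
// restore the previous values in the destructor.
template <typename Socket>
class TimeoutGuard
{
public:
    TimeoutGuard(Socket & socket_, std::chrono::microseconds send, std::chrono::microseconds receive)
        : socket(socket_)
        , old_send(socket_.getSendTimeout())
        , old_receive(socket_.getReceiveTimeout())
    {
        socket.setSendTimeout(send);
        socket.setReceiveTimeout(receive);
    }

    ~TimeoutGuard()
    {
        socket.setSendTimeout(old_send);
        socket.setReceiveTimeout(old_receive);
    }

private:
    Socket & socket;
    std::chrono::microseconds old_send;
    std::chrono::microseconds old_receive;
};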
View File

@ -1,4 +1,4 @@
#include "TimeoutSetter.h"
#include <IO/TimeoutSetter.h>
#include <common/logger_useful.h>

View File

@ -1,6 +1,7 @@
#include <Poco/Net/NetException.h>
#include <IO/WriteBufferFromPocoSocket.h>
#include <IO/TimeoutSetter.h>
#include <Common/Exception.h>
#include <Common/NetException.h>
@ -40,6 +41,13 @@ void WriteBufferFromPocoSocket::nextImpl()
/// Add more details to exceptions.
try
{
/// sendBytes in SecureStreamSocket throws TimeoutException after max(receive_timeout, send_timeout),
/// but we want to get this exception exactly after send_timeout. So, set receive_timeout = send_timeout
/// before sendBytes.
std::unique_ptr<TimeoutSetter> timeout_setter = nullptr;
if (socket.secure())
timeout_setter = std::make_unique<TimeoutSetter>(dynamic_cast<Poco::Net::StreamSocket &>(socket), socket.getSendTimeout(), socket.getSendTimeout());
res = socket.impl()->sendBytes(working_buffer.begin() + bytes_written, offset() - bytes_written);
}
catch (const Poco::Net::NetException & e)

View File

@ -50,6 +50,7 @@ SRCS(
ReadBufferFromPocoSocket.cpp
ReadHelpers.cpp
SeekAvoidingReadBuffer.cpp
TimeoutSetter.cpp
UseSSL.cpp
WriteBufferFromFile.cpp
WriteBufferFromFileBase.cpp

View File

@ -818,13 +818,10 @@ private:
if (!min_id)
min_id = getMinIDToFinishLoading(forced_to_reload);
if (info->state_id >= min_id)
return true; /// stop
if (info->loading_id < min_id)
startLoading(*info, forced_to_reload, *min_id);
/// Wait for the next event if loading wasn't completed, and stop otherwise.
/// Wait for the next event if loading wasn't completed, or stop otherwise.
return (info->state_id >= min_id);
};
@ -850,9 +847,6 @@ private:
if (filter && !filter(name))
continue;
if (info.state_id >= min_id)
continue;
if (info.loading_id < min_id)
startLoading(info, forced_to_reload, *min_id);

View File

@ -260,7 +260,8 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
renamed = true;
}
database->loadStoredObjects(context, has_force_restore_data_flag, create.attach && force_attach);
/// We use the global context here, because storages' lifetime is longer than the query context's lifetime
database->loadStoredObjects(context.getGlobalContext(), has_force_restore_data_flag, create.attach && force_attach);
}
catch (...)
{
@ -970,7 +971,8 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
if (create.as_table_function)
{
const auto & factory = TableFunctionFactory::instance();
res = factory.get(create.as_table_function, context)->execute(create.as_table_function, context, create.table, properties.columns);
auto table_func = factory.get(create.as_table_function, context);
res = table_func->execute(create.as_table_function, context, create.table, properties.columns);
res->renameInMemory({create.database, create.table, create.uuid});
}
else

View File

@ -393,7 +393,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
view = nullptr;
}
if (try_move_to_prewhere && storage && query.where() && !query.prewhere() && !query.final())
if (try_move_to_prewhere && storage && query.where() && !query.prewhere())
{
/// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable
if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty())

View File

@ -1,5 +1,6 @@
#include <Interpreters/WindowDescription.h>
#include <Core/Field.h>
#include <IO/Operators.h>
#include <Parsers/ASTFunction.h>
@ -60,7 +61,7 @@ void WindowFrame::toString(WriteBuffer & buf) const
}
else
{
buf << abs(begin_offset);
buf << applyVisitor(FieldVisitorToString(), begin_offset);
buf << " "
<< (begin_preceding ? "PRECEDING" : "FOLLOWING");
}
@ -77,7 +78,7 @@ void WindowFrame::toString(WriteBuffer & buf) const
}
else
{
buf << abs(end_offset);
buf << applyVisitor(FieldVisitorToString(), end_offset);
buf << " "
<< (end_preceding ? "PRECEDING" : "FOLLOWING");
}
@ -121,23 +122,33 @@ void WindowFrame::checkValid() const
if (end_type == BoundaryType::Offset
&& begin_type == BoundaryType::Offset)
{
// Frame starting with following rows can't have preceding rows.
if (!(end_preceding && !begin_preceding))
// Frame start offset must be less than or equal to the frame end offset.
bool begin_less_equal_end;
if (begin_preceding && end_preceding)
{
// Frame start offset must be less than or equal to the frame end offset.
const bool begin_before_end
= begin_offset * (begin_preceding ? -1 : 1)
<= end_offset * (end_preceding ? -1 : 1);
if (!begin_before_end)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Frame start offset {} {} does not precede the frame end offset {} {}",
begin_offset, begin_preceding ? "PRECEDING" : "FOLLOWING",
end_offset, end_preceding ? "PRECEDING" : "FOLLOWING");
}
return;
begin_less_equal_end = begin_offset >= end_offset;
}
else if (begin_preceding && !end_preceding)
{
begin_less_equal_end = true;
}
else if (!begin_preceding && end_preceding)
{
begin_less_equal_end = false;
}
else /* if (!begin_preceding && !end_preceding) */
{
begin_less_equal_end = begin_offset <= end_offset;
}
if (!begin_less_equal_end)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Frame start offset {} {} does not precede the frame end offset {} {}",
begin_offset, begin_preceding ? "PRECEDING" : "FOLLOWING",
end_offset, end_preceding ? "PRECEDING" : "FOLLOWING");
}
return;
}
throw Exception(ErrorCodes::BAD_ARGUMENTS,

View File
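The four branches encode an ordering check on frame boundaries without doing signed arithmetic on the Field offsets (which the removed code attempted with * (begin_preceding ? -1 : 1)). The equivalent logic, mapping each boundary to a signed position where PRECEDING is negative, as a compact sketch:

#include <cstdint>

// The frame is valid when the signed begin position <= the signed end position.
bool frameOffsetsValid(std::uint64_t begin_offset, bool begin_preceding,
                       std::uint64_t end_offset, bool end_preceding)
{
    if (begin_preceding && end_preceding)
        return begin_offset >= end_offset;   // -begin <= -end
    if (!begin_preceding && !end_preceding)
        return begin_offset <= end_offset;   //  begin <=  end

    // A PRECEDING begin with a FOLLOWING end is always ordered;
    // a FOLLOWING begin with a PRECEDING end never is.
    return begin_preceding;
}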

@ -44,14 +44,13 @@ struct WindowFrame
// The offset may be either preceding or following, controlled by begin_preceding,
// but the offset value must be positive.
BoundaryType begin_type = BoundaryType::Unbounded;
// This should have been a Field but I'm getting some crazy linker errors.
int64_t begin_offset = 0;
Field begin_offset = 0;
bool begin_preceding = true;
// Here as well, Unbounded can only be UNBOUNDED FOLLOWING, and end_preceding
// must be false.
BoundaryType end_type = BoundaryType::Current;
int64_t end_offset = 0;
Field end_offset = 0;
bool end_preceding = false;

View File

@ -377,6 +377,11 @@ Field convertFieldToType(const Field & from_value, const IDataType & to_type, co
else if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(&to_type))
{
const IDataType & nested_type = *nullable_type->getNestedType();
/// NULL remains NULL after any conversion.
if (WhichDataType(nested_type).isNothing())
return {};
if (from_type_hint && from_type_hint->equals(nested_type))
return from_value;
return convertFieldToTypeImpl(from_value, nested_type, from_type_hint);

View File

@ -290,8 +290,6 @@ std::optional<Blocks> evaluateExpressionOverConstantCondition(const ASTPtr & nod
{
Blocks result;
// TODO: `node` may be always-false literal.
if (const auto * fn = node->as<ASTFunction>())
{
const auto dnf = analyzeFunction(fn, target_expr);
@ -350,6 +348,14 @@ std::optional<Blocks> evaluateExpressionOverConstantCondition(const ASTPtr & nod
}
}
}
else if (const auto * literal = node->as<ASTLiteral>())
{
// Check if it's always true or false.
if (literal->value.getType() == Field::Types::UInt64 && literal->value.get<UInt64>() == 0)
return {result};
else
return {};
}
return {result};
}

View File

@ -137,8 +137,8 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F
if (window())
{
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str <<
"WINDOW " << (s.hilite ? hilite_none : "");
window()->formatImpl(s, state, frame);
"WINDOW" << (s.hilite ? hilite_none : "");
window()->as<ASTExpressionList &>().formatImplMultiline(s, state, frame);
}
if (orderBy())

View File

@ -35,6 +35,8 @@ String ASTWindowDefinition::getID(char) const
void ASTWindowDefinition::formatImpl(const FormatSettings & settings,
FormatState & state, FormatStateStacked format_frame) const
{
format_frame.expression_list_prepend_whitespace = false;
if (partition_by)
{
settings.ostr << "PARTITION BY ";
@ -70,7 +72,8 @@ void ASTWindowDefinition::formatImpl(const FormatSettings & settings,
}
else
{
settings.ostr << abs(frame.begin_offset);
settings.ostr << applyVisitor(FieldVisitorToString(),
frame.begin_offset);
settings.ostr << " "
<< (!frame.begin_preceding ? "FOLLOWING" : "PRECEDING");
}
@ -85,7 +88,8 @@ void ASTWindowDefinition::formatImpl(const FormatSettings & settings,
}
else
{
settings.ostr << abs(frame.end_offset);
settings.ostr << applyVisitor(FieldVisitorToString(),
frame.end_offset);
settings.ostr << " "
<< (!frame.end_preceding ? "FOLLOWING" : "PRECEDING");
}

View File

@ -581,30 +581,20 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
else if (parser_literal.parse(pos, ast_literal, expected))
{
const Field & value = ast_literal->as<ASTLiteral &>().value;
if (!isInt64FieldType(value.getType()))
if ((node->frame.type == WindowFrame::FrameType::Rows
|| node->frame.type == WindowFrame::FrameType::Groups)
&& !(value.getType() == Field::Types::UInt64
|| (value.getType() == Field::Types::Int64
&& value.get<Int64>() >= 0)))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Only integer frame offsets are supported, '{}' is not supported.",
"Frame offset for '{}' frame must be a nonnegative integer, '{}' of type '{}' given.",
WindowFrame::toString(node->frame.type),
applyVisitor(FieldVisitorToString(), value),
Field::Types::toString(value.getType()));
}
node->frame.begin_offset = value.get<Int64>();
node->frame.begin_offset = value;
node->frame.begin_type = WindowFrame::BoundaryType::Offset;
// We can easily get a UINT64_MAX here, which doesn't even fit into
// int64_t. Not sure what checks we are going to need here after we
// support floats and dates.
if (node->frame.begin_offset > INT_MAX || node->frame.begin_offset < INT_MIN)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Frame offset must be between {} and {}, but {} is given",
INT_MAX, INT_MIN, node->frame.begin_offset);
}
if (node->frame.begin_offset < 0)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Frame start offset must be greater than zero, {} given",
node->frame.begin_offset);
}
}
else
{
@ -652,28 +642,20 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
else if (parser_literal.parse(pos, ast_literal, expected))
{
const Field & value = ast_literal->as<ASTLiteral &>().value;
if (!isInt64FieldType(value.getType()))
if ((node->frame.type == WindowFrame::FrameType::Rows
|| node->frame.type == WindowFrame::FrameType::Groups)
&& !(value.getType() == Field::Types::UInt64
|| (value.getType() == Field::Types::Int64
&& value.get<Int64>() >= 0)))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Only integer frame offsets are supported, '{}' is not supported.",
"Frame offset for '{}' frame must be a nonnegative integer, '{}' of type '{}' given.",
WindowFrame::toString(node->frame.type),
applyVisitor(FieldVisitorToString(), value),
Field::Types::toString(value.getType()));
}
node->frame.end_offset = value.get<Int64>();
node->frame.end_offset = value;
node->frame.end_type = WindowFrame::BoundaryType::Offset;
if (node->frame.end_offset > INT_MAX || node->frame.end_offset < INT_MIN)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Frame offset must be between {} and {}, but {} is given",
INT_MAX, INT_MIN, node->frame.end_offset);
}
if (node->frame.end_offset < 0)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Frame end offset must be greater than zero, {} given",
node->frame.end_offset);
}
}
else
{

View File

@ -275,7 +275,8 @@ Token Lexer::nextTokenImpl()
else
++pos;
}
return Token(TokenType::ErrorMultilineCommentIsNotClosed, token_begin, end);
pos = end;
return Token(TokenType::ErrorMultilineCommentIsNotClosed, token_begin, pos);
}
}
return Token(TokenType::Slash, token_begin, pos);

View File

@ -3,6 +3,7 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Common/Arena.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/getLeastSupertype.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/convertFieldToType.h>
@ -27,7 +28,8 @@ public:
virtual ~IWindowFunction() = default;
// Must insert the result for current_row.
virtual void windowInsertResultInto(IColumn & to, const WindowTransform * transform) = 0;
virtual void windowInsertResultInto(const WindowTransform * transform,
size_t function_index) = 0;
};
// Compares ORDER BY column values at given rows to find the boundaries of frame:
@ -37,7 +39,7 @@ template <typename ColumnType>
static int compareValuesWithOffset(const IColumn * _compared_column,
size_t compared_row, const IColumn * _reference_column,
size_t reference_row,
uint64_t _offset,
const Field & _offset,
bool offset_is_preceding)
{
// Casting the columns to the known type here makes it faster, probably
@ -46,7 +48,8 @@ static int compareValuesWithOffset(const IColumn * _compared_column,
_compared_column);
const auto * reference_column = assert_cast<const ColumnType *>(
_reference_column);
const auto offset = static_cast<typename ColumnType::ValueType>(_offset);
const auto offset = _offset.get<typename ColumnType::ValueType>();
assert(offset >= 0);
const auto compared_value_data = compared_column->getDataAt(compared_row);
assert(compared_value_data.size == sizeof(typename ColumnType::ValueType));
@ -101,6 +104,53 @@ static int compareValuesWithOffset(const IColumn * _compared_column,
}
}
// A specialization of compareValuesWithOffset for floats.
template <typename ColumnType>
static int compareValuesWithOffsetFloat(const IColumn * _compared_column,
size_t compared_row, const IColumn * _reference_column,
size_t reference_row,
const Field & _offset,
bool offset_is_preceding)
{
// Casting the columns to the known type here makes it faster, probably
// because the getData call can be devirtualized.
const auto * compared_column = assert_cast<const ColumnType *>(
_compared_column);
const auto * reference_column = assert_cast<const ColumnType *>(
_reference_column);
const auto offset = _offset.get<typename ColumnType::ValueType>();
assert(offset >= 0);
const auto compared_value_data = compared_column->getDataAt(compared_row);
assert(compared_value_data.size == sizeof(typename ColumnType::ValueType));
auto compared_value = unalignedLoad<typename ColumnType::ValueType>(
compared_value_data.data);
const auto reference_value_data = reference_column->getDataAt(reference_row);
assert(reference_value_data.size == sizeof(typename ColumnType::ValueType));
auto reference_value = unalignedLoad<typename ColumnType::ValueType>(
reference_value_data.data);
// Floats overflow to Inf and the comparison will work normally, so we don't
// have to do anything.
if (offset_is_preceding)
{
reference_value -= offset;
}
else
{
reference_value += offset;
}
const auto result = compared_value < reference_value ? -1
: compared_value == reference_value ? 0 : 1;
// fmt::print(stderr, "compared {}, offset {}, reference {}, result {}\n",
// compared_value, offset, reference_value, result);
return result;
}
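The float specialization can shift the reference value directly because IEEE floats saturate to +/-Inf instead of wrapping, so the three-way comparison stays correct at the extremes; the integer version needs explicit overflow handling instead. A standalone check of that property:

#include <cassert>
#include <cmath>
#include <limits>

int main()
{
    const float max = std::numeric_limits<float>::max();
    const float offset = max; // an enormous frame offset

    // reference + offset overflows to +Inf rather than wrapping...
    const float shifted = max + offset;
    assert(std::isinf(shifted));

    // ...so any finite compared value still orders correctly below it.
    const float compared = 123.0f;
    assert(compared < shifted);
}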
// Helper macros to dispatch on type of the ORDER BY column
#define APPLY_FOR_ONE_TYPE(FUNCTION, TYPE) \
else if (typeid_cast<const TYPE *>(column)) \
@ -114,14 +164,20 @@ if (false) /* NOLINT */ \
{ \
/* Do nothing, a starter condition. */ \
} \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int8>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt8>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int16>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt16>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int32>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt32>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int64>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<UInt64>) \
\
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int8>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int16>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int32>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int64>) \
APPLY_FOR_ONE_TYPE(FUNCTION, ColumnVector<Int128>) \
\
APPLY_FOR_ONE_TYPE(FUNCTION##Float, ColumnVector<Float32>) \
APPLY_FOR_ONE_TYPE(FUNCTION##Float, ColumnVector<Float64>) \
\
else \
{ \
throw Exception(ErrorCodes::NOT_IMPLEMENTED, \
@ -193,9 +249,28 @@ WindowTransform::WindowTransform(const Block & input_header_,
== WindowFrame::BoundaryType::Offset))
{
assert(order_by_indices.size() == 1);
const IColumn * column = input_header.getByPosition(
order_by_indices[0]).column.get();
const auto & entry = input_header.getByPosition(order_by_indices[0]);
const IColumn * column = entry.column.get();
APPLY_FOR_TYPES(compareValuesWithOffset)
// Check that the offset type matches the window type.
// Convert the offsets to the ORDER BY column type. We can't just check
// that it matches, because e.g. the int literals are always (U)Int64,
// but the column might be Int8 and so on.
if (window_description.frame.begin_type
== WindowFrame::BoundaryType::Offset)
{
window_description.frame.begin_offset = convertFieldToTypeOrThrow(
window_description.frame.begin_offset,
*entry.type);
}
if (window_description.frame.end_type
== WindowFrame::BoundaryType::Offset)
{
window_description.frame.end_offset = convertFieldToTypeOrThrow(
window_description.frame.end_offset,
*entry.type);
}
}
}
@ -391,7 +466,7 @@ void WindowTransform::advanceFrameStartRowsOffset()
{
// Just recalculate it each time by walking blocks.
const auto [moved_row, offset_left] = moveRowNumber(current_row,
window_description.frame.begin_offset
window_description.frame.begin_offset.get<UInt64>()
* (window_description.frame.begin_preceding ? -1 : 1));
frame_start = moved_row;
@ -638,7 +713,7 @@ void WindowTransform::advanceFrameEndRowsOffset()
// Walk the specified offset from the current row. The "+1" is needed
// because the frame_end is a past-the-end pointer.
const auto [moved_row, offset_left] = moveRowNumber(current_row,
window_description.frame.end_offset
window_description.frame.end_offset.get<UInt64>()
* (window_description.frame.end_preceding ? -1 : 1)
+ 1);
@ -852,14 +927,14 @@ void WindowTransform::writeOutCurrentRow()
for (size_t wi = 0; wi < workspaces.size(); ++wi)
{
auto & ws = workspaces[wi];
IColumn * result_column = block.output_columns[wi].get();
if (ws.window_function_impl)
{
ws.window_function_impl->windowInsertResultInto(*result_column, this);
ws.window_function_impl->windowInsertResultInto(this, wi);
}
else
{
IColumn * result_column = block.output_columns[wi].get();
const auto * a = ws.aggregate_function.get();
auto * buf = ws.aggregate_function_state.data();
// FIXME does it also allocate the result on the arena?
@ -1280,8 +1355,11 @@ struct WindowFunctionRank final : public WindowFunction
DataTypePtr getReturnType() const override
{ return std::make_shared<DataTypeUInt64>(); }
void windowInsertResultInto(IColumn & to, const WindowTransform * transform) override
void windowInsertResultInto(const WindowTransform * transform,
size_t function_index) override
{
IColumn & to = *transform->blockAt(transform->current_row)
.output_columns[function_index];
assert_cast<ColumnUInt64 &>(to).getData().push_back(
transform->peer_group_start_row_number);
}
@ -1297,8 +1375,11 @@ struct WindowFunctionDenseRank final : public WindowFunction
DataTypePtr getReturnType() const override
{ return std::make_shared<DataTypeUInt64>(); }
void windowInsertResultInto(IColumn & to, const WindowTransform * transform) override
void windowInsertResultInto(const WindowTransform * transform,
size_t function_index) override
{
IColumn & to = *transform->blockAt(transform->current_row)
.output_columns[function_index];
assert_cast<ColumnUInt64 &>(to).getData().push_back(
transform->peer_group_number);
}
@ -1314,13 +1395,123 @@ struct WindowFunctionRowNumber final : public WindowFunction
DataTypePtr getReturnType() const override
{ return std::make_shared<DataTypeUInt64>(); }
void windowInsertResultInto(IColumn & to, const WindowTransform * transform) override
void windowInsertResultInto(const WindowTransform * transform,
size_t function_index) override
{
IColumn & to = *transform->blockAt(transform->current_row)
.output_columns[function_index];
assert_cast<ColumnUInt64 &>(to).getData().push_back(
transform->current_row_number);
}
};
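The three functions above differ only in which counter they emit: rank() writes the row number at which the current peer group starts, dense_rank() the peer group number, and row_number() the current row number. An illustrative query with ties:

    SELECT number,
           rank()       OVER (ORDER BY number % 3) AS r,
           dense_rank() OVER (ORDER BY number % 3) AS dr,
           row_number() OVER (ORDER BY number % 3) AS rn
    FROM numbers(6);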
// ClickHouse-specific variant of lag/lead that respects the window frame.
template <bool is_lead>
struct WindowFunctionLagLeadInFrame final : public WindowFunction
{
WindowFunctionLagLeadInFrame(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: WindowFunction(name_, argument_types_, parameters_)
{
if (!parameters.empty())
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function {} cannot be parameterized", name_);
}
if (argument_types.empty())
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function {} takes at least one argument", name_);
}
if (argument_types.size() == 1)
{
return;
}
if (!isInt64FieldType(argument_types[1]->getDefault().getType()))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Offset must be an integer, '{}' given",
argument_types[1]->getName());
}
if (argument_types.size() == 2)
{
return;
}
if (!getLeastSupertype({argument_types[0], argument_types[2]}))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"The default value type '{}' is not convertible to the argument type '{}'",
argument_types[2]->getName(),
argument_types[0]->getName());
}
if (argument_types.size() > 3)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function '{}' accepts at most 3 arguments, {} given",
name, argument_types.size());
}
}
DataTypePtr getReturnType() const override
{ return argument_types[0]; }
void windowInsertResultInto(const WindowTransform * transform,
size_t function_index) override
{
const auto & current_block = transform->blockAt(transform->current_row);
IColumn & to = *current_block.output_columns[function_index];
const auto & workspace = transform->workspaces[function_index];
int offset = 1;
if (argument_types.size() > 1)
{
offset = (*current_block.input_columns[
workspace.argument_column_indices[1]])[
transform->current_row.row].get<Int64>();
if (offset < 0)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"The offset for function {} must be nonnegative, {} given",
getName(), offset);
}
}
const auto [target_row, offset_left] = transform->moveRowNumber(
transform->current_row, offset * (is_lead ? 1 : -1));
if (offset_left != 0
|| target_row < transform->frame_start
|| transform->frame_end <= target_row)
{
// Offset is outside the frame.
if (argument_types.size() > 2)
{
// Column with default values is specified.
to.insertFrom(*current_block.input_columns[
workspace.argument_column_indices[2]],
transform->current_row.row);
}
else
{
to.insertDefault();
}
}
else
{
// Offset is inside the frame.
to.insertFrom(*transform->blockAt(target_row).input_columns[
workspace.argument_column_indices[0]],
target_row.row);
}
}
};
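A usage sketch for the new function (illustrative only): lagInFrame(x[, offset[, default]]) looks offset rows back inside the frame, so with a frame covering the partition up to the current row it behaves like standard lag().

    SELECT number,
           lagInFrame(number, 1, toUInt64(99))
               OVER (ORDER BY number ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS prev
    FROM numbers(5);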
void registerWindowFunctions(AggregateFunctionFactory & factory)
{
// Why didn't I implement lag/lead yet? Because they are a mess. I imagine
@ -1332,9 +1523,10 @@ void registerWindowFunctions(AggregateFunctionFactory & factory)
// the whole partition like Postgres does, because using a linear amount
// of additional memory is not an option when we have a lot of data. We must
// be able to process at least the lag/lead in streaming fashion.
// Our best bet is probably rewriting, say `lag(value, offset)` to
// `any(value) over (rows between offset preceding and offset preceding)`,
// at the query planning stage.
// A partial solution for constant offsets is rewriting, say `lag(value, offset)`
// to `any(value) over (rows between offset preceding and offset preceding)`.
// We also implement the non-standard functions `lag/leadInFrame`, which are
// analogous to `lag/lead` but respect the frame.
// Functions like cume_dist() do require materializing the entire
// partition, but it's probably also simpler to implement them by rewriting
// to a (rows between unbounded preceding and unbounded following) frame,
@ -1360,6 +1552,20 @@ void registerWindowFunctions(AggregateFunctionFactory & factory)
return std::make_shared<WindowFunctionRowNumber>(name, argument_types,
parameters);
});
factory.registerFunction("lagInFrame", [](const std::string & name,
const DataTypes & argument_types, const Array & parameters)
{
return std::make_shared<WindowFunctionLagLeadInFrame<false>>(
name, argument_types, parameters);
});
factory.registerFunction("leadInFrame", [](const std::string & name,
const DataTypes & argument_types, const Array & parameters)
{
return std::make_shared<WindowFunctionLagLeadInFrame<true>>(
name, argument_types, parameters);
});
}
}
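To make the rewrite mentioned in the comment concrete, these two illustrative queries return the same result for a constant offset:

    SELECT any(number) OVER (ORDER BY number ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING) AS a
    FROM numbers(5);
    SELECT lagInFrame(number, 2) OVER (ORDER BY number ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS a
    FROM numbers(5);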

View File

@ -110,7 +110,9 @@ public:
Status prepare() override;
void work() override;
private:
/*
* Implementation details.
*/
void advancePartitionEnd();
bool arePeers(const RowNumber & x, const RowNumber & y) const;
@ -321,10 +323,7 @@ public:
int (* compare_values_with_offset) (
const IColumn * compared_column, size_t compared_row,
const IColumn * reference_column, size_t reference_row,
// We can make it a Field later if we need the Decimals. Now we only
// have ints and datetime, and the underlying Field type for them is
// uint64_t anyway.
uint64_t offset,
const Field & offset,
bool offset_is_preceding);
};

View File

@ -8,10 +8,10 @@
#include <Core/Protocol.h>
#include <Core/QueryProcessingStage.h>
#include <IO/Progress.h>
#include <IO/TimeoutSetter.h>
#include <DataStreams/BlockIO.h>
#include <Interpreters/InternalTextLogsQueue.h>
#include <Interpreters/Context.h>
#include <Client/TimeoutSetter.h>
#include "IServer.h"

View File

@ -26,7 +26,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl
HDFSBuilderWrapper builder;
HDFSFSPtr fs;
explicit ReadBufferFromHDFSImpl(const std::string & hdfs_name_,
ReadBufferFromHDFSImpl(const std::string & hdfs_name_,
const Poco::Util::AbstractConfiguration & config_)
: hdfs_uri(hdfs_name_),
builder(createHDFSBuilder(hdfs_uri, config_))

View File

@ -333,40 +333,49 @@ IMergeTreeDataPart::State IMergeTreeDataPart::getState() const
}
DayNum IMergeTreeDataPart::getMinDate() const
std::pair<DayNum, DayNum> IMergeTreeDataPart::getMinMaxDate() const
{
if (storage.minmax_idx_date_column_pos != -1 && minmax_idx.initialized)
return DayNum(minmax_idx.hyperrectangle[storage.minmax_idx_date_column_pos].left.get<UInt64>());
{
const auto & hyperrectangle = minmax_idx.hyperrectangle[storage.minmax_idx_date_column_pos];
return {DayNum(hyperrectangle.left.get<UInt64>()), DayNum(hyperrectangle.right.get<UInt64>())};
}
else
return DayNum();
return {};
}
DayNum IMergeTreeDataPart::getMaxDate() const
{
if (storage.minmax_idx_date_column_pos != -1 && minmax_idx.initialized)
return DayNum(minmax_idx.hyperrectangle[storage.minmax_idx_date_column_pos].right.get<UInt64>());
else
return DayNum();
}
time_t IMergeTreeDataPart::getMinTime() const
std::pair<time_t, time_t> IMergeTreeDataPart::getMinMaxTime() const
{
if (storage.minmax_idx_time_column_pos != -1 && minmax_idx.initialized)
return minmax_idx.hyperrectangle[storage.minmax_idx_time_column_pos].left.get<UInt64>();
{
const auto & hyperrectangle = minmax_idx.hyperrectangle[storage.minmax_idx_time_column_pos];
/// The case of DateTime
if (hyperrectangle.left.getType() == Field::Types::UInt64)
{
assert(hyperrectangle.right.getType() == Field::Types::UInt64);
return {hyperrectangle.left.get<UInt64>(), hyperrectangle.right.get<UInt64>()};
}
/// The case of DateTime64
else if (hyperrectangle.left.getType() == Field::Types::Decimal64)
{
assert(hyperrectangle.right.getType() == Field::Types::Decimal64);
auto left = hyperrectangle.left.get<DecimalField<Decimal64>>();
auto right = hyperrectangle.right.get<DecimalField<Decimal64>>();
assert(left.getScale() == right.getScale());
return { left.getValue() / left.getScaleMultiplier(), right.getValue() / right.getScaleMultiplier() };
}
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Part minmax index by time is neither DateTime nor DateTime64");
}
else
return 0;
return {};
}
time_t IMergeTreeDataPart::getMaxTime() const
{
if (storage.minmax_idx_time_column_pos != -1 && minmax_idx.initialized)
return minmax_idx.hyperrectangle[storage.minmax_idx_time_column_pos].right.get<UInt64>();
else
return 0;
}
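The Decimal64 branch above recovers whole seconds by dividing the stored tick count by the scale multiplier. An illustrative query showing the same arithmetic for DateTime64(3), where the multiplier is 10^3:

    SELECT toDateTime64('2021-03-26 10:00:00.123', 3) AS dt,
           toUnixTimestamp64Milli(dt) AS ticks,
           intDiv(toUnixTimestamp64Milli(dt), 1000) AS seconds;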
void IMergeTreeDataPart::setColumns(const NamesAndTypesList & new_columns)
{
columns = new_columns;
@ -1013,7 +1022,7 @@ void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_
}
volume->getDisk()->setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr)));
volume->getDisk()->moveFile(from, to);
volume->getDisk()->moveDirectory(from, to);
relative_path = new_relative_path;
SyncGuardPtr sync_guard;
@ -1065,7 +1074,7 @@ void IMergeTreeDataPart::remove(bool keep_s3) const
try
{
volume->getDisk()->moveFile(from, to);
volume->getDisk()->moveDirectory(from, to);
}
catch (const Poco::FileNotFoundException &)
{

View File

@ -155,13 +155,11 @@ public:
bool contains(const IMergeTreeDataPart & other) const { return info.contains(other.info); }
/// If the partition key includes date column (a common case), these functions will return min and max values for this column.
DayNum getMinDate() const;
DayNum getMaxDate() const;
/// If the partition key includes a date column (a common case), this function will return the min and max values for that column.
std::pair<DayNum, DayNum> getMinMaxDate() const;
/// otherwise, if the partition key includes dateTime column (also a common case), these functions will return min and max values for this column.
time_t getMinTime() const;
time_t getMaxTime() const;
/// otherwise, if the partition key includes a DateTime column (also a common case), this function will return the min and max values for that column.
std::pair<time_t, time_t> getMinMaxTime() const;
bool isEmpty() const { return rows_count == 0; }

View File

@ -469,15 +469,19 @@ void MergeTreeData::checkPartitionKeyAndInitMinMax(const KeyDescription & new_pa
DataTypes minmax_idx_columns_types = getMinMaxColumnsTypes(new_partition_key);
/// Try to find the date column in columns used by the partition key (a common case).
bool encountered_date_column = false;
/// If there is none, a DateTime or DateTime64 column would also suffice.
bool has_date_column = false;
bool has_datetime_column = false;
for (size_t i = 0; i < minmax_idx_columns_types.size(); ++i)
{
if (typeid_cast<const DataTypeDate *>(minmax_idx_columns_types[i].get()))
if (isDate(minmax_idx_columns_types[i]))
{
if (!encountered_date_column)
if (!has_date_column)
{
minmax_idx_date_column_pos = i;
encountered_date_column = true;
has_date_column = true;
}
else
{
@ -486,16 +490,18 @@ void MergeTreeData::checkPartitionKeyAndInitMinMax(const KeyDescription & new_pa
}
}
}
if (!encountered_date_column)
if (!has_date_column)
{
for (size_t i = 0; i < minmax_idx_columns_types.size(); ++i)
{
if (typeid_cast<const DataTypeDateTime *>(minmax_idx_columns_types[i].get()))
if (isDateTime(minmax_idx_columns_types[i])
|| isDateTime64(minmax_idx_columns_types[i])
)
{
if (!encountered_date_column)
if (!has_datetime_column)
{
minmax_idx_time_column_pos = i;
encountered_date_column = true;
has_datetime_column = true;
}
else
{
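With this change, a DateTime64 column used by the partition key also populates the time min-max index, e.g. on a hypothetical table:

    CREATE TABLE events (ts DateTime64(3), id UInt64)
    ENGINE = MergeTree PARTITION BY toYYYYMM(ts) ORDER BY id;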

View File

@ -37,6 +37,8 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer(
: table_columns{ext::map<std::unordered_set>(
metadata_snapshot->getColumns().getAllPhysical(), [](const NameAndTypePair & col) { return col.name; })}
, queried_columns{queried_columns_}
, sorting_key_names{NameSet(
metadata_snapshot->getSortingKey().column_names.begin(), metadata_snapshot->getSortingKey().column_names.end())}
, block_with_constants{KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context)}
, log{log_}
, column_sizes{std::move(column_sizes_)}
@ -114,12 +116,12 @@ static bool isConditionGood(const ASTPtr & condition)
}
void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const ASTPtr & node) const
void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const ASTPtr & node, bool is_final) const
{
if (const auto * func_and = node->as<ASTFunction>(); func_and && func_and->name == "and")
{
for (const auto & elem : func_and->arguments->children)
analyzeImpl(res, elem);
analyzeImpl(res, elem, is_final);
}
else
{
@ -133,7 +135,7 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const ASTPtr & node)
cond.viable =
/// Condition depend on some column. Constant expressions are not moved.
!cond.identifiers.empty()
&& !cannotBeMoved(node)
&& !cannotBeMoved(node, is_final)
/// Do not take into consideration the conditions consisting only of the first primary key column
&& !hasPrimaryKeyAtoms(node)
/// Only table columns are considered. Not array joined columns. NOTE We're assuming that aliases was expanded.
@ -149,10 +151,10 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const ASTPtr & node)
}
/// Transform the conjunction chain of the WHERE expression into a Conditions list.
MergeTreeWhereOptimizer::Conditions MergeTreeWhereOptimizer::analyze(const ASTPtr & expression) const
MergeTreeWhereOptimizer::Conditions MergeTreeWhereOptimizer::analyze(const ASTPtr & expression, bool is_final) const
{
Conditions res;
analyzeImpl(res, expression);
analyzeImpl(res, expression, is_final);
return res;
}
@ -183,7 +185,7 @@ void MergeTreeWhereOptimizer::optimize(ASTSelectQuery & select) const
if (!select.where() || select.prewhere())
return;
Conditions where_conditions = analyze(select.where());
Conditions where_conditions = analyze(select.where(), select.final());
Conditions prewhere_conditions;
UInt64 total_size_of_moved_conditions = 0;
@ -300,6 +302,12 @@ bool MergeTreeWhereOptimizer::isPrimaryKeyAtom(const ASTPtr & ast) const
}
bool MergeTreeWhereOptimizer::isSortingKey(const String & column_name) const
{
return sorting_key_names.count(column_name);
}
bool MergeTreeWhereOptimizer::isConstant(const ASTPtr & expr) const
{
const auto column_name = expr->getColumnName();
@ -319,7 +327,7 @@ bool MergeTreeWhereOptimizer::isSubsetOfTableColumns(const NameSet & identifiers
}
bool MergeTreeWhereOptimizer::cannotBeMoved(const ASTPtr & ptr) const
bool MergeTreeWhereOptimizer::cannotBeMoved(const ASTPtr & ptr, bool is_final) const
{
if (const auto * function_ptr = ptr->as<ASTFunction>())
{
@ -336,12 +344,13 @@ bool MergeTreeWhereOptimizer::cannotBeMoved(const ASTPtr & ptr) const
{
/// disallow moving result of ARRAY JOIN to PREWHERE
if (array_joined_names.count(*opt_name) ||
array_joined_names.count(Nested::extractTableName(*opt_name)))
array_joined_names.count(Nested::extractTableName(*opt_name)) ||
(is_final && !isSortingKey(*opt_name)))
return true;
}
for (const auto & child : ptr->children)
if (cannotBeMoved(child))
if (cannotBeMoved(child, is_final))
return true;
return false;

View File

@ -67,10 +67,10 @@ private:
using Conditions = std::list<Condition>;
void analyzeImpl(Conditions & res, const ASTPtr & node) const;
void analyzeImpl(Conditions & res, const ASTPtr & node, bool is_final) const;
/// Transform the conjunction chain of the WHERE expression into a Conditions list.
Conditions analyze(const ASTPtr & expression) const;
Conditions analyze(const ASTPtr & expression, bool is_final) const;
/// Transform Conditions list to WHERE or PREWHERE expression.
static ASTPtr reconstruct(const Conditions & conditions);
@ -85,6 +85,8 @@ private:
bool isPrimaryKeyAtom(const ASTPtr & ast) const;
bool isSortingKey(const String & column_name) const;
bool isConstant(const ASTPtr & expr) const;
bool isSubsetOfTableColumns(const NameSet & identifiers) const;
@ -95,7 +97,7 @@ private:
*
* Also, disallow moving expressions with GLOBAL [NOT] IN.
*/
bool cannotBeMoved(const ASTPtr & ptr) const;
bool cannotBeMoved(const ASTPtr & ptr, bool is_final) const;
void determineArrayJoinedNames(ASTSelectQuery & select);
@ -104,6 +106,7 @@ private:
String first_primary_key_column;
const StringSet table_columns;
const Names queried_columns;
const NameSet sorting_key_names;
const Block block_with_constants;
Poco::Logger * log;
std::unordered_map<std::string, UInt64> column_sizes;

View File

@ -179,6 +179,7 @@ StoragePtr StorageFactory::get(
.attach = query.attach,
.has_force_restore_data_flag = has_force_restore_data_flag
};
assert(&arguments.context == &arguments.context.getGlobalContext());
auto res = storages.at(name).creator_fn(arguments);
if (!empty_engine_args.empty())

View File

@ -1389,7 +1389,7 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const
DataPartsLock lock(mutex);
for (MutableDataPartPtr & part : dst_parts)
dest_table_storage->renameTempPartAndReplace(part, &increment, &transaction, lock);
dest_table_storage->renameTempPartAndReplace(part, &dest_table_storage->increment, &transaction, lock);
removePartsFromWorkingSet(src_parts, true, lock);
transaction.commit(&lock);
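The fixed line matters for cross-table moves such as the illustrative statement below (both tables must have the same structure): block numbers for the moved parts have to be allocated from the destination table's increment, not the source's.

    ALTER TABLE src MOVE PARTITION 202103 TO TABLE dst;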

View File

@ -33,7 +33,7 @@ namespace ErrorCodes
IStorageURLBase::IStorageURLBase(
const Poco::URI & uri_,
const Context & context_,
const Context & /*context_*/,
const StorageID & table_id_,
const String & format_name_,
const std::optional<FormatSettings> & format_settings_,
@ -42,13 +42,10 @@ IStorageURLBase::IStorageURLBase(
const String & compression_method_)
: IStorage(table_id_)
, uri(uri_)
, context_global(context_)
, compression_method(compression_method_)
, format_name(format_name_)
, format_settings(format_settings_)
{
context_global.getRemoteHostFilter().checkURL(uri);
StorageInMemoryMetadata storage_metadata;
storage_metadata.setColumns(columns_);
storage_metadata.setConstraints(constraints_);
@ -237,14 +234,28 @@ Pipe IStorageURLBase::read(
chooseCompressionMethod(request_uri.getPath(), compression_method)));
}
BlockOutputStreamPtr IStorageURLBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/)
BlockOutputStreamPtr IStorageURLBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & context)
{
return std::make_shared<StorageURLBlockOutputStream>(uri, format_name,
format_settings, metadata_snapshot->getSampleBlock(), context_global,
ConnectionTimeouts::getHTTPTimeouts(context_global),
format_settings, metadata_snapshot->getSampleBlock(), context,
ConnectionTimeouts::getHTTPTimeouts(context),
chooseCompressionMethod(uri.toString(), compression_method));
}
StorageURL::StorageURL(const Poco::URI & uri_,
const StorageID & table_id_,
const String & format_name_,
const std::optional<FormatSettings> & format_settings_,
const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_,
Context & context_,
const String & compression_method_)
: IStorageURLBase(uri_, context_, table_id_, format_name_,
format_settings_, columns_, constraints_, compression_method_)
{
context_.getRemoteHostFilter().checkURL(uri);
}
void registerStorageURL(StorageFactory & factory)
{
factory.registerStorage("URL", [](const StorageFactory::Arguments & args)

View File

@ -45,7 +45,6 @@ protected:
const String & compression_method_);
Poco::URI uri;
const Context & context_global;
String compression_method;
String format_name;
// For URL engine, we use format settings from server context + `SETTINGS`
@ -114,11 +113,7 @@ public:
const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_,
Context & context_,
const String & compression_method_)
: IStorageURLBase(uri_, context_, table_id_, format_name_,
format_settings_, columns_, constraints_, compression_method_)
{
}
const String & compression_method_);
String getName() const override
{

View File

@ -1,6 +1,7 @@
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeArray.h>
#include <Storages/System/StorageSystemErrors.h>
#include <Common/ErrorCodes.h>
#include <Interpreters/Context.h>
@ -16,7 +17,7 @@ NamesAndTypesList StorageSystemErrors::getNamesAndTypes()
{ "value", std::make_shared<DataTypeUInt64>() },
{ "last_error_time", std::make_shared<DataTypeDateTime>() },
{ "last_error_message", std::make_shared<DataTypeString>() },
{ "last_error_stacktrace", std::make_shared<DataTypeString>() },
{ "last_error_trace", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>()) },
{ "remote", std::make_shared<DataTypeUInt8>() },
};
}
@ -34,7 +35,14 @@ void StorageSystemErrors::fillData(MutableColumns & res_columns, const Context &
res_columns[col_num++]->insert(error.count);
res_columns[col_num++]->insert(error.error_time_ms / 1000);
res_columns[col_num++]->insert(error.message);
res_columns[col_num++]->insert(error.stacktrace);
{
Array trace_array;
trace_array.reserve(error.trace.size());
for (size_t i = 0; i < error.trace.size(); ++i)
trace_array.emplace_back(reinterpret_cast<intptr_t>(error.trace[i]));
res_columns[col_num++]->insert(trace_array);
}
res_columns[col_num++]->insert(remote);
}
};
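The new column stores raw frame addresses; they can be symbolized with the introspection functions, e.g. (illustrative query, needs allow_introspection_functions):

    SET allow_introspection_functions = 1;
    SELECT name, value,
           arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), last_error_trace), '\n') AS trace
    FROM system.errors
    LIMIT 1;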

View File

@ -137,14 +137,17 @@ void StorageSystemParts::processNextStorage(
if (columns_mask[src_index++])
columns[res_index++]->insert(static_cast<UInt64>(part.use_count() - 1));
auto min_max_date = part->getMinMaxDate();
auto min_max_time = part->getMinMaxTime();
if (columns_mask[src_index++])
columns[res_index++]->insert(part->getMinDate());
columns[res_index++]->insert(min_max_date.first);
if (columns_mask[src_index++])
columns[res_index++]->insert(part->getMaxDate());
columns[res_index++]->insert(min_max_date.second);
if (columns_mask[src_index++])
columns[res_index++]->insert(static_cast<UInt32>(part->getMinTime()));
columns[res_index++]->insert(static_cast<UInt32>(min_max_time.first));
if (columns_mask[src_index++])
columns[res_index++]->insert(static_cast<UInt32>(part->getMaxTime()));
columns[res_index++]->insert(static_cast<UInt32>(min_max_time.second));
if (columns_mask[src_index++])
columns[res_index++]->insert(part->info.partition_id);
if (columns_mask[src_index++])

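A usage sketch for the paired getters (hypothetical table name): the min/max date and time of each part are now fetched together.

    SELECT partition, min_date, max_date, min_time, max_time
    FROM system.parts
    WHERE database = currentDatabase() AND table = 'events' AND active;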
View File

@ -32,6 +32,8 @@ StorageSystemPartsColumns::StorageSystemPartsColumns(const StorageID & table_id_
{"refcount", std::make_shared<DataTypeUInt32>()},
{"min_date", std::make_shared<DataTypeDate>()},
{"max_date", std::make_shared<DataTypeDate>()},
{"min_time", std::make_shared<DataTypeDateTime>()},
{"max_time", std::make_shared<DataTypeDateTime>()},
{"partition_id", std::make_shared<DataTypeString>()},
{"min_block_number", std::make_shared<DataTypeInt64>()},
{"max_block_number", std::make_shared<DataTypeInt64>()},
@ -95,8 +97,10 @@ void StorageSystemPartsColumns::processNextStorage(
/// For convenience, in returned refcount, don't add references that were due to local variables in this method: all_parts, active_parts.
auto use_count = part.use_count() - 1;
auto min_date = part->getMinDate();
auto max_date = part->getMaxDate();
auto min_max_date = part->getMinMaxDate();
auto min_max_time = part->getMinMaxTime();
auto index_size_in_bytes = part->getIndexSizeInBytes();
auto index_size_in_allocated_bytes = part->getIndexSizeInAllocatedBytes();
@ -141,9 +145,14 @@ void StorageSystemPartsColumns::processNextStorage(
columns[res_index++]->insert(UInt64(use_count));
if (columns_mask[src_index++])
columns[res_index++]->insert(min_date);
columns[res_index++]->insert(min_max_date.first);
if (columns_mask[src_index++])
columns[res_index++]->insert(max_date);
columns[res_index++]->insert(min_max_date.second);
if (columns_mask[src_index++])
columns[res_index++]->insert(static_cast<UInt32>(min_max_time.first));
if (columns_mask[src_index++])
columns[res_index++]->insert(static_cast<UInt32>(min_max_time.second));
if (columns_mask[src_index++])
columns[res_index++]->insert(part->info.partition_id);
if (columns_mask[src_index++])

View File

@ -20,12 +20,20 @@ StoragePtr ITableFunction::execute(const ASTPtr & ast_function, const Context &
ProfileEvents::increment(ProfileEvents::TableFunctionExecute);
context.checkAccess(AccessType::CREATE_TEMPORARY_TABLE | StorageFactory::instance().getSourceAccessType(getStorageTypeName()));
if (cached_columns.empty() || (hasStaticStructure() && cached_columns == getActualTableStructure(context)))
if (cached_columns.empty())
return executeImpl(ast_function, context, table_name, std::move(cached_columns));
auto get_storage = [=, tf = shared_from_this()]() -> StoragePtr
/// We have the table structure, so this is CREATE AS table_function().
/// We should use the global context here because there will be no query context on server startup
/// and because the storage outlives the query context.
const Context & global_context = context.getGlobalContext();
if (hasStaticStructure() && cached_columns == getActualTableStructure(context))
return executeImpl(ast_function, global_context, table_name, std::move(cached_columns));
auto this_table_function = shared_from_this();
auto get_storage = [=, &global_context]() -> StoragePtr
{
return tf->executeImpl(ast_function, context, table_name, cached_columns);
return this_table_function->executeImpl(ast_function, global_context, table_name, cached_columns);
};
/// It will request actual table structure and create underlying storage lazily
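The branch with cached_columns is reached for CREATE AS table_function(), where the storage can outlive the query, e.g. (illustrative):

    CREATE TABLE t AS remote('127.0.0.1', system.one);
    SELECT * FROM t;  -- the underlying storage is created lazily, from the global context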

View File

@ -55,15 +55,21 @@ void ITableFunctionXDBC::parseArguments(const ASTPtr & ast_function, const Conte
connection_string = args[0]->as<ASTLiteral &>().value.safeGet<String>();
remote_table_name = args[1]->as<ASTLiteral &>().value.safeGet<String>();
}
}
/// Have to const_cast, because bridges store their commands inside context
helper = createBridgeHelper(const_cast<Context &>(context), context.getSettingsRef().http_receive_timeout.value, connection_string);
helper->startBridgeSync();
void ITableFunctionXDBC::startBridgeIfNot(const Context & context) const
{
if (!helper)
{
/// Have to const_cast, because bridges store their commands inside context
helper = createBridgeHelper(const_cast<Context &>(context), context.getSettingsRef().http_receive_timeout.value, connection_string);
helper->startBridgeSync();
}
}
ColumnsDescription ITableFunctionXDBC::getActualTableStructure(const Context & context) const
{
assert(helper);
startBridgeIfNot(context);
/* Infer external table structure */
Poco::URI columns_info_uri = helper->getColumnsInfoURI();
@ -87,7 +93,7 @@ ColumnsDescription ITableFunctionXDBC::getActualTableStructure(const Context & c
StoragePtr ITableFunctionXDBC::executeImpl(const ASTPtr & /*ast_function*/, const Context & context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const
{
assert(helper);
startBridgeIfNot(context);
auto columns = getActualTableStructure(context);
auto result = std::make_shared<StorageXDBC>(StorageID(getDatabaseName(), table_name), schema_name, remote_table_name, columns, context, helper);
result->startup();

View File

@ -29,10 +29,12 @@ private:
void parseArguments(const ASTPtr & ast_function, const Context & context) override;
void startBridgeIfNot(const Context & context) const;
String connection_string;
String schema_name;
String remote_table_name;
BridgeHelperPtr helper;
mutable BridgeHelperPtr helper;
};
class TableFunctionJDBC : public ITableFunctionXDBC

View File

@ -7,12 +7,12 @@ class Layout(object):
'flat': '<flat/>',
'hashed': '<hashed/>',
'cache': '<cache><size_in_cells>128</size_in_cells></cache>',
'ssd_cache': '<ssd_cache><path>/etc/clickhouse/dictionaries/all</path><max_stored_keys>128</max_stored_keys></ssd_cache>',
'ssd_cache': '<ssd_cache><path>/etc/clickhouse/dictionaries/all</path></ssd_cache>',
'complex_key_hashed': '<complex_key_hashed/>',
'complex_key_hashed_one_key': '<complex_key_hashed/>',
'complex_key_hashed_two_keys': '<complex_key_hashed/>',
'complex_key_cache': '<complex_key_cache><size_in_cells>128</size_in_cells></complex_key_cache>',
'complex_key_ssd_cache': '<complex_key_ssd_cache><path>/etc/clickhouse/dictionaries/all</path><max_stored_keys>128</max_stored_keys></complex_key_ssd_cache>',
'complex_key_ssd_cache': '<complex_key_ssd_cache><path>/etc/clickhouse/dictionaries/all</path></complex_key_ssd_cache>',
'range_hashed': '<range_hashed/>',
'direct': '<direct/>',
'complex_key_direct': '<complex_key_direct/>'

View File

@ -42,7 +42,6 @@
<read_buffer_size>131072</read_buffer_size>
<write_buffer_size>1048576</write_buffer_size>
<path>/etc/clickhouse/dictionaries/radars</path>
<max_stored_keys>1048576</max_stored_keys>
</complex_key_ssd_cache>
</layout>
<lifetime>1</lifetime>

View File

@ -105,6 +105,9 @@ def started_cluster():
node1.exec_in_container(
["bash", "-c", "echo 'CREATE TABLE t4(X INTEGER PRIMARY KEY ASC, Y, Z);' | sqlite3 {}".format(sqlite_db)],
privileged=True, user='root')
node1.exec_in_container(
["bash", "-c", "echo 'CREATE TABLE tf1(x INTEGER PRIMARY KEY ASC, y, z);' | sqlite3 {}".format(sqlite_db)],
privileged=True, user='root')
print("sqlite tables created")
mysql_conn = get_mysql_conn()
print("mysql connection received")
@ -208,6 +211,21 @@ def test_sqlite_simple_select_function_works(started_cluster):
assert node1.query(
"select count(), sum(x) from odbc('DSN={}', '{}') group by x".format(sqlite_setup["DSN"], 't1')) == "1\t1\n"
def test_sqlite_table_function(started_cluster):
sqlite_setup = node1.odbc_drivers["SQLite3"]
sqlite_db = sqlite_setup["Database"]
node1.exec_in_container(["bash", "-c", "echo 'INSERT INTO tf1 values(1, 2, 3);' | sqlite3 {}".format(sqlite_db)],
privileged=True, user='root')
node1.query("create table odbc_tf as odbc('DSN={}', '{}')".format(sqlite_setup["DSN"], 'tf1'))
assert node1.query("select * from odbc_tf") == "1\t2\t3\n"
assert node1.query("select y from odbc_tf") == "2\n"
assert node1.query("select z from odbc_tf") == "3\n"
assert node1.query("select x from odbc_tf") == "1\n"
assert node1.query("select x, y from odbc_tf") == "1\t2\n"
assert node1.query("select z, x, y from odbc_tf") == "3\t1\t2\n"
assert node1.query("select count(), sum(x) from odbc_tf group by x") == "1\t1\n"
def test_sqlite_simple_select_storage_works(started_cluster):
sqlite_setup = node1.odbc_drivers["SQLite3"]

View File

@ -0,0 +1,14 @@
<yandex>
<tcp_port_secure>9440</tcp_port_secure>
<remote_servers>
<test_cluster>
<shard>
<replica>
<host>node2</host>
<port>9440</port>
<secure>1</secure>
</replica>
</shard>
</test_cluster>
</remote_servers>
</yandex>

View File

@ -0,0 +1,18 @@
<yandex>
<openSSL>
<server>
<certificateFile>/etc/clickhouse-server/config.d/server.crt</certificateFile>
<privateKeyFile>/etc/clickhouse-server/config.d/server.key</privateKeyFile>
<dhParamsFile>/etc/clickhouse-server/config.d/dhparam.pem</dhParamsFile>
<verificationMode>none</verificationMode>
<cacheSessions>true</cacheSessions>
</server>
<client>
<cacheSessions>true</cacheSessions>
<verificationMode>none</verificationMode>
<invalidCertificateHandler>
<name>AcceptCertificateHandler</name>
</invalidCertificateHandler>
</client>
</openSSL>
</yandex>

View File

@ -0,0 +1,8 @@
-----BEGIN DH PARAMETERS-----
MIIBCAKCAQEAua92DDli13gJ+//ZXyGaggjIuidqB0crXfhUlsrBk9BV1hH3i7fR
XGP9rUdk2ubnB3k2ejBStL5oBrkHm9SzUFSQHqfDjLZjKoUpOEmuDc4cHvX1XTR5
Pr1vf5cd0yEncJWG5W4zyUB8k++SUdL2qaeslSs+f491HBLDYn/h8zCgRbBvxhxb
9qeho1xcbnWeqkN6Kc9bgGozA16P9NLuuLttNnOblkH+lMBf42BSne/TWt3AlGZf
slKmmZcySUhF8aKfJnLKbkBCFqOtFRh8zBA9a7g+BT/lSANATCDPaAk1YVih2EKb
dpc3briTDbRsiqg2JKMI7+VdULY9bh3EawIBAg==
-----END DH PARAMETERS-----

View File

@ -0,0 +1,19 @@
-----BEGIN CERTIFICATE-----
MIIC/TCCAeWgAwIBAgIJANjx1QSR77HBMA0GCSqGSIb3DQEBCwUAMBQxEjAQBgNV
BAMMCWxvY2FsaG9zdDAgFw0xODA3MzAxODE2MDhaGA8yMjkyMDUxNDE4MTYwOFow
FDESMBAGA1UEAwwJbG9jYWxob3N0MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB
CgKCAQEAs9uSo6lJG8o8pw0fbVGVu0tPOljSWcVSXH9uiJBwlZLQnhN4SFSFohfI
4K8U1tBDTnxPLUo/V1K9yzoLiRDGMkwVj6+4+hE2udS2ePTQv5oaMeJ9wrs+5c9T
4pOtlq3pLAdm04ZMB1nbrEysceVudHRkQbGHzHp6VG29Fw7Ga6YpqyHQihRmEkTU
7UCYNA+Vk7aDPdMS/khweyTpXYZimaK9f0ECU3/VOeG3fH6Sp2X6FN4tUj/aFXEj
sRmU5G2TlYiSIUMF2JPdhSihfk1hJVALrHPTU38SOL+GyyBRWdNcrIwVwbpvsvPg
pryMSNxnpr0AK0dFhjwnupIv5hJIOQIDAQABo1AwTjAdBgNVHQ4EFgQUjPLb3uYC
kcamyZHK4/EV8jAP0wQwHwYDVR0jBBgwFoAUjPLb3uYCkcamyZHK4/EV8jAP0wQw
DAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAM/ocuDvfPus/KpMVD51j
4IdlU8R0vmnYLQ+ygzOAo7+hUWP5j0yvq4ILWNmQX6HNvUggCgFv9bjwDFhb/5Vr
85ieWfTd9+LTjrOzTw4avdGwpX9G+6jJJSSq15tw5ElOIFb/qNA9O4dBiu8vn03C
L/zRSXrARhSqTW5w/tZkUcSTT+M5h28+Lgn9ysx4Ff5vi44LJ1NnrbJbEAIYsAAD
+UA+4MBFKx1r6hHINULev8+lCfkpwIaeS8RL+op4fr6kQPxnULw8wT8gkuc8I4+L
P9gg/xDHB44T3ADGZ5Ib6O0DJaNiToO6rnoaaxs0KkotbvDWvRoxEytSbXKoYjYp
0g==
-----END CERTIFICATE-----

View File

@ -0,0 +1,28 @@
-----BEGIN PRIVATE KEY-----
MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCz25KjqUkbyjyn
DR9tUZW7S086WNJZxVJcf26IkHCVktCeE3hIVIWiF8jgrxTW0ENOfE8tSj9XUr3L
OguJEMYyTBWPr7j6ETa51LZ49NC/mhox4n3Cuz7lz1Pik62WreksB2bThkwHWdus
TKxx5W50dGRBsYfMenpUbb0XDsZrpimrIdCKFGYSRNTtQJg0D5WTtoM90xL+SHB7
JOldhmKZor1/QQJTf9U54bd8fpKnZfoU3i1SP9oVcSOxGZTkbZOViJIhQwXYk92F
KKF+TWElUAusc9NTfxI4v4bLIFFZ01ysjBXBum+y8+CmvIxI3GemvQArR0WGPCe6
ki/mEkg5AgMBAAECggEATrbIBIxwDJOD2/BoUqWkDCY3dGevF8697vFuZKIiQ7PP
TX9j4vPq0DfsmDjHvAPFkTHiTQXzlroFik3LAp+uvhCCVzImmHq0IrwvZ9xtB43f
7Pkc5P6h1l3Ybo8HJ6zRIY3TuLtLxuPSuiOMTQSGRL0zq3SQ5DKuGwkz+kVjHXUN
MR2TECFwMHKQ5VLrC+7PMpsJYyOMlDAWhRfUalxC55xOXTpaN8TxNnwQ8K2ISVY5
212Jz/a4hn4LdwxSz3Tiu95PN072K87HLWx3EdT6vW4Ge5P/A3y+smIuNAlanMnu
plHBRtpATLiTxZt/n6npyrfQVbYjSH7KWhB8hBHtaQKBgQDh9Cq1c/KtqDtE0Ccr
/r9tZNTUwBE6VP+3OJeKdEdtsfuxjOCkS1oAjgBJiSDOiWPh1DdoDeVZjPKq6pIu
Mq12OE3Doa8znfCXGbkSzEKOb2unKZMJxzrz99kXt40W5DtrqKPNb24CNqTiY8Aa
CjtcX+3weat82VRXvph6U8ltMwKBgQDLxjiQQzNoY7qvg7CwJCjf9qq8jmLK766g
1FHXopqS+dTxDLM8eJSRrpmxGWJvNeNc1uPhsKsKgotqAMdBUQTf7rSTbt4MyoH5
bUcRLtr+0QTK9hDWMOOvleqNXha68vATkohWYfCueNsC60qD44o8RZAS6UNy3ENq
cM1cxqe84wKBgQDKkHutWnooJtajlTxY27O/nZKT/HA1bDgniMuKaz4R4Gr1PIez
on3YW3V0d0P7BP6PWRIm7bY79vkiMtLEKdiKUGWeyZdo3eHvhDb/3DCawtau8L2K
GZsHVp2//mS1Lfz7Qh8/L/NedqCQ+L4iWiPnZ3THjjwn3CoZ05ucpvrAMwKBgB54
nay039MUVq44Owub3KDg+dcIU62U+cAC/9oG7qZbxYPmKkc4oL7IJSNecGHA5SbU
2268RFdl/gLz6tfRjbEOuOHzCjFPdvAdbysanpTMHLNc6FefJ+zxtgk9sJh0C4Jh
vxFrw9nTKKzfEl12gQ1SOaEaUIO0fEBGbe8ZpauRAoGAMAlGV+2/K4ebvAJKOVTa
dKAzQ+TD2SJmeR1HZmKDYddNqwtZlzg3v4ZhCk4eaUmGeC1Bdh8MDuB3QQvXz4Dr
vOIP4UVaOr+uM+7TgAgVnP4/K6IeJGzUDhX93pmpWhODfdu/oojEKVcpCojmEmS1
KCBtmIrQLqzMpnBpLNuSY+Q=
-----END PRIVATE KEY-----

View File

@ -0,0 +1,6 @@
<yandex>
<profiles>
<default>
</default>
</profiles>
</yandex>

View File

@ -0,0 +1,84 @@
import os.path
import time
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV
cluster = ClickHouseCluster(__file__)
NODES = {'node' + str(i): None for i in (1, 2)}
config = '''<yandex>
<profiles>
<default>
<sleep_in_send_data>{sleep_in_send_data}</sleep_in_send_data>
</default>
</profiles>
</yandex>'''
@pytest.fixture(scope="module")
def started_cluster():
cluster.__with_ssl_config = True
main_configs = [
"configs_secure/config.d/remote_servers.xml",
"configs_secure/server.crt",
"configs_secure/server.key",
"configs_secure/dhparam.pem",
"configs_secure/config.d/ssl_conf.xml",
]
NODES['node1'] = cluster.add_instance('node1', main_configs=main_configs)
NODES['node2'] = cluster.add_instance('node2', main_configs=main_configs, user_configs=["configs_secure/users.d/users.xml"])
try:
cluster.start()
NODES['node2'].query("CREATE TABLE base_table (x UInt64) ENGINE = MergeTree ORDER BY x;")
NODES['node2'].query("INSERT INTO base_table VALUES (5);")
NODES['node1'].query("CREATE TABLE distributed_table (x UInt64) ENGINE = Distributed(test_cluster, default, base_table);")
yield cluster
finally:
cluster.shutdown()
def test(started_cluster):
NODES['node2'].replace_config('/etc/clickhouse-server/users.d/users.xml', config.format(sleep_in_send_data=1000))
attempts = 0
while attempts < 1000:
setting = NODES['node2'].http_query("SELECT value FROM system.settings WHERE name='sleep_in_send_data'")
if int(setting) == 1000:
break
time.sleep(0.1)
attempts += 1
assert attempts < 1000
start = time.time()
NODES['node1'].query_and_get_error('SELECT * FROM distributed_table settings receive_timeout=5, use_hedged_requests=0, async_socket_for_remote=0;')
end = time.time()
assert end - start < 10
start = time.time()
error = NODES['node1'].query_and_get_error('SELECT * FROM distributed_table settings receive_timeout=5, use_hedged_requests=0;')
end = time.time()
assert end - start < 10
# Check that exception about timeout wasn't thrown from DB::ReadBufferFromPocoSocket::nextImpl().
assert error.find('DB::ReadBufferFromPocoSocket::nextImpl()') == -1
start = time.time()
error = NODES['node1'].query_and_get_error('SELECT * FROM distributed_table settings receive_timeout=5;')
end = time.time()
assert end - start < 10
# Check that exception about timeout wasn't thrown from DB::ReadBufferFromPocoSocket::nextImpl().
assert error.find('DB::ReadBufferFromPocoSocket::nextImpl()') == -1

View File

@ -1,4 +1,4 @@
<test>
<test max_ignored_relative_change="0.2">
<settings>
<max_memory_usage>30000000000</max_memory_usage>
</settings>

View File

@ -1,4 +1,4 @@
<test>
<test max_ignored_relative_change="0.2">

View File

@ -1,4 +1,4 @@
<test>
<test max_ignored_relative_change="0.2">
<query>SELECT boundingRatio(number, number) FROM numbers(100000000)</query>
<query>SELECT (argMax(number, number) - argMin(number, number)) / (max(number) - min(number)) FROM numbers(100000000)</query>
</test>

View File

@ -1,5 +1,5 @@
<!-- FIXME this instability is abysmal, investigate the unstable queries -->
<test>
<test max_ignored_relative_change="0.2">
<settings>
<allow_suspicious_codecs>1</allow_suspicious_codecs>
</settings>

View File

@ -1,4 +1,4 @@
<test>
<test max_ignored_relative_change="0.2">
<settings>
<allow_suspicious_codecs>1</allow_suspicious_codecs>
</settings>

View File

@ -1,4 +1,4 @@
<test>
<test max_ignored_relative_change="0.2">

Some files were not shown because too many files have changed in this diff.