Merge branch 'master' into fix-clang-tidy

Authored by mergify[bot] on 2021-08-23 10:54:36 +00:00, committed by GitHub
commit 95e9d517ff
39 changed files with 897 additions and 109 deletions

base/common/unit.h (new file, 10 additions)
View File

@ -0,0 +1,10 @@
#pragma once
#include <cstddef>
constexpr size_t KiB = 1024;
constexpr size_t MiB = 1024 * KiB;
constexpr size_t GiB = 1024 * MiB;
constexpr size_t operator"" _KiB(unsigned long long val) { return val * KiB; }
constexpr size_t operator"" _MiB(unsigned long long val) { return val * MiB; }
constexpr size_t operator"" _GiB(unsigned long long val) { return val * GiB; }
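
A brief usage sketch of these literals (the constant name below is hypothetical; `5_GiB` is the same value DiskS3 compares object sizes against further down in this commit):

```cpp
#include <common/unit.h>

// The literal is evaluated at compile time: 5_GiB == 5 * 1024 * 1024 * 1024 bytes.
static_assert(5_GiB == 5ULL * 1024 * 1024 * 1024);

// Hypothetical constant for illustration only.
constexpr size_t max_single_copy_size = 5_GiB;
```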

View File

@ -2,6 +2,8 @@ FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
--yes --no-install-recommends --verbose-versions \

View File

@ -3,6 +3,8 @@ FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.10.1.*
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \
apt-transport-https \

View File

@ -3,6 +3,8 @@ FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& apt-get install \
apt-transport-https \

View File

@ -3,6 +3,8 @@ FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
--yes --no-install-recommends --verbose-versions \

View File

@ -5,6 +5,8 @@ RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& wget -nv -O /tmp/arrow-keyring.deb "https://apache.jfrog.io/artifactory/arrow/ubuntu/apache-arrow-apt-source-latest-${CODENAME}.deb" \
&& dpkg -i /tmp/arrow-keyring.deb
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
# Libraries from OS are only needed to test the "unbundled" build (that is not used in production).
RUN apt-get update \
&& apt-get install \

View File

@ -26,6 +26,8 @@ ARG DEBIAN_FRONTEND=noninteractive
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN groupadd -r clickhouse --gid=101 \
&& useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
&& apt-get update \

View File

@ -3,6 +3,8 @@ FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
--yes --no-install-recommends --verbose-versions \

View File

@ -2,6 +2,8 @@
# docker run --volume=path_to_repo:/repo_folder --volume=path_to_result:/test_output yandex/clickhouse-codebrowser
FROM yandex/clickhouse-binary-builder
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-9 libllvm9 libclang-9-dev
# repo versions don't work correctly with C++17

View File

@ -3,6 +3,8 @@ FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
--yes --no-install-recommends --verbose-versions \

View File

@ -5,6 +5,8 @@ ENV LANG=C.UTF-8
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
ca-certificates \

View File

@ -5,6 +5,8 @@ ENV LANG=C.UTF-8
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
bash \

View File

@ -1,6 +1,8 @@
# docker build -t yandex/clickhouse-sqlancer-test .
FROM ubuntu:20.04
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update --yes && env DEBIAN_FRONTEND=noninteractive apt-get install wget unzip git openjdk-14-jdk maven python3 --yes --no-install-recommends
RUN wget https://github.com/sqlancer/sqlancer/archive/master.zip -O /sqlancer.zip
RUN mkdir /sqlancer && \

View File

@ -1,6 +1,8 @@
# docker build -t yandex/clickhouse-style-test .
FROM ubuntu:20.04
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
shellcheck \
libxml2-utils \

View File

@ -1,6 +1,8 @@
# docker build -t yandex/clickhouse-testflows-runner .
FROM ubuntu:20.04
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
ca-certificates \

View File

@ -23,3 +23,5 @@ You can also use the following database engines:
- [PostgreSQL](../../engines/database-engines/postgresql.md)
- [Replicated](../../engines/database-engines/replicated.md)
- [SQLite](../../engines/database-engines/sqlite.md)

View File

@ -1,6 +1,6 @@
---
toc_priority: 29
toc_title: "[experimental] MaterializedMySQL"
toc_title: MaterializedMySQL
---
# [experimental] MaterializedMySQL {#materialized-mysql}

View File

@ -0,0 +1,80 @@
---
toc_priority: 32
toc_title: SQLite
---
# SQLite {#sqlite}
Allows connecting to an [SQLite](https://www.sqlite.org/index.html) database and performing `INSERT` and `SELECT` queries to exchange data between ClickHouse and SQLite.
## Creating a Database {#creating-a-database}
``` sql
CREATE DATABASE sqlite_database
ENGINE = SQLite('db_path')
```
**Engine Parameters**
- `db_path` — Path to the SQLite database file.
## Data Types Support {#data_types-support}
| SQLite | ClickHouse |
|---------------|---------------------------------------------------------|
| INTEGER | [Int32](../../sql-reference/data-types/int-uint.md) |
| REAL | [Float32](../../sql-reference/data-types/float.md) |
| TEXT | [String](../../sql-reference/data-types/string.md) |
| BLOB | [String](../../sql-reference/data-types/string.md) |
## Specifics and Recommendations {#specifics-and-recommendations}
SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During a write, SQLite locks the entire database file, so write operations are performed sequentially. Read operations can run concurrently.
SQLite does not require service management (such as startup scripts) or access control based on `GRANT` and passwords. Access control is handled by means of file-system permissions given to the database file itself.
## Usage Example {#usage-example}
Create a database in ClickHouse connected to SQLite and show its tables:
``` sql
CREATE DATABASE sqlite_db ENGINE = SQLite('sqlite.db');
SHOW TABLES FROM sqlite_db;
```
``` text
┌──name───┐
│ table1 │
│ table2 │
└─────────┘
```
Show the contents of the table:
``` sql
SELECT * FROM sqlite_db.table1;
```
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
└───────┴──────┘
```
Insert data into the SQLite table from a ClickHouse table:
``` sql
CREATE TABLE clickhouse_table(`col1` String,`col2` Int16) ENGINE = MergeTree() ORDER BY col2;
INSERT INTO clickhouse_table VALUES ('text',10);
INSERT INTO sqlite_db.table1 SELECT * FROM clickhouse_table;
SELECT * FROM sqlite_db.table1;
```
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
│ text │ 10 │
└───────┴──────┘
```

View File

@ -19,3 +19,4 @@ List of supported integrations:
- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
- [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md)
- [SQLite](../../../engines/table-engines/integrations/sqlite.md)

View File

@ -0,0 +1,59 @@
---
toc_priority: 7
toc_title: SQLite
---
# SQLite {#sqlite}
The engine allows importing and exporting data to and from SQLite, and supports queries to SQLite tables directly from ClickHouse.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name
(
name1 [type1],
name2 [type2], ...
) ENGINE = SQLite('db_path', 'table')
```
**Engine Parameters**
- `db_path` — Path to the SQLite database file.
- `table` — Name of a table in the SQLite database.
## Usage Example {#usage-example}
Show the query that was used to create the SQLite table:
```sql
SHOW CREATE TABLE sqlite_db.table2;
```
``` text
CREATE TABLE SQLite.table2
(
`col1` Nullable(Int32),
`col2` Nullable(String)
)
ENGINE = SQLite('sqlite.db','table2');
```
Retrieve the data from the table:
``` sql
SELECT * FROM sqlite_db.table2 ORDER BY col1;
```
```text
┌─col1─┬─col2──┐
│ 1 │ text1 │
│ 2 │ text2 │
│ 3 │ text3 │
└──────┴───────┘
```
**See Also**
- [SQLite](../../../engines/database-engines/sqlite.md) engine
- [sqlite](../../../sql-reference/table-functions/sqlite.md) table function

View File

@ -6,7 +6,7 @@ toc_title: JOIN
Join produces a new table by combining columns from one or multiple tables by using values common to each. It is a common operation in databases with SQL support, which corresponds to [relational algebra](https://en.wikipedia.org/wiki/Relational_algebra#Joins_and_join-like_operators) join. The special case of one table join is often referred to as “self-join”.
Syntax:
**Syntax**
``` sql
SELECT <expr_list>
@ -38,7 +38,7 @@ Additional join types available in ClickHouse:
## Settings {#join-settings}
The default join type can be overriden using [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) setting.
The default join type can be overridden using [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) setting.
The behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) setting.
@ -52,6 +52,61 @@ The behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_
- [join_on_disk_max_files_to_merge](../../../operations/settings/settings.md#join_on_disk_max_files_to_merge)
- [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys)
## ON Section Conditions {#on-section-conditions}
An `ON` section can contain several conditions combined with the `AND` operator. Conditions specifying join keys must refer to both the left and right tables and must use the equality operator. Other conditions may use other logical operators, but they must refer to either the left or the right table of a query.
Rows are joined if the whole complex condition is met. If the conditions are not met, rows may still be included in the result depending on the `JOIN` type. Note that if the same conditions are placed in a `WHERE` section and they are not met, the rows are always filtered out of the result.
!!! note "Note"
The `OR` operator inside an `ON` section is not supported yet.
!!! note "Note"
If a condition refers to columns from different tables, only the equality operator (`=`) is supported so far.
**Example**
Consider `table_1` and `table_2`:
```
┌─Id─┬─name─┐ ┌─Id─┬─text───────────┬─scores─┐
│ 1 │ A │ │ 1 │ Text A │ 10 │
│ 2 │ B │ │ 1 │ Another text A │ 12 │
│ 3 │ C │ │ 2 │ Text B │ 15 │
└────┴──────┘ └────┴────────────────┴────────┘
```
Query with one join key condition and an additional condition for `table_2`:
``` sql
SELECT name, text FROM table_1 LEFT OUTER JOIN table_2
ON table_1.Id = table_2.Id AND startsWith(table_2.text, 'Text');
```
Note that the result contains the row with the name `C` and an empty text column. It is included in the result because an `OUTER` join type is used.
```
┌─name─┬─text───┐
│ A │ Text A │
│ B │ Text B │
│ C │ │
└──────┴────────┘
```
Query with an `INNER` join type and multiple conditions:
``` sql
SELECT name, text, scores FROM table_1 INNER JOIN table_2
ON table_1.Id = table_2.Id AND table_2.scores > 10 AND startsWith(table_2.text, 'Text');
```
Result:
```
┌─name─┬─text───┬─scores─┐
│ B │ Text B │ 15 │
└──────┴────────┴────────┘
```
## ASOF JOIN Usage {#asof-join-usage}
`ASOF JOIN` is useful when you need to join records that have no exact match.
@ -59,7 +114,7 @@ The behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_
The algorithm requires a special column in the tables. This column:
- Must contain an ordered sequence.
- Can be one of the following types: [Int*, UInt*](../../../sql-reference/data-types/int-uint.md), [Float\*](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal\*](../../../sql-reference/data-types/decimal.md).
- Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md).
- Can't be the only column in the `JOIN` clause.
Syntax `ASOF JOIN ... ON`:
@ -84,7 +139,7 @@ ASOF JOIN table_2
USING (equi_column1, ... equi_columnN, asof_column)
```
`ASOF JOIN` uses `equi_columnX` for joining on equality and `asof_column` for joining on the closest match with the `table_1.asof_column >= table_2.asof_column` condition. The `asof_column` column always the last one in the `USING` clause.
`ASOF JOIN` uses `equi_columnX` for joining on equality and `asof_column` for joining on the closest match with the `table_1.asof_column >= table_2.asof_column` condition. The `asof_column` column is always the last one in the `USING` clause.
For example, consider the following tables:

View File

@ -34,5 +34,6 @@ You can use table functions in:
| [odbc](../../sql-reference/table-functions/odbc.md) | Creates a [ODBC](../../engines/table-engines/integrations/odbc.md)-engine table. |
| [hdfs](../../sql-reference/table-functions/hdfs.md) | Creates a [HDFS](../../engines/table-engines/integrations/hdfs.md)-engine table. |
| [s3](../../sql-reference/table-functions/s3.md) | Creates a [S3](../../engines/table-engines/integrations/s3.md)-engine table. |
| [sqlite](../../sql-reference/table-functions/sqlite.md) | Creates a [sqlite](../../engines/table-engines/integrations/sqlite.md)-engine table. |
[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/) <!--hide-->

View File

@ -0,0 +1,45 @@
---
toc_priority: 55
toc_title: sqlite
---
## sqlite {#sqlite}
Allows performing queries on data stored in an [SQLite](../../engines/database-engines/sqlite.md) database.
**Syntax**
``` sql
sqlite('db_path', 'table_name')
```
**Arguments**
- `db_path` — Path to a file with an SQLite database. [String](../../sql-reference/data-types/string.md).
- `table_name` — Name of a table in the SQLite database. [String](../../sql-reference/data-types/string.md).
**Returned value**
- A table object with the same columns as in the original `SQLite` table.
**Example**
Query:
``` sql
SELECT * FROM sqlite('sqlite.db', 'table1') ORDER BY col2;
```
Result:
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
└───────┴──────┘
```
**See Also**
- [SQLite](../../engines/table-engines/integrations/sqlite.md) table engine

View File

@ -0,0 +1,79 @@
---
toc_priority: 32
toc_title: SQLite
---
# SQLite {#sqlite}
Движок баз данных позволяет подключаться к базе [SQLite](https://www.sqlite.org/index.html) и выполнять запросы `INSERT` и `SELECT` для обмена данными между ClickHouse и SQLite.
## Создание базы данных {#creating-a-database}
``` sql
CREATE DATABASE sqlite_database
ENGINE = SQLite('db_path')
```
**Параметры движка**
- `db_path` — путь к файлу с базой данных SQLite.
## Поддерживаемые типы данных {#data_types-support}
| SQLite | ClickHouse |
|---------------|---------------------------------------------------------|
| INTEGER | [Int32](../../sql-reference/data-types/int-uint.md) |
| REAL | [Float32](../../sql-reference/data-types/float.md) |
| TEXT | [String](../../sql-reference/data-types/string.md) |
| BLOB | [String](../../sql-reference/data-types/string.md) |
## Особенности и рекомендации {#specifics-and-recommendations}
SQLite хранит всю базу данных (определения, таблицы, индексы и сами данные) в виде единого кроссплатформенного файла на хост-машине. Во время записи SQLite блокирует весь файл базы данных, поэтому операции записи выполняются последовательно. Операции чтения могут быть многозадачными.
SQLite не требует управления службами (например, сценариями запуска) или контроля доступа на основе `GRANT` и паролей. Контроль доступа осуществляется с помощью разрешений файловой системы, предоставляемых самому файлу базы данных.
## Примеры использования {#usage-example}
Отобразим список таблиц базы данных в ClickHouse, подключенной к SQLite:
``` sql
CREATE DATABASE sqlite_db ENGINE = SQLite('sqlite.db');
SHOW TABLES FROM sqlite_db;
```
``` text
┌──name───┐
│ table1 │
│ table2 │
└─────────┘
```
Отобразим содержимое таблицы:
``` sql
SELECT * FROM sqlite_db.table1;
```
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
└───────┴──────┘
```
Вставим данные в таблицу SQLite из таблицы ClickHouse:
``` sql
CREATE TABLE clickhouse_table(`col1` String,`col2` Int16) ENGINE = MergeTree() ORDER BY col2;
INSERT INTO clickhouse_table VALUES ('text',10);
INSERT INTO sqlite_db.table1 SELECT * FROM clickhouse_table;
SELECT * FROM sqlite_db.table1;
```
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
│ text │ 10 │
└───────┴──────┘
```

View File

@ -0,0 +1,59 @@
---
toc_priority: 7
toc_title: SQLite
---
# SQLite {#sqlite}
Движок позволяет импортировать и экспортировать данные из SQLite, а также поддерживает отправку запросов к таблицам SQLite напрямую из ClickHouse.
## Создание таблицы {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name
(
name1 [type1],
name2 [type2], ...
) ENGINE = SQLite('db_path', 'table')
```
**Параметры движка**
- `db_path` — путь к файлу с базой данных SQLite.
- `table` — имя таблицы в базе данных SQLite.
## Примеры использования {#usage-example}
Отобразим запрос, с помощью которого была создана таблица SQLite:
```sql
SHOW CREATE TABLE sqlite_db.table2;
```
``` text
CREATE TABLE SQLite.table2
(
`col1` Nullable(Int32),
`col2` Nullable(String)
)
ENGINE = SQLite('sqlite.db','table2');
```
Получим данные из таблицы:
``` sql
SELECT * FROM sqlite_db.table2 ORDER BY col1;
```
```text
┌─col1─┬─col2──┐
│ 1 │ text1 │
│ 2 │ text2 │
│ 3 │ text3 │
└──────┴───────┘
```
**См. также**
- [SQLite](../../../engines/database-engines/sqlite.md) движок баз данных
- [sqlite](../../../sql-reference/table-functions/sqlite.md) табличная функция

View File

@ -6,7 +6,7 @@ toc_title: JOIN
`JOIN` создаёт новую таблицу путем объединения столбцов из одной или нескольких таблиц с использованием общих для каждой из них значений. Это обычная операция в базах данных с поддержкой SQL, которая соответствует join из [реляционной алгебры](https://en.wikipedia.org/wiki/Relational_algebra#Joins_and_join-like_operators). Частный случай соединения одной таблицы часто называют self-join.
Синтаксис:
**Синтаксис**
``` sql
SELECT <expr_list>
@ -19,7 +19,7 @@ FROM <left_table>
## Поддерживаемые типы соединения {#select-join-types}
Все типы из стандартого [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) поддерживаются:
Все типы из стандартного [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) поддерживаются:
- `INNER JOIN`, возвращаются только совпадающие строки.
- `LEFT OUTER JOIN`, не совпадающие строки из левой таблицы возвращаются в дополнение к совпадающим строкам.
@ -33,7 +33,7 @@ FROM <left_table>
- `LEFT SEMI JOIN` и `RIGHT SEMI JOIN`, белый список по ключам соединения, не производит декартово произведение.
- `LEFT ANTI JOIN` и `RIGHT ANTI JOIN`, черный список по ключам соединения, не производит декартово произведение.
- `LEFT ANY JOIN`, `RIGHT ANY JOIN` и `INNER ANY JOIN`, Частично (для противоположных сторон `LEFT` и `RIGHT`) или полностью (для `INNER` и `FULL`) отключает декартово произведение для стандартых видов `JOIN`.
- `LEFT ANY JOIN`, `RIGHT ANY JOIN` и `INNER ANY JOIN`, Частично (для противоположных сторон `LEFT` и `RIGHT`) или полностью (для `INNER` и `FULL`) отключает декартово произведение для стандартных видов `JOIN`.
- `ASOF JOIN` и `LEFT ASOF JOIN`, Для соединения последовательностей по нечеткому совпадению. Использование `ASOF JOIN` описано ниже.
## Настройки {#join-settings}
@ -52,6 +52,61 @@ FROM <left_table>
- [join_on_disk_max_files_to_merge](../../../operations/settings/settings.md#join_on_disk_max_files_to_merge)
- [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys)
## Условия в секции ON {#on-section-conditions}
Секция `ON` может содержать несколько условий, связанных оператором `AND`. Условия, задающие ключи соединения, должны содержать столбцы левой и правой таблицы и должны использовать оператор равенства. Прочие условия могут использовать другие логические операторы, но в отдельном условии могут использоваться столбцы либо только левой, либо только правой таблицы.
Строки объединяются только тогда, когда всё составное условие выполнено. Если оно не выполнено, то строки могут попасть в результат в зависимости от типа `JOIN`. Обратите внимание, что если то же самое условие поместить в секцию `WHERE`, то строки, для которых оно не выполняется, никогда не попадут в результат.
!!! note "Примечание"
Оператор `OR` внутри секции `ON` пока не поддерживается.
!!! note "Примечание"
Если в условии использованы столбцы из разных таблиц, то пока поддерживается только оператор равенства (`=`).
**Пример**
Рассмотрим `table_1` и `table_2`:
```
┌─Id─┬─name─┐ ┌─Id─┬─text───────────┬─scores─┐
│ 1 │ A │ │ 1 │ Text A │ 10 │
│ 2 │ B │ │ 1 │ Another text A │ 12 │
│ 3 │ C │ │ 2 │ Text B │ 15 │
└────┴──────┘ └────┴────────────────┴────────┘
```
Запрос с одним условием, задающим ключ соединения, и дополнительным условием для `table_2`:
``` sql
SELECT name, text FROM table_1 LEFT OUTER JOIN table_2
ON table_1.Id = table_2.Id AND startsWith(table_2.text, 'Text');
```
Обратите внимание, что результат содержит строку с именем `C` и пустым текстом. Строка включена в результат, потому что использован тип соединения `OUTER`.
```
┌─name─┬─text───┐
│ A │ Text A │
│ B │ Text B │
│ C │ │
└──────┴────────┘
```
Запрос с типом соединения `INNER` и несколькими условиями:
``` sql
SELECT name, text, scores FROM table_1 INNER JOIN table_2
ON table_1.Id = table_2.Id AND table_2.scores > 10 AND startsWith(table_2.text, 'Text');
```
Результат:
```
┌─name─┬─text───┬─scores─┐
│ B │ Text B │ 15 │
└──────┴────────┴────────┘
```
## Использование ASOF JOIN {#asof-join-usage}
`ASOF JOIN` применим в том случае, когда необходимо объединять записи, которые не имеют точного совпадения.
@ -59,7 +114,7 @@ FROM <left_table>
Для работы алгоритма необходим специальный столбец в таблицах. Этот столбец:
- Должен содержать упорядоченную последовательность.
- Может быть одного из следующих типов: [Int*, UInt*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Date](../../data-types/date.md), [DateTime](../../data-types/datetime.md), [Decimal*](../../data-types/decimal.md).
- Может быть одного из следующих типов: [Int, UInt](../../data-types/int-uint.md), [Float](../../data-types/float.md), [Date](../../data-types/date.md), [DateTime](../../data-types/datetime.md), [Decimal](../../data-types/decimal.md).
- Не может быть единственным столбцом в секции `JOIN`.
Синтаксис `ASOF JOIN ... ON`:

View File

@ -0,0 +1,45 @@
---
toc_priority: 55
toc_title: sqlite
---
## sqlite {#sqlite}
Позволяет выполнять запросы к данным, хранящимся в базе данных [SQLite](../../engines/database-engines/sqlite.md).
**Синтаксис**
``` sql
sqlite('db_path', 'table_name')
```
**Аргументы**
- `db_path` — путь к файлу с базой данных SQLite. [String](../../sql-reference/data-types/string.md).
- `table_name` — имя таблицы в базе данных SQLite. [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
- Объект таблицы с теми же столбцами, что и в исходной таблице `SQLite`.
**Пример**
Запрос:
``` sql
SELECT * FROM sqlite('sqlite.db', 'table1') ORDER BY col2;
```
Результат:
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
└───────┴──────┘
```
**См. также**
- [SQLite](../../engines/table-engines/integrations/sqlite.md) движок таблиц

View File

@ -56,6 +56,8 @@ template <typename Value, bool float_return> using FuncQuantilesTDigestWeighted
template <typename Value, bool float_return> using FuncQuantileBFloat16 = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantileBFloat16, false, std::conditional_t<float_return, Float64, void>, false>;
template <typename Value, bool float_return> using FuncQuantilesBFloat16 = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantilesBFloat16, false, std::conditional_t<float_return, Float64, void>, true>;
template <typename Value, bool float_return> using FuncQuantileBFloat16Weighted = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantileBFloat16Weighted, true, std::conditional_t<float_return, Float64, void>, false>;
template <typename Value, bool float_return> using FuncQuantilesBFloat16Weighted = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantilesBFloat16Weighted, true, std::conditional_t<float_return, Float64, void>, true>;
template <template <typename, bool> class Function>
static constexpr bool supportDecimal()
@ -167,6 +169,9 @@ void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory)
factory.registerFunction(NameQuantileBFloat16::name, createAggregateFunctionQuantile<FuncQuantileBFloat16>);
factory.registerFunction(NameQuantilesBFloat16::name, { createAggregateFunctionQuantile<FuncQuantilesBFloat16>, properties });
factory.registerFunction(NameQuantileBFloat16Weighted::name, createAggregateFunctionQuantile<FuncQuantileBFloat16Weighted>);
factory.registerFunction(NameQuantilesBFloat16Weighted::name, createAggregateFunctionQuantile<FuncQuantilesBFloat16Weighted>);
/// 'median' is an alias for 'quantile'
factory.registerAlias("median", NameQuantile::name);
factory.registerAlias("medianDeterministic", NameQuantileDeterministic::name);
@ -179,6 +184,7 @@ void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory)
factory.registerAlias("medianTDigest", NameQuantileTDigest::name);
factory.registerAlias("medianTDigestWeighted", NameQuantileTDigestWeighted::name);
factory.registerAlias("medianBFloat16", NameQuantileBFloat16::name);
factory.registerAlias("medianBFloat16Weighted", NameQuantileBFloat16Weighted::name);
}
}

View File

@ -237,5 +237,7 @@ struct NameQuantilesTDigestWeighted { static constexpr auto name = "quantilesTDi
struct NameQuantileBFloat16 { static constexpr auto name = "quantileBFloat16"; };
struct NameQuantilesBFloat16 { static constexpr auto name = "quantilesBFloat16"; };
struct NameQuantileBFloat16Weighted { static constexpr auto name = "quantileBFloat16Weighted"; };
struct NameQuantilesBFloat16Weighted { static constexpr auto name = "quantilesBFloat16Weighted"; };
}

View File

@ -6,25 +6,37 @@
#include <bitset>
#include <random>
#include <utility>
#include <IO/ReadBufferFromString.h>
#include <Interpreters/Context.h>
#include <IO/ReadBufferFromS3.h>
#include <boost/algorithm/string.hpp>
#include <common/unit.h>
#include <Common/checkStackSize.h>
#include <Common/createHardLink.h>
#include <Common/quoteString.h>
#include <Common/thread_local_rng.h>
#include <Disks/ReadIndirectBufferFromRemoteFS.h>
#include <Disks/WriteIndirectBufferFromRemoteFS.h>
#include <Interpreters/Context.h>
#include <IO/ReadBufferFromS3.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/SeekAvoidingReadBuffer.h>
#include <IO/WriteBufferFromS3.h>
#include <IO/WriteHelpers.h>
#include <Common/createHardLink.h>
#include <Common/quoteString.h>
#include <Common/thread_local_rng.h>
#include <Common/checkStackSize.h>
#include <boost/algorithm/string.hpp>
#include <aws/s3/model/CopyObjectRequest.h> // Y_IGNORE
#include <aws/s3/model/DeleteObjectsRequest.h> // Y_IGNORE
#include <aws/s3/model/GetObjectRequest.h> // Y_IGNORE
#include <aws/s3/model/ListObjectsV2Request.h> // Y_IGNORE
#include <aws/s3/model/HeadObjectRequest.h> // Y_IGNORE
#include <aws/s3/model/CreateMultipartUploadRequest.h> // Y_IGNORE
#include <aws/s3/model/CompleteMultipartUploadRequest.h> // Y_IGNORE
#include <aws/s3/model/UploadPartCopyRequest.h> // Y_IGNORE
#include <aws/s3/model/AbortMultipartUploadRequest.h> // Y_IGNORE
namespace DB
@ -388,16 +400,7 @@ void DiskS3::saveSchemaVersion(const int & version)
void DiskS3::updateObjectMetadata(const String & key, const ObjectMetadata & metadata)
{
auto settings = current_settings.get();
Aws::S3::Model::CopyObjectRequest request;
request.SetCopySource(bucket + "/" + key);
request.SetBucket(bucket);
request.SetKey(key);
request.SetMetadata(metadata);
request.SetMetadataDirective(Aws::S3::Model::MetadataDirective::REPLACE);
auto outcome = settings->client->CopyObject(request);
throwIfError(outcome);
copyObjectImpl(bucket, key, bucket, key, std::nullopt, metadata);
}
void DiskS3::migrateFileToRestorableSchema(const String & path)
@ -553,18 +556,124 @@ void DiskS3::listObjects(const String & source_bucket, const String & source_pat
} while (outcome.GetResult().GetIsTruncated());
}
void DiskS3::copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key) const
void DiskS3::copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key,
std::optional<Aws::S3::Model::HeadObjectResult> head) const
{
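/// Objects of 5 GiB and larger cannot be copied with a single CopyObject request, so multipart copy is used for them.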
if (head && (head->GetContentLength() >= static_cast<Int64>(5_GiB)))
copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head);
else
copyObjectImpl(src_bucket, src_key, dst_bucket, dst_key);
}
void DiskS3::copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key,
std::optional<Aws::S3::Model::HeadObjectResult> head,
std::optional<std::reference_wrapper<const ObjectMetadata>> metadata) const
{
auto settings = current_settings.get();
Aws::S3::Model::CopyObjectRequest request;
request.SetCopySource(src_bucket + "/" + src_key);
request.SetBucket(dst_bucket);
request.SetKey(dst_key);
if (metadata)
{
request.SetMetadata(*metadata);
request.SetMetadataDirective(Aws::S3::Model::MetadataDirective::REPLACE);
}
auto outcome = settings->client->CopyObject(request);
if (!outcome.IsSuccess() && outcome.GetError().GetExceptionName() == "EntityTooLarge")
{ // Can't come here with MinIO, MinIO allows single part upload for large objects.
copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head, metadata);
return;
}
throwIfError(outcome);
}
void DiskS3::copyObjectMultipartImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key,
std::optional<Aws::S3::Model::HeadObjectResult> head,
std::optional<std::reference_wrapper<const ObjectMetadata>> metadata) const
{
LOG_DEBUG(log, "Multipart copy upload has been created. Src Bucket: {}, Src Key: {}, Dst Bucket: {}, Dst Key: {}, Metadata: {}",
src_bucket, src_key, dst_bucket, dst_key, metadata ? "REPLACE" : "NOT_SET");
auto settings = current_settings.get();
if (!head)
head = headObject(src_bucket, src_key);
size_t size = head->GetContentLength();
String multipart_upload_id;
{
Aws::S3::Model::CreateMultipartUploadRequest request;
request.SetBucket(dst_bucket);
request.SetKey(dst_key);
if (metadata)
request.SetMetadata(*metadata);
auto outcome = settings->client->CreateMultipartUpload(request);
throwIfError(outcome);
multipart_upload_id = outcome.GetResult().GetUploadId();
}
std::vector<String> part_tags;
size_t upload_part_size = settings->s3_min_upload_part_size;
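/// Copy the source object part by part; each UploadPartCopy request copies at most upload_part_size bytes of the source.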
for (size_t position = 0, part_number = 1; position < size; ++part_number, position += upload_part_size)
{
Aws::S3::Model::UploadPartCopyRequest part_request;
part_request.SetCopySource(src_bucket + "/" + src_key);
part_request.SetBucket(dst_bucket);
part_request.SetKey(dst_key);
part_request.SetUploadId(multipart_upload_id);
part_request.SetPartNumber(part_number);
part_request.SetCopySourceRange(fmt::format("bytes={}-{}", position, std::min(size, position + upload_part_size) - 1));
auto outcome = settings->client->UploadPartCopy(part_request);
if (!outcome.IsSuccess())
{
Aws::S3::Model::AbortMultipartUploadRequest abort_request;
abort_request.SetBucket(dst_bucket);
abort_request.SetKey(dst_key);
abort_request.SetUploadId(multipart_upload_id);
settings->client->AbortMultipartUpload(abort_request);
// In error case we throw exception later with first error from UploadPartCopy
}
throwIfError(outcome);
auto etag = outcome.GetResult().GetCopyPartResult().GetETag();
part_tags.push_back(etag);
}
{
Aws::S3::Model::CompleteMultipartUploadRequest req;
req.SetBucket(dst_bucket);
req.SetKey(dst_key);
req.SetUploadId(multipart_upload_id);
Aws::S3::Model::CompletedMultipartUpload multipart_upload;
for (size_t i = 0; i < part_tags.size(); ++i)
{
Aws::S3::Model::CompletedPart part;
multipart_upload.AddParts(part.WithETag(part_tags[i]).WithPartNumber(i + 1));
}
req.SetMultipartUpload(multipart_upload);
auto outcome = settings->client->CompleteMultipartUpload(req);
throwIfError(outcome);
LOG_DEBUG(log, "Multipart copy upload has completed. Src Bucket: {}, Src Key: {}, Dst Bucket: {}, Dst Key: {}, "
"Upload_id: {}, Parts: {}", src_bucket, src_key, dst_bucket, dst_key, multipart_upload_id, part_tags.size());
}
}
struct DiskS3::RestoreInformation
{
UInt64 revision = LATEST_REVISION;
@ -757,7 +866,7 @@ void DiskS3::processRestoreFiles(const String & source_bucket, const String & so
/// Copy object if we restore to different bucket / path.
if (bucket != source_bucket || remote_fs_root_path != source_path)
copyObject(source_bucket, key, bucket, remote_fs_root_path + relative_key);
copyObject(source_bucket, key, bucket, remote_fs_root_path + relative_key, head_result);
metadata.addObject(relative_key, head_result.GetContentLength());
metadata.save();

View File

@ -7,6 +7,7 @@
#if USE_AWS_S3
#include <atomic>
#include <optional>
#include <common/logger_useful.h>
#include "Disks/DiskFactory.h"
#include "Disks/Executor.h"
@ -131,7 +132,15 @@ private:
Aws::S3::Model::HeadObjectResult headObject(const String & source_bucket, const String & key) const;
void listObjects(const String & source_bucket, const String & source_path, std::function<bool(const Aws::S3::Model::ListObjectsV2Result &)> callback) const;
void copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key) const;
void copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key,
std::optional<Aws::S3::Model::HeadObjectResult> head = std::nullopt) const;
void copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key,
std::optional<Aws::S3::Model::HeadObjectResult> head = std::nullopt,
std::optional<std::reference_wrapper<const ObjectMetadata>> metadata = std::nullopt) const;
void copyObjectMultipartImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key,
std::optional<Aws::S3::Model::HeadObjectResult> head = std::nullopt,
std::optional<std::reference_wrapper<const ObjectMetadata>> metadata = std::nullopt) const;
/// Restore S3 metadata files on file system.
void restore();

View File

@ -170,15 +170,18 @@ namespace
auto entity = access_control.tryRead(id);
if (auto role = typeid_cast<RolePtr>(entity))
{
checkGranteeIsAllowed(current_user_access, id, *role);
if (need_check_grantees_are_allowed)
checkGranteeIsAllowed(current_user_access, id, *role);
all_granted_access.makeUnion(role->access);
}
else if (auto user = typeid_cast<UserPtr>(entity))
{
checkGranteeIsAllowed(current_user_access, id, *user);
if (need_check_grantees_are_allowed)
checkGranteeIsAllowed(current_user_access, id, *user);
all_granted_access.makeUnion(user->access);
}
}
need_check_grantees_are_allowed = false; /// already checked
if (!elements_to_revoke.empty() && elements_to_revoke[0].is_partial_revoke)
@ -200,28 +203,6 @@ namespace
current_user_access.checkGrantOption(elements_to_revoke);
}
/// Checks if the current user has enough access rights granted with grant option to grant or revoke specified access rights.
/// Also checks if grantees are allowed for the current user.
void checkGrantOptionAndGrantees(
const AccessControlManager & access_control,
const ContextAccess & current_user_access,
const std::vector<UUID> & grantees_from_query,
const AccessRightsElements & elements_to_grant,
AccessRightsElements & elements_to_revoke)
{
bool need_check_grantees_are_allowed = true;
checkGrantOption(
access_control,
current_user_access,
grantees_from_query,
need_check_grantees_are_allowed,
elements_to_grant,
elements_to_revoke);
if (need_check_grantees_are_allowed)
checkGranteesAreAllowed(access_control, current_user_access, grantees_from_query);
}
/// Checks if the current user has enough roles granted with admin option to grant or revoke specified roles.
void checkAdminOption(
const AccessControlManager & access_control,
@ -262,18 +243,21 @@ namespace
auto entity = access_control.tryRead(id);
if (auto role = typeid_cast<RolePtr>(entity))
{
checkGranteeIsAllowed(current_user_access, id, *role);
if (need_check_grantees_are_allowed)
checkGranteeIsAllowed(current_user_access, id, *role);
all_granted_roles.makeUnion(role->granted_roles);
}
else if (auto user = typeid_cast<UserPtr>(entity))
{
checkGranteeIsAllowed(current_user_access, id, *user);
if (need_check_grantees_are_allowed)
checkGranteeIsAllowed(current_user_access, id, *user);
all_granted_roles.makeUnion(user->granted_roles);
}
}
const auto & all_granted_roles_set = admin_option ? all_granted_roles.getGrantedWithAdminOption() : all_granted_roles.getGranted();
need_check_grantees_are_allowed = false; /// already checked
const auto & all_granted_roles_set = admin_option ? all_granted_roles.getGrantedWithAdminOption() : all_granted_roles.getGranted();
if (roles_to_revoke.all)
boost::range::set_difference(all_granted_roles_set, roles_to_revoke.except_ids, std::back_inserter(roles_to_revoke_ids));
else
@ -283,28 +267,45 @@ namespace
current_user_access.checkAdminOption(roles_to_revoke_ids);
}
/// Checks if the current user has enough roles granted with admin option to grant or revoke specified roles.
/// Also checks if grantees are allowed for the current user.
void checkAdminOptionAndGrantees(
const AccessControlManager & access_control,
const ContextAccess & current_user_access,
const std::vector<UUID> & grantees_from_query,
const std::vector<UUID> & roles_to_grant,
RolesOrUsersSet & roles_to_revoke,
bool admin_option)
/// Returns access rights which should be checked for executing GRANT/REVOKE on cluster.
/// This function is less accurate than checkGrantOption() because it cannot use any information about
/// access rights the grantees currently have (because those grantees are located on multiple nodes,
/// we just don't have the full information about them).
AccessRightsElements getRequiredAccessForExecutingOnCluster(const AccessRightsElements & elements_to_grant, const AccessRightsElements & elements_to_revoke)
{
bool need_check_grantees_are_allowed = true;
checkAdminOption(
access_control,
current_user_access,
grantees_from_query,
need_check_grantees_are_allowed,
roles_to_grant,
roles_to_revoke,
admin_option);
auto required_access = elements_to_grant;
required_access.insert(required_access.end(), elements_to_revoke.begin(), elements_to_revoke.end());
std::for_each(required_access.begin(), required_access.end(), [&](AccessRightsElement & element) { element.grant_option = true; });
return required_access;
}
if (need_check_grantees_are_allowed)
checkGranteesAreAllowed(access_control, current_user_access, grantees_from_query);
/// Checks if the current user has enough roles granted with admin option to grant or revoke specified roles on cluster.
/// This function is less accurate than checkAdminOption() because it cannot use any information about
/// granted roles the grantees currently have (because those grantees are located on multiple nodes,
/// we just don't have the full information about them).
void checkAdminOptionForExecutingOnCluster(const ContextAccess & current_user_access,
const std::vector<UUID> roles_to_grant,
const RolesOrUsersSet & roles_to_revoke)
{
if (roles_to_revoke.all)
{
/// Revoking all the roles on cluster always requires ROLE_ADMIN privilege
/// because when we send the query REVOKE ALL to each shard we don't know at this point
/// which roles exactly this is going to revoke on each shard.
/// However, ROLE_ADMIN allows revoking every role, which is why we check it here.
current_user_access.checkAccess(AccessType::ROLE_ADMIN);
return;
}
if (current_user_access.isGranted(AccessType::ROLE_ADMIN))
return;
for (const auto & role_id : roles_to_grant)
current_user_access.checkAdminOption(role_id);
for (const auto & role_id : roles_to_revoke.getMatchingIDs())
current_user_access.checkAdminOption(role_id);
}
template <typename T>
@ -382,29 +383,39 @@ BlockIO InterpreterGrantQuery::execute()
throw Exception("A partial revoke should be revoked, not granted", ErrorCodes::LOGICAL_ERROR);
auto & access_control = getContext()->getAccessControlManager();
auto current_user_access = getContext()->getAccess();
std::vector<UUID> grantees = RolesOrUsersSet{*query.grantees, access_control, getContext()->getUserID()}.getMatchingIDs(access_control);
/// Check if the current user has corresponding roles granted with admin option.
/// Collect access rights and roles we're going to grant or revoke.
AccessRightsElements elements_to_grant, elements_to_revoke;
collectAccessRightsElementsToGrantOrRevoke(query, elements_to_grant, elements_to_revoke);
std::vector<UUID> roles_to_grant;
RolesOrUsersSet roles_to_revoke;
collectRolesToGrantOrRevoke(access_control, query, roles_to_grant, roles_to_revoke);
checkAdminOptionAndGrantees(access_control, *getContext()->getAccess(), grantees, roles_to_grant, roles_to_revoke, query.admin_option);
/// Executing on cluster.
if (!query.cluster.empty())
{
/// To execute the command GRANT the current user needs to have the access granted with GRANT OPTION.
auto required_access = query.access_rights_elements;
std::for_each(required_access.begin(), required_access.end(), [&](AccessRightsElement & element) { element.grant_option = true; });
checkGranteesAreAllowed(access_control, *getContext()->getAccess(), grantees);
auto required_access = getRequiredAccessForExecutingOnCluster(elements_to_grant, elements_to_revoke);
checkAdminOptionForExecutingOnCluster(*current_user_access, roles_to_grant, roles_to_revoke);
checkGranteesAreAllowed(access_control, *current_user_access, grantees);
return executeDDLQueryOnCluster(query_ptr, getContext(), std::move(required_access));
}
query.replaceEmptyDatabase(getContext()->getCurrentDatabase());
/// Check if the current user has corresponding access rights granted with grant option.
String current_database = getContext()->getCurrentDatabase();
elements_to_grant.replaceEmptyDatabase(current_database);
elements_to_revoke.replaceEmptyDatabase(current_database);
bool need_check_grantees_are_allowed = true;
checkGrantOption(access_control, *current_user_access, grantees, need_check_grantees_are_allowed, elements_to_grant, elements_to_revoke);
/// Check if the current user has corresponding access rights with grant option.
AccessRightsElements elements_to_grant, elements_to_revoke;
collectAccessRightsElementsToGrantOrRevoke(query, elements_to_grant, elements_to_revoke);
checkGrantOptionAndGrantees(access_control, *getContext()->getAccess(), grantees, elements_to_grant, elements_to_revoke);
/// Check if the current user has corresponding roles granted with admin option.
checkAdminOption(access_control, *current_user_access, grantees, need_check_grantees_are_allowed, roles_to_grant, roles_to_revoke, query.admin_option);
if (need_check_grantees_are_allowed)
checkGranteesAreAllowed(access_control, *current_user_access, grantees);
/// Update roles and users listed in `grantees`.
auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr

View File

@ -679,6 +679,9 @@ bool isMetadataOnlyConversion(const IDataType * from, const IDataType * to)
while (true)
{
if (from->equals(*to))
return true;
auto it_range = ALLOWED_CONVERSIONS.equal_range(typeid(*from));
for (auto it = it_range.first; it != it_range.second; ++it)
{
@ -697,9 +700,9 @@ bool isMetadataOnlyConversion(const IDataType * from, const IDataType * to)
const auto * nullable_from = typeid_cast<const DataTypeNullable *>(from);
const auto * nullable_to = typeid_cast<const DataTypeNullable *>(to);
if (nullable_from && nullable_to)
if (nullable_to)
{
from = nullable_from->getNestedType().get();
from = nullable_from ? nullable_from->getNestedType().get() : from;
to = nullable_to->getNestedType().get();
continue;
}

View File

@ -49,6 +49,7 @@ namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int NOT_IMPLEMENTED;
extern const int CANNOT_FSTAT;
extern const int CANNOT_TRUNCATE_FILE;
extern const int DATABASE_ACCESS_DENIED;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
@ -164,6 +165,12 @@ bool StorageFile::isColumnOriented() const
StorageFile::StorageFile(int table_fd_, CommonArguments args)
: StorageFile(args)
{
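/// Query the file descriptor with fstat() to learn how many bytes there are to read in total.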
struct stat buf;
int res = fstat(table_fd_, &buf);
if (-1 == res)
throwFromErrno("Cannot execute fstat", res, ErrorCodes::CANNOT_FSTAT);
total_bytes_to_read = buf.st_size;
if (args.getContext()->getApplicationType() == Context::ApplicationType::SERVER)
throw Exception("Using file descriptor as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED);
if (args.format_name == "Distributed")
@ -208,6 +215,8 @@ StorageFile::StorageFile(const std::string & relative_table_dir_path, CommonArgu
String table_dir_path = fs::path(base_path) / relative_table_dir_path / "";
fs::create_directories(table_dir_path);
paths = {getTablePath(table_dir_path, format_name)};
if (fs::exists(paths[0]))
total_bytes_to_read = fs::file_size(paths[0]);
}
StorageFile::StorageFile(CommonArguments args)

View File

@ -8,6 +8,13 @@
<secret_access_key>minio123</secret_access_key>
<s3_max_single_part_upload_size>33554432</s3_max_single_part_upload_size>
</s3>
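<!-- S3 disk routed through the unstable proxy mock on resolver:8081 (s3_mocks/unstable_proxy.py), used to exercise read retries -->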
<unstable_s3>
<type>s3</type>
<endpoint>http://resolver:8081/root/data/</endpoint>
<access_key_id>minio</access_key_id>
<secret_access_key>minio123</secret_access_key>
<s3_max_single_read_retries>10</s3_max_single_read_retries>
</unstable_s3>
<hdd>
<type>local</type>
<path>/</path>
@ -24,6 +31,13 @@
</external>
</volumes>
</s3>
<unstable_s3>
<volumes>
<main>
<disk>unstable_s3</disk>
</main>
</volumes>
</unstable_s3>
</policies>
</storage_configuration>

View File

@ -0,0 +1,64 @@
import http.client
import http.server
import random
import socketserver
import sys
import urllib.parse
UPSTREAM_HOST = "minio1:9001"
random.seed("Unstable proxy/1.0")
def request(command, url, headers={}, data=None):
""" Mini-requests. """
class Dummy:
pass
parts = urllib.parse.urlparse(url)
c = http.client.HTTPConnection(parts.hostname, parts.port)
c.request(command, urllib.parse.urlunparse(parts._replace(scheme='', netloc='')), headers=headers, body=data)
r = c.getresponse()
result = Dummy()
result.status_code = r.status
result.headers = r.headers
result.content = r.read()
return result
class RequestHandler(http.server.BaseHTTPRequestHandler):
def do_GET(self):
if self.path == "/":
self.send_response(200)
self.send_header("Content-Type", "text/plain")
self.end_headers()
self.wfile.write(b"OK")
else:
self.do_HEAD()
def do_PUT(self):
self.do_HEAD()
def do_POST(self):
self.do_HEAD()
def do_HEAD(self):
content_length = self.headers.get("Content-Length")
data = self.rfile.read(int(content_length)) if content_length else None
r = request(self.command, f"http://{UPSTREAM_HOST}{self.path}", headers=self.headers, data=data)
self.send_response(r.status_code)
for k, v in r.headers.items():
self.send_header(k, v)
self.end_headers()
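# Simulate an unstable connection: for roughly 25% of responses larger than 1 MiB, send only the first half of the body.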
if random.random() < 0.25 and len(r.content) > 1024*1024:
r.content = r.content[:len(r.content)//2]
self.wfile.write(r.content)
self.wfile.close()
class ThreadedHTTPServer(socketserver.ThreadingMixIn, http.server.HTTPServer):
"""Handle requests in a separate thread."""
httpd = ThreadedHTTPServer(("0.0.0.0", int(sys.argv[1])), RequestHandler)
httpd.serve_forever()

View File

@ -54,6 +54,7 @@ def cluster():
logging.info("Starting cluster...")
cluster.start()
logging.info("Cluster started")
run_s3_mocks(cluster)
yield cluster
finally:
@ -77,11 +78,17 @@ def generate_values(date_str, count, sign=1):
return ",".join(["('{}',{},'{}')".format(x, y, z) for x, y, z in data])
def create_table(cluster, table_name, additional_settings=None):
def create_table(cluster, table_name, **additional_settings):
node = cluster.instances["node"]
settings = {
"storage_policy": "s3",
"old_parts_lifetime": 0,
"index_granularity": 512
}
settings.update(additional_settings)
create_table_statement = """
CREATE TABLE {} (
create_table_statement = f"""
CREATE TABLE {table_name} (
dt Date,
id Int64,
data String,
@ -89,19 +96,40 @@ def create_table(cluster, table_name, additional_settings=None):
) ENGINE=MergeTree()
PARTITION BY dt
ORDER BY (dt, id)
SETTINGS
storage_policy='s3',
old_parts_lifetime=0,
index_granularity=512
""".format(table_name)
if additional_settings:
create_table_statement += ","
create_table_statement += additional_settings
SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))}"""
node.query(create_table_statement)
def run_s3_mocks(cluster):
logging.info("Starting s3 mocks")
mocks = (
("unstable_proxy.py", "resolver", "8081"),
)
for mock_filename, container, port in mocks:
container_id = cluster.get_container_id(container)
current_dir = os.path.dirname(__file__)
cluster.copy_file_to_container(container_id, os.path.join(current_dir, "s3_mocks", mock_filename), mock_filename)
cluster.exec_in_container(container_id, ["python", mock_filename, port], detach=True)
# Wait for S3 mocks to start
for mock_filename, container, port in mocks:
num_attempts = 100
for attempt in range(num_attempts):
ping_response = cluster.exec_in_container(cluster.get_container_id(container),
["curl", "-s", f"http://localhost:{port}/"], nothrow=True)
if ping_response != "OK":
if attempt == num_attempts - 1:
assert ping_response == "OK", f'Expected "OK", but got "{ping_response}"'
else:
time.sleep(1)
else:
logging.debug(f"mock {mock_filename} ({port}) answered {ping_response} on attempt {attempt}")
break
logging.info("S3 mocks started")
def wait_for_delete_s3_objects(cluster, expected, timeout=30):
minio = cluster.minio_client
while timeout > 0:
@ -136,7 +164,7 @@ def drop_table(cluster):
]
)
def test_simple_insert_select(cluster, min_rows_for_wide_part, files_per_part):
create_table(cluster, "s3_test", additional_settings="min_rows_for_wide_part={}".format(min_rows_for_wide_part))
create_table(cluster, "s3_test", min_rows_for_wide_part=min_rows_for_wide_part)
node = cluster.instances["node"]
minio = cluster.minio_client
@ -158,13 +186,12 @@ def test_simple_insert_select(cluster, min_rows_for_wide_part, files_per_part):
"merge_vertical", [False, True]
)
def test_insert_same_partition_and_merge(cluster, merge_vertical):
settings = None
settings = {}
if merge_vertical:
settings = """
vertical_merge_algorithm_min_rows_to_activate=0,
vertical_merge_algorithm_min_columns_to_activate=0
"""
create_table(cluster, "s3_test", additional_settings=settings)
settings['vertical_merge_algorithm_min_rows_to_activate'] = 0
settings['vertical_merge_algorithm_min_columns_to_activate'] = 0
create_table(cluster, "s3_test", **settings)
node = cluster.instances["node"]
minio = cluster.minio_client
@ -459,3 +486,13 @@ def test_s3_disk_restart_during_load(cluster):
for thread in threads:
thread.join()
def test_s3_disk_reads_on_unstable_connection(cluster):
create_table(cluster, "s3_test", storage_policy='unstable_s3')
node = cluster.instances["node"]
node.query("INSERT INTO s3_test SELECT today(), *, toString(*) FROM system.numbers LIMIT 9000000")
for i in range(30):
print(f"Read sequence {i}")
assert node.query("SELECT sum(id) FROM s3_test").splitlines() == ["40499995500000"]

View File

@ -16,3 +16,5 @@
['2016-06-15 23:00:16']
2016-04-02 17:23:12
['2016-04-02 17:23:12']
2016-04-02 17:23:12
['2016-04-02 17:23:12']

View File

@ -30,4 +30,7 @@ SELECT quantilesTDigestWeighted(0.2)(d, 1) FROM datetime;
SELECT quantileBFloat16(0.2)(d) FROM datetime;
SELECT quantilesBFloat16(0.2)(d) FROM datetime;
SELECT quantileBFloat16Weighted(0.2)(d, 1) FROM datetime;
SELECT quantilesBFloat16Weighted(0.2)(d, 1) FROM datetime;
DROP TABLE datetime;