Merge branch 'master' into fix-hdfs-crash

This commit is contained in:
Kseniia Sumarokova 2021-07-16 22:21:30 +03:00 committed by GitHub
commit e844cec16f
192 changed files with 2704 additions and 2261 deletions

View File

@ -18,8 +18,6 @@
#define DATE_LUT_MAX (0xFFFFFFFFU - 86400)
#define DATE_LUT_MAX_DAY_NUM 0xFFFF
/// Max int value of Date32, DATE LUT cache size minus daynum_offset_epoch
#define DATE_LUT_MAX_EXTEND_DAY_NUM (DATE_LUT_SIZE - 16436)
/// A constant to add to time_t so every supported time point becomes non-negative and still has the same remainder of division by 3600.
/// If we treat "remainder of division" operation in the sense of modular arithmetic (not like in C++).
@ -272,8 +270,6 @@ public:
auto getOffsetAtStartOfEpoch() const { return offset_at_start_of_epoch; }
auto getTimeOffsetAtStartOfLUT() const { return offset_at_start_of_lut; }
auto getDayNumOffsetEpoch() const { return daynum_offset_epoch; }
/// All functions below are thread-safe; arguments are not checked.
inline ExtendedDayNum toDayNum(ExtendedDayNum d) const
@ -930,17 +926,15 @@ public:
{
if (unlikely(year < DATE_LUT_MIN_YEAR || year > DATE_LUT_MAX_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31))
return LUTIndex(0);
auto year_lut_index = (year - DATE_LUT_MIN_YEAR) * 12 + month - 1;
UInt32 index = years_months_lut[year_lut_index].toUnderType() + day_of_month - 1;
/// When date is out of range, default value is DATE_LUT_SIZE - 1 (2283-11-11)
return LUTIndex{std::min(index, static_cast<UInt32>(DATE_LUT_SIZE - 1))};
return LUTIndex{years_months_lut[(year - DATE_LUT_MIN_YEAR) * 12 + month - 1] + day_of_month - 1};
}
/// Create DayNum from year, month, day of month.
inline ExtendedDayNum makeDayNum(Int16 year, UInt8 month, UInt8 day_of_month, Int32 default_error_day_num = 0) const
inline ExtendedDayNum makeDayNum(Int16 year, UInt8 month, UInt8 day_of_month) const
{
if (unlikely(year < DATE_LUT_MIN_YEAR || year > DATE_LUT_MAX_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31))
return ExtendedDayNum(default_error_day_num);
return ExtendedDayNum(0);
return toDayNum(makeLUTIndex(year, month, day_of_month));
}
@ -1097,9 +1091,9 @@ public:
return lut[new_index].date + time;
}
inline NO_SANITIZE_UNDEFINED Time addWeeks(Time t, Int32 delta) const
inline NO_SANITIZE_UNDEFINED Time addWeeks(Time t, Int64 delta) const
{
return addDays(t, static_cast<Int64>(delta) * 7);
return addDays(t, delta * 7);
}
inline UInt8 saturateDayOfMonth(Int16 year, UInt8 month, UInt8 day_of_month) const
@ -1164,14 +1158,14 @@ public:
return toDayNum(addMonthsIndex(d, delta));
}
inline Time NO_SANITIZE_UNDEFINED addQuarters(Time t, Int32 delta) const
inline Time NO_SANITIZE_UNDEFINED addQuarters(Time t, Int64 delta) const
{
return addMonths(t, static_cast<Int64>(delta) * 3);
return addMonths(t, delta * 3);
}
inline ExtendedDayNum addQuarters(ExtendedDayNum d, Int32 delta) const
inline ExtendedDayNum addQuarters(ExtendedDayNum d, Int64 delta) const
{
return addMonths(d, static_cast<Int64>(delta) * 3);
return addMonths(d, delta * 3);
}
template <typename DateOrTime>

View File

@ -70,14 +70,6 @@ public:
m_day = values.day_of_month;
}
explicit LocalDate(ExtendedDayNum day_num)
{
const auto & values = DateLUT::instance().getValues(day_num);
m_year = values.year;
m_month = values.month;
m_day = values.day_of_month;
}
LocalDate(unsigned short year_, unsigned char month_, unsigned char day_)
: m_year(year_), m_month(month_), m_day(day_)
{
@ -106,12 +98,6 @@ public:
return DayNum(lut.makeDayNum(m_year, m_month, m_day).toUnderType());
}
ExtendedDayNum getExtenedDayNum() const
{
const auto & lut = DateLUT::instance();
return ExtendedDayNum (lut.makeDayNum(m_year, m_month, m_day).toUnderType());
}
operator DayNum() const
{
return getDayNum();

contrib/poco vendored

@ -1 +1 @@
Subproject commit 5994506908028612869fee627d68d8212dfe7c1e
Subproject commit 7351c4691b5d401f59e3959adfc5b4fa263b32da

contrib/rocksdb vendored

@ -1 +1 @@
Subproject commit 07c77549a20b63ff6981b400085eba36bb5c80c4
Subproject commit 6ff0adefdc84dac44e78804f7ca4122fe992cf8d

View File

@ -32,7 +32,7 @@ RUN rm -rf \
RUN apt-get clean
# Install MySQL ODBC driver
RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit.tar.gz' --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so
RUN curl 'https://downloads.mysql.com/archives/get/p/10/file/mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit.tar.gz' --location --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so
# Unfortunately this is required for a single test for converting data from zookeeper to clickhouse-keeper.
# ZooKeeper is not started by default, but consumes some space in containers.
@ -49,4 +49,3 @@ RUN mkdir /zookeeper && chmod -R 777 /zookeeper
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

View File

@ -118,7 +118,7 @@ clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_
[ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL"
# Print Fatal log messages to stdout
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log*
# Grep logs for sanitizer asserts, crashes and other critical errors
@ -131,22 +131,22 @@ zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" > /dev
rm -f /test_output/tmp
# OOM
zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
&& echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
# Logical errors
zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
&& echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv
# Crash
zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
&& echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv
# It also checks for crash without stacktrace (printed by watchdog)
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
&& echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv

View File

@ -47,7 +47,7 @@ EXCHANGE TABLES new_table AND old_table;
### ReplicatedMergeTree in Atomic Database {#replicatedmergetree-in-atomic-database}
For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables, it is recommended to not specify engine parameters - path in ZooKeeper and replica name. In this case, configuration parameters will be used [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). If you want to specify engine parameters explicitly, it is recommended to use {uuid} macros. This is useful so that unique paths are automatically generated for each table in ZooKeeper.
For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables, it is recommended not to specify engine parameters (the path in ZooKeeper and the replica name). In this case, the configuration parameters [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name) will be used. If you want to specify engine parameters explicitly, it is recommended to use the `{uuid}` macro. This ensures that unique paths are automatically generated for each table in ZooKeeper.
## See Also

View File

@ -22,4 +22,4 @@ You can also use the following database engines:
- [PostgreSQL](../../engines/database-engines/postgresql.md)
[Original article](https://clickhouse.tech/docs/en/database_engines/) <!--hide-->
- [Replicated](../../engines/database-engines/replicated.md)

View File

@ -0,0 +1,115 @@
# [experimental] Replicated {#replicated}
The engine is based on the [Atomic](../../engines/database-engines/atomic.md) engine. It supports replication of metadata via DDL log being written to ZooKeeper and executed on all of the replicas for a given database.
One ClickHouse server can have multiple replicated databases running and updating at the same time. But there can't be multiple replicas of the same replicated database.
## Creating a Database {#creating-a-database}
``` sql
CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_name') [SETTINGS ...]
```
**Engine Parameters**
- `zoo_path` — ZooKeeper path. The same ZooKeeper path corresponds to the same database.
- `shard_name` — Shard name. Database replicas are grouped into shards by `shard_name`.
- `replica_name` — Replica name. Replica names must be different for all replicas of the same shard.
!!! note "Warning"
For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables, if no arguments are provided, then the default arguments are used: `/clickhouse/tables/{uuid}/{shard}` and `{replica}`. These can be changed in the server settings [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). The `{uuid}` macro is expanded to the table's UUID, while `{shard}` and `{replica}` are expanded to values from the server config, not from the database engine arguments. In the future, it will be possible to use the `shard_name` and `replica_name` arguments of the `Replicated` database engine.
## Specifics and Recommendations {#specifics-and-recommendations}
DDL queries with `Replicated` database work in a similar way to [ON CLUSTER](../../sql-reference/distributed-ddl.md) queries, but with minor differences.
First, the DDL request tries to execute on the initiator (the host that originally received the request from the user). If the request fails, the user immediately receives an error, and other hosts do not try to fulfill it. If the request has been successfully completed on the initiator, then all other hosts will automatically retry until they complete it. The initiator will try to wait for the query to be completed on other hosts (no longer than [distributed_ddl_task_timeout](../../operations/settings/settings.md#distributed_ddl_task_timeout)) and will return a table with the query execution statuses on each host.
The behavior in case of errors is regulated by the [distributed_ddl_output_mode](../../operations/settings/settings.md#distributed_ddl_output_mode) setting; for a `Replicated` database it is better to set it to `null_status_on_timeout`, i.e. if some hosts did not have time to execute the request within [distributed_ddl_task_timeout](../../operations/settings/settings.md#distributed_ddl_task_timeout), then do not throw an exception, but show the `NULL` status for them in the table.
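For example, the recommended mode can be enabled per session (a minimal sketch; only the setting name and value come from this page):
``` sql
-- Show a NULL status for lagging hosts instead of throwing TIMEOUT_EXCEEDED
SET distributed_ddl_output_mode = 'null_status_on_timeout';
```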
The [system.clusters](../../operations/system-tables/clusters.md) system table contains a cluster named like the replicated database, which consists of all replicas of the database. This cluster is updated automatically when creating/deleting replicas, and it can be used for [Distributed](../../engines/table-engines/special/distributed.md#distributed) tables.
When creating a new replica of the database, this replica creates tables by itself. If the replica has been unavailable for a long time and has lagged behind the replication log, it checks its local metadata against the current metadata in ZooKeeper, moves the extra tables with data to a separate non-replicated database (so as not to accidentally delete anything superfluous), creates the missing tables, and updates the table names if they have been renamed. The data is replicated at the `ReplicatedMergeTree` level, i.e. if the table is not replicated, the data will not be replicated (the database is responsible only for metadata).
## Usage Example {#usage-example}
Creating a cluster with three hosts:
``` sql
node1 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','shard1','replica1');
node2 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','shard1','other_replica');
node3 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','other_shard','{replica}');
```
Running the DDL query:
``` sql
CREATE TABLE r.rmt (n UInt64) ENGINE=ReplicatedMergeTree ORDER BY n;
```
``` text
┌─────hosts────────────┬──status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ shard1|replica1 │ 0 │ │ 2 │ 0 │
│ shard1|other_replica │ 0 │ │ 1 │ 0 │
│ other_shard|r1 │ 0 │ │ 0 │ 0 │
└──────────────────────┴─────────┴───────┴─────────────────────┴──────────────────┘
```
Showing the system table:
``` sql
SELECT cluster, shard_num, replica_num, host_name, host_address, port, is_local
FROM system.clusters WHERE cluster='r';
```
``` text
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
│ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │
│ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │
└─────────┴───────────┴─────────────┴───────────┴──────────────┴──────┴──────────┘
```
Creating a distributed table and inserting the data:
``` sql
node2 :) CREATE TABLE r.d (n UInt64) ENGINE=Distributed('r','r','rmt', n % 2);
node3 :) INSERT INTO r.d SELECT * FROM numbers(10);
node1 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY host;
```
``` text
┌─hosts─┬─groupArray(n)─┐
│ node1 │ [1,3,5,7,9] │
│ node2 │ [0,2,4,6,8] │
└───────┴───────────────┘
```
Adding a replica on one more host:
``` sql
node4 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','other_shard','r2');
```
The cluster configuration will look like this:
``` text
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
│ r │ 1 │ 2 │ node4 │ 127.0.0.1 │ 9003 │ 0 │
│ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │
│ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │
└─────────┴───────────┴─────────────┴───────────┴──────────────┴──────┴──────────┘
```
The distributed table will also get data from the new host:
```sql
node2 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY host;
```
```text
┌─hosts─┬─groupArray(n)─┐
│ node2 │ [1,3,5,7,9] │
│ node4 │ [0,2,4,6,8] │
└───────┴───────────────┘
```

View File

@ -76,7 +76,7 @@ For a description of parameters, see the [CREATE query description](../../../sql
- `SAMPLE BY` — An expression for sampling. Optional.
If a sampling expression is used, the primary key must contain it. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`.
If a sampling expression is used, the primary key must contain it. The result of the sampling expression must be an unsigned integer. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`.
- `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional.

View File

@ -1130,17 +1130,18 @@ The table below shows supported data types and how they match ClickHouse [data t
| `boolean`, `int`, `long`, `float`, `double` | [Int64](../sql-reference/data-types/int-uint.md), [UInt64](../sql-reference/data-types/int-uint.md) | `long` |
| `boolean`, `int`, `long`, `float`, `double` | [Float32](../sql-reference/data-types/float.md) | `float` |
| `boolean`, `int`, `long`, `float`, `double` | [Float64](../sql-reference/data-types/float.md) | `double` |
| `bytes`, `string`, `fixed`, `enum` | [String](../sql-reference/data-types/string.md) | `bytes` |
| `bytes`, `string`, `fixed`, `enum` | [String](../sql-reference/data-types/string.md) | `bytes` or `string` \* |
| `bytes`, `string`, `fixed` | [FixedString(N)](../sql-reference/data-types/fixedstring.md) | `fixed(N)` |
| `enum` | [Enum(8\|16)](../sql-reference/data-types/enum.md) | `enum` |
| `array(T)` | [Array(T)](../sql-reference/data-types/array.md) | `array(T)` |
| `union(null, T)`, `union(T, null)` | [Nullable(T)](../sql-reference/data-types/date.md) | `union(null, T)` |
| `null` | [Nullable(Nothing)](../sql-reference/data-types/special-data-types/nothing.md) | `null` |
| `int (date)` \* | [Date](../sql-reference/data-types/date.md) | `int (date)` \* |
| `long (timestamp-millis)` \* | [DateTime64(3)](../sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* |
| `long (timestamp-micros)` \* | [DateTime64(6)](../sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* |
| `int (date)` \** | [Date](../sql-reference/data-types/date.md) | `int (date)` \** |
| `long (timestamp-millis)` \** | [DateTime64(3)](../sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \** |
| `long (timestamp-micros)` \** | [DateTime64(6)](../sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \** |
\* [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types)
\* `bytes` is the default, controlled by [output_format_avro_string_column_pattern](../operations/settings/settings.md#settings-output_format_avro_string_column_pattern)
\** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types)
Unsupported Avro data types: `record` (non-root), `map`
@ -1246,12 +1247,14 @@ The table below shows supported data types and how they match ClickHouse [data t
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` |
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` |
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `STRING` |
| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `STRING` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` |
Arrays can be nested and can have a value of the `Nullable` type as an argument.
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types can also be nested.
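As an illustrative sketch (the column aliases are hypothetical), nested `Tuple` and `Map` values can be exported directly:
``` sql
-- A Map with an Array value and a Tuple containing an Array, written as Parquet
SELECT map('tags', [1, 2, 3]) AS m, tuple('x', [4, 5]) AS t
FORMAT Parquet;
```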
ClickHouse supports configurable precision of `Decimal` type. The `INSERT` query treats the Parquet `DECIMAL` type as the ClickHouse `Decimal128` type.
@ -1299,13 +1302,17 @@ The table below shows supported data types and how they match ClickHouse [data t
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `FLOAT64` |
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` |
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `UTF8` |
| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `UTF8` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `DECIMAL256` | [Decimal256](../sql-reference/data-types/decimal.md)| `DECIMAL256` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` |
Arrays can be nested and can have a value of the `Nullable` type as an argument.
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types can also be nested.
The `DICTIONARY` type is supported for `INSERT` queries, and for `SELECT` queries there is an [output_format_arrow_low_cardinality_as_dictionary](../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary) setting that allows outputting the [LowCardinality](../sql-reference/data-types/lowcardinality.md) type as a `DICTIONARY` type.
ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the Arrow `DECIMAL` type as the ClickHouse `Decimal128` type.
@ -1358,8 +1365,10 @@ The table below shows supported data types and how they match ClickHouse [data t
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` |
Arrays can be nested and can have a value of the `Nullable` type as an argument.
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types can also be nested.
ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the ORC `DECIMAL` type as the ClickHouse `Decimal128` type.

View File

@ -157,5 +157,6 @@ toc_title: Adopters
| <a href="https://signoz.io/" class="favicon">SigNoz</a> | Observability Platform | Main Product | — | — | [Source code](https://github.com/SigNoz/signoz) |
| <a href="https://chelpipegroup.com/" class="favicon">ChelPipe Group</a> | Analytics | — | — | — | [Blog post, June 2021](https://vc.ru/trade/253172-tyazhelomu-proizvodstvu-user-friendly-sayt-internet-magazin-trub-dlya-chtpz) |
| <a href="https://zagravagames.com/en/" class="favicon">Zagrava Trading</a> | — | — | — | — | [Job offer, May 2021](https://twitter.com/datastackjobs/status/1394707267082063874) |
| <a href="https://beeline.ru/" class="favicon">Beeline</a> | Telecom | Data Platform | — | — | [Blog post, July 2021](https://habr.com/en/company/beeline/blog/567508/) |
[Original article](https://clickhouse.tech/docs/en/introduction/adopters/) <!--hide-->

View File

@ -10,7 +10,7 @@ ClickHouse server use [ZooKeeper](https://zookeeper.apache.org/) coordination sy
!!! warning "Warning"
This feature is currently in the pre-production stage. We test it in our CI and on small internal installations.
## Implemetation details
## Implementation details
ZooKeeper is one of the first well-known open-source coordination systems. It's implemented in Java and has quite a simple and powerful data model. ZooKeeper's coordination algorithm, ZAB (ZooKeeper Atomic Broadcast), doesn't provide linearizability guarantees for reads, because each ZooKeeper node serves reads locally. Unlike ZooKeeper, `clickhouse-keeper` is written in C++ and uses the [RAFT algorithm](https://raft.github.io/) [implementation](https://github.com/eBay/NuRaft). This algorithm provides linearizability for reads and writes and has several open-source implementations in different languages.

View File

@ -278,4 +278,16 @@ Possible values:
Default value: `0`.
## check_sample_column_is_correct {#check_sample_column_is_correct}
Enables the check that the column or expression used for sampling is correct at table creation.
Possible values:
- true — The sampling column or expression is checked at table creation.
- false — The sampling column or expression is not checked at table creation.
Default value: `true`.
By default, the ClickHouse server checks the column or expression used for sampling at table creation. If you already have tables with an incorrect sampling expression, set the value to `false` so that the ClickHouse server does not raise an exception at startup.
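A minimal sketch of the workaround (the table and column names are hypothetical): creating a table whose sampling expression is a signed integer, which the default check would reject:
``` sql
CREATE TABLE sampled_legacy
(
    id Int32
)
ENGINE = MergeTree
ORDER BY id
SAMPLE BY id                                       -- signed type, normally rejected
SETTINGS check_sample_column_is_correct = false;   -- skip the check for legacy schemas
```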
[Original article](https://clickhouse.tech/docs/en/operations/settings/merge_tree_settings/) <!--hide-->

View File

@ -1738,7 +1738,7 @@ Default value: 0.
## optimize_functions_to_subcolumns {#optimize-functions-to-subcolumns}
Enables or disables optimization by transforming some functions to reading subcolumns. This reduces the amount of data to read.
Enables or disables optimization by transforming some functions to reading subcolumns. This reduces the amount of data to read.
These functions can be transformed:
@ -1969,6 +1969,13 @@ Possible values: 32 (32 bytes) - 1073741824 (1 GiB)
Default value: 32768 (32 KiB)
## output_format_avro_string_column_pattern {#output_format_avro_string_column_pattern}
Regexp of column names of type String to output as Avro `string` (default is `bytes`).
RE2 syntax is supported.
Type: string
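A hedged sketch of how the setting might be used (the pattern and column names are hypothetical):
``` sql
SET output_format_avro_string_column_pattern = '.*_str$';
-- name_str matches the pattern and is written as Avro string;
-- payload keeps the default bytes encoding
SELECT 'a' AS name_str, 'b' AS payload
FORMAT Avro;
```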
## format_avro_schema_registry_url {#format_avro_schema_registry_url}
Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent) format.
@ -1998,6 +2005,16 @@ Possible values:
Default value: 16.
## merge_selecting_sleep_ms {#merge_selecting_sleep_ms}
Sleep time for merge selecting when no part is selected. A lower setting triggers selecting tasks in `background_schedule_pool` frequently, which results in a large amount of requests to ZooKeeper in large-scale clusters.
Possible values:
- Any positive integer.
Default value: `5000`.
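As a hypothetical sketch (the table name is invented, and it is assumed the setting can be changed per table like other merge tree settings):
``` sql
-- Double the sleep interval to reduce ZooKeeper request volume
ALTER TABLE large_cluster_table MODIFY SETTING merge_selecting_sleep_ms = 10000;
```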
## parallel_distributed_insert_select {#parallel_distributed_insert_select}
Enables parallel distributed `INSERT ... SELECT` query.
@ -3131,6 +3148,53 @@ SELECT
FROM fuse_tbl
```
## allow_experimental_database_replicated {#allow_experimental_database_replicated}
Allows creating databases with the [Replicated](../../engines/database-engines/replicated.md) engine.
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: `0`.
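For example (a minimal sketch; the database name and ZooKeeper path are illustrative):
``` sql
SET allow_experimental_database_replicated = 1;
CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/testdb', 'shard1', 'replica1');
```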
## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}
Sets how long the initial DDL query should wait for the Replicated database to process previous DDL queue entries, in seconds.
Possible values:
- Positive integer.
- 0 — Unlimited.
Default value: `300`.
## distributed_ddl_task_timeout {#distributed_ddl_task_timeout}
Sets the timeout for DDL query responses from all hosts in the cluster. If a DDL request has not been performed on all hosts, the response will contain a timeout error, and the request will be executed in async mode. A negative value means an infinite timeout.
Possible values:
- Positive integer.
- 0 — Async mode.
- Negative integer — infinite timeout.
Default value: `180`.
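A minimal sketch (the value is illustrative):
``` sql
-- Wait at most 30 seconds for DDL responses; after that the query
-- continues asynchronously and the response reports a timeout
SET distributed_ddl_task_timeout = 30;
```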
## distributed_ddl_output_mode {#distributed_ddl_output_mode}
Sets format of distributed DDL query result.
Possible values:
- `throw` — Returns a result set with the query execution status for all hosts where the query has finished. If the query has failed on some hosts, the first exception is rethrown. If the query has not finished yet on some hosts and [distributed_ddl_task_timeout](#distributed_ddl_task_timeout) is exceeded, a `TIMEOUT_EXCEEDED` exception is thrown.
- `none` — Similar to `throw`, but the distributed DDL query returns no result set.
- `null_status_on_timeout` — Returns `NULL` as the execution status in some rows of the result set instead of throwing `TIMEOUT_EXCEEDED` if the query is not finished on the corresponding hosts.
- `never_throw` — Does not throw `TIMEOUT_EXCEEDED` and does not rethrow exceptions if the query has failed on some hosts.
Default value: `throw`.
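A hedged sketch combining the setting with a distributed DDL query (the cluster and table names are hypothetical):
``` sql
SET distributed_ddl_output_mode = 'null_status_on_timeout';
-- Hosts that miss distributed_ddl_task_timeout get a NULL status row
-- in the returned table instead of raising TIMEOUT_EXCEEDED
CREATE TABLE t ON CLUSTER my_cluster (n UInt64) ENGINE = MergeTree ORDER BY n;
```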
## flatten_nested {#flatten-nested}
Sets the data format of [nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns.
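A brief sketch of the effect (names are illustrative): with `flatten_nested = 0`, a `Nested` column is kept as a single column of `Array(Tuple(...))` instead of being flattened into separate arrays:
``` sql
SET flatten_nested = 0;
-- The column n is stored as a single Array of Tuples
-- rather than separate n.a and n.b arrays
CREATE TABLE t_nest (n Nested(a UInt32, b String)) ENGINE = MergeTree ORDER BY tuple();
```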
@ -3210,3 +3274,14 @@ Default value: `1`.
**Usage**
If the setting is set to `0`, the table function does not make Nullable columns and inserts default values instead of NULL. This is also applicable for NULL values inside arrays.
## output_format_arrow_low_cardinality_as_dictionary {#output-format-arrow-low-cardinality-as-dictionary}
Allows converting the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) type to the `DICTIONARY` type of the [Arrow](../../interfaces/formats.md#data-format-arrow) format for `SELECT` queries.
Possible values:
- 0 — The `LowCardinality` type is not converted to the `DICTIONARY` type.
- 1 — The `LowCardinality` type is converted to the `DICTIONARY` type.
Default value: `0`.
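A minimal sketch (the query is illustrative):
``` sql
SET output_format_arrow_low_cardinality_as_dictionary = 1;
-- The LowCardinality(String) column is emitted as an Arrow DICTIONARY column
SELECT toLowCardinality('value') AS lc
FORMAT Arrow;
```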

View File

@ -47,6 +47,7 @@ Settings:
- [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part)
- [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format)
- [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types)
- [output_format_arrow_low_cardinality_as_dictionary](../../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary)
Functions:
@ -57,5 +58,3 @@ Functions:
- [A Magical Mystery Tour of the LowCardinality Data Type](https://www.altinity.com/blog/2019/3/27/low-cardinality).
- [Reducing ClickHouse Storage Cost with the Low Cardinality Type Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/).
- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/yandex/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf).
[Original article](https://clickhouse.tech/docs/en/sql-reference/data-types/lowcardinality/) <!--hide-->

View File

@ -465,27 +465,29 @@ Result:
## CAST(x, T) {#type_conversion_function-cast}
Converts input value `x` to the `T` data type. Unlike to `reinterpret` function, type conversion is performed in a natural way.
The syntax `CAST(x AS t)` is also supported.
!!! note "Note"
If value `x` does not fit the bounds of type `T`, the function overflows. For example, `CAST(-1, 'UInt8')` returns `255`.
Converts an input value to the specified data type. Unlike the [reinterpret](#type_conversion_function-reinterpret) function, `CAST` tries to present the same value using the new data type. If the conversion cannot be done, an exception is raised.
Several syntax variants are supported.
**Syntax**
``` sql
CAST(x, T)
CAST(x AS t)
x::t
```
**Arguments**
- `x` — Any type.
- `T` — Destination type. [String](../../sql-reference/data-types/string.md).
- `x` — A value to convert. May be of any type.
- `T` — The name of the target data type. [String](../../sql-reference/data-types/string.md).
- `t` — The target data type.
**Returned value**
- Destination type value.
- Converted value.
!!! note "Note"
If the input value does not fit the bounds of the target type, the result overflows. For example, `CAST(-1, 'UInt8')` returns `255`.
**Examples**
@ -494,16 +496,16 @@ Query:
```sql
SELECT
CAST(toInt8(-1), 'UInt8') AS cast_int_to_uint,
CAST(toInt8(1), 'Float32') AS cast_int_to_float,
CAST('1', 'UInt32') AS cast_string_to_int;
CAST(1.5 AS Decimal(3,2)) AS cast_float_to_decimal,
'1'::Int32 AS cast_string_to_int;
```
Result:
```
┌─cast_int_to_uint─┬─cast_int_to_float─┬─cast_string_to_int─┐
│ 255 │ 1 │ 1 │
└──────────────────┴───────────────────┴────────────────────┘
┌─cast_int_to_uint─┬─cast_float_to_decimal─┬─cast_string_to_int─┐
│ 255 │ 1.50 │ 1 │
└──────────────────┴───────────────────────┴────────────────────┘
```
Query:

View File

@ -1 +0,0 @@
../../en/development/build-osx.md

View File

@ -0,0 +1,125 @@
---
toc_priority: 65
toc_title: Build on Mac OS X
---
# How to Build ClickHouse on Mac OS X {#how-to-build-clickhouse-on-mac-os-x}
The build should work on x86_64 (Intel) machines running macOS 10.15 (Catalina) and higher, using the latest Xcode native AppleClang compiler, Homebrew's vanilla Clang, or GCC compilers.
## Install Homebrew {#install-homebrew}
``` bash
$ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
```
## Install Xcode and Command Line Tools {#install-xcode-and-command-line-tools}
1. Install the latest [Xcode](https://apps.apple.com/am/app/xcode/id497799835?mt=12) from the App Store.
2. Open it to accept the end-user license agreement. The required components will be installed automatically.
3. Then make sure that the latest Command Line Tools are selected in the system:
``` bash
$ sudo rm -rf /Library/Developer/CommandLineTools
$ sudo xcode-select --install
```
4. Reboot.
## Install Required Compilers, Tools, and Libraries {#install-required-compilers-tools-and-libraries}
``` bash
$ brew update
$ brew install cmake ninja libtool gettext llvm gcc
```
## Checkout ClickHouse Sources {#checkout-clickhouse-sources}
``` bash
$ git clone --recursive git@github.com:ClickHouse/ClickHouse.git # or https://github.com/ClickHouse/ClickHouse.git
```
## Build ClickHouse {#build-clickhouse}
To build using Xcode's native AppleClang compiler:
``` bash
$ cd ClickHouse
$ rm -rf build
$ mkdir build
$ cd build
$ cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
$ cmake --build . --config RelWithDebInfo
$ cd ..
```
To build using Homebrew's vanilla Clang compiler:
``` bash
$ cd ClickHouse
$ rm -rf build
$ mkdir build
$ cd build
$ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
$ cmake --build . --config RelWithDebInfo
$ cd ..
```
To build using Homebrew's vanilla GCC compiler:
``` bash
$ cd ClickHouse
$ rm -rf build
$ mkdir build
$ cd build
$ cmake -DCMAKE_C_COMPILER=$(brew --prefix gcc)/bin/gcc-10 -DCMAKE_CXX_COMPILER=$(brew --prefix gcc)/bin/g++-10 -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
$ cmake --build . --config RelWithDebInfo
$ cd ..
```
## Caveats {#caveats}
If you intend to run `clickhouse-server`, make sure to increase the system's `maxfiles` variable.
!!! info "Note"
    You'll need to use `sudo`.
1. Create the `/Library/LaunchDaemons/limit.maxfiles.plist` file and put the following into it:
``` xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
"http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Label</key>
<string>limit.maxfiles</string>
<key>ProgramArguments</key>
<array>
<string>launchctl</string>
<string>limit</string>
<string>maxfiles</string>
<string>524288</string>
<string>524288</string>
</array>
<key>RunAtLoad</key>
<true/>
<key>ServiceIPC</key>
<false/>
</dict>
</plist>
```
2. Execute the following command:
``` bash
$ sudo chown root:wheel /Library/LaunchDaemons/limit.maxfiles.plist
```
3. Reboot.
4. To check if it's working, run `ulimit -n`.
[Original article](https://clickhouse.tech/docs/en/development/build_osx/) <!--hide-->

View File

@ -20,3 +20,5 @@ toc_title: "Introduction"
- [PostgreSQL](../../engines/database-engines/postgresql.md)
- [Replicated](../../engines/database-engines/replicated.md)

View File

@ -0,0 +1,119 @@
# [experimental] Replicated {#replicated}
The engine is based on the [Atomic](../../engines/database-engines/atomic.md) engine. It supports replication of metadata via a DDL log that is written to ZooKeeper and executed on all replicas of a given database.
One ClickHouse server can have multiple replicated databases running and updating at the same time, but there can't be multiple replicas of the same replicated database.
## Creating a Database {#creating-a-database}
``` sql
CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_name') [SETTINGS ...]
```
**Engine Parameters**
- `zoo_path` — ZooKeeper path. The same ZooKeeper path corresponds to the same database.
- `shard_name` — Shard name. Database replicas are grouped into shards by `shard_name`.
- `replica_name` — Replica name. Replica names must be different for all replicas of the same shard.
!!! note "Warning"
    For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables, if no arguments are provided, then the default arguments are used: `/clickhouse/tables/{uuid}/{shard}` and `{replica}`. These can be changed in the server settings [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). The `{uuid}` macro is expanded to the table's UUID, while `{shard}` and `{replica}` are expanded to values from the server config, not from the database engine arguments. In the future, it will be possible to use the `shard_name` and `replica_name` arguments of the `Replicated` database engine.
## Specifics and Recommendations {#specifics-and-recommendations}
DDL queries with a `Replicated` database work in a similar way to [ON CLUSTER](../../sql-reference/distributed-ddl.md) queries, but with minor differences.
First, the DDL query tries to execute on the initiator (the host that originally received the query from the user). If the query fails, the user immediately receives an error, and the other hosts do not try to execute it. If the query completes successfully on the initiator, all the other hosts automatically retry until they complete it.
The initiator will try to wait for the query to be completed on the other hosts (no longer than [distributed_ddl_task_timeout](../../operations/settings/settings.md#distributed_ddl_task_timeout)) and will return a table with the query execution statuses on each host.
The behavior in case of errors is regulated by the [distributed_ddl_output_mode](../../operations/settings/settings.md#distributed_ddl_output_mode) setting; for a `Replicated` database it is better to set it to `null_status_on_timeout`, i.e. if some hosts did not have time to execute the query within [distributed_ddl_task_timeout](../../operations/settings/settings.md#distributed_ddl_task_timeout), then instead of an exception a `NULL` status is shown for them in the table.
The [system.clusters](../../operations/system-tables/clusters.md) system table contains a cluster named like the replicated database, which consists of all replicas of the database. This cluster is updated automatically when creating/deleting replicas, and it can be used for [Distributed](../../engines/table-engines/special/distributed.md#distributed) tables.
When creating a new replica of the database, this replica creates tables by itself. If the replica has been unavailable for a long time and has lagged behind the replication log, it checks its local metadata against the current metadata in ZooKeeper, moves the extra tables with data to a separate non-replicated database (so as not to accidentally delete anything superfluous), creates the missing tables, and updates the table names if they have been renamed. The data is replicated at the `ReplicatedMergeTree` level, i.e. if the table is not replicated, the data will not be replicated (the database is responsible only for metadata).
## Usage Example {#usage-example}
Let's create a replicated database on three hosts:
``` sql
node1 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','shard1','replica1');
node2 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','shard1','other_replica');
node3 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','other_shard','{replica}');
```
Run the DDL query on one of the hosts:
``` sql
CREATE TABLE r.rmt (n UInt64) ENGINE=ReplicatedMergeTree ORDER BY n;
```
The query will be executed on all the other hosts:
``` text
┌─────hosts────────────┬──status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ shard1|replica1 │ 0 │ │ 2 │ 0 │
│ shard1|other_replica │ 0 │ │ 1 │ 0 │
│ other_shard|r1 │ 0 │ │ 0 │ 0 │
└──────────────────────┴─────────┴───────┴─────────────────────┴──────────────────┘
```
The cluster in the `system.clusters` system table:
``` sql
SELECT cluster, shard_num, replica_num, host_name, host_address, port, is_local
FROM system.clusters WHERE cluster='r';
```
``` text
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
│ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │
│ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │
└─────────┴───────────┴─────────────┴───────────┴──────────────┴──────┴──────────┘
```
Let's create a distributed table and insert data into it:
``` sql
node2 :) CREATE TABLE r.d (n UInt64) ENGINE=Distributed('r','r','rmt', n % 2);
node3 :) INSERT INTO r.d SELECT * FROM numbers(10);
node1 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY host;
```
``` text
┌─hosts─┬─groupArray(n)─┐
│ node1 │ [1,3,5,7,9] │
│ node2 │ [0,2,4,6,8] │
└───────┴───────────────┘
```
Adding a replica:
``` sql
node4 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','other_shard','r2');
```
The new replica will automatically create all the tables that exist in the database, and the old replicas will reload the cluster configuration from ZooKeeper:
``` text
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
│ r │ 1 │ 2 │ node4 │ 127.0.0.1 │ 9003 │ 0 │
│ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │
│ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │
└─────────┴───────────┴─────────────┴───────────┴──────────────┴──────┴──────────┘
```
The distributed table will also get data from the new host:
```sql
node2 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY host;
```
```text
┌─hosts─┬─groupArray(n)─┐
│ node2 │ [1,3,5,7,9] │
│ node4 │ [0,2,4,6,8] │
└───────┴───────────────┘
```

View File

@ -100,9 +100,9 @@ sudo ./clickhouse install
For other operating systems and the AArch64 architecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours' delay).
- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse`
- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`
- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse`
- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse`
- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse`
- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`
After downloading, you can use `clickhouse client` to connect to the server or `clickhouse local` to process local data.

View File

@ -1165,12 +1165,14 @@ SELECT * FROM topic1_stream;
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` |
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` |
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `STRING` |
| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `STRING` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` |
Arrays can be nested and can have a value of the `Nullable` type as an argument.
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types can also be nested.
ClickHouse supports configurable precision for the `Decimal` type. The `INSERT` query treats the Parquet `DECIMAL` type as the ClickHouse `Decimal128` type.
@ -1218,12 +1220,17 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `FLOAT64` |
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` |
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `UTF8` |
| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `UTF8` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `DECIMAL256` | [Decimal256](../sql-reference/data-types/decimal.md)| `DECIMAL256` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` |
Arrays can be nested and can have a value of the `Nullable` type as an argument.
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types can also be nested.
The `DICTIONARY` type is supported for `INSERT` queries. For `SELECT` queries, there is the [output_format_arrow_low_cardinality_as_dictionary](../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary) setting, which allows outputting the [LowCardinality](../sql-reference/data-types/lowcardinality.md) type as `DICTIONARY`.
ClickHouse supports configurable precision for the `Decimal` type. The `INSERT` query treats the Arrow `DECIMAL` type as the ClickHouse `Decimal128` type.
@ -1276,8 +1283,10 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` |
Arrays can be nested and can have a value of the `Nullable` type as an argument.
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types can also be nested.
ClickHouse supports configurable precision for the `Decimal` type. The `INSERT` query treats the ORC `DECIMAL` type as the ClickHouse `Decimal128` type.

View File

@ -2986,6 +2986,53 @@ SELECT
FROM fuse_tbl
```
## allow_experimental_database_replicated {#allow_experimental_database_replicated}
Allows creating databases with the [Replicated](../../engines/database-engines/replicated.md) engine.
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: `0`.
## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}
Sets how long the initial DDL query should wait for the Replicated database to process previous DDL queue entries, in seconds.
Possible values:
- Positive integer.
- 0 — Unlimited.
Default value: `300`.
## distributed_ddl_task_timeout {#distributed_ddl_task_timeout}
Sets the timeout for DDL query responses from all hosts in the cluster. If a DDL request has not been performed on all hosts, the response will contain a timeout error, and the request will be executed in async mode.
Possible values:
- Positive integer.
- 0 — Async mode.
- Negative integer — infinite timeout.
Default value: `180`.
## distributed_ddl_output_mode {#distributed_ddl_output_mode}
Sets the format of a distributed DDL query result.
Possible values:
- `throw` — Returns a result set with the query execution status for all hosts where the query has finished. If the query has failed on some hosts, the first exception is rethrown. If the query has not finished yet on some hosts and the [distributed_ddl_task_timeout](#distributed_ddl_task_timeout) is exceeded, a `TIMEOUT_EXCEEDED` exception is thrown.
- `none` — Similar to `throw`, but the distributed DDL query returns no result set.
- `null_status_on_timeout` — Returns `NULL` as the execution status in some rows of the result set instead of throwing `TIMEOUT_EXCEEDED` if the query is not finished on the corresponding hosts.
- `never_throw` — Does not throw `TIMEOUT_EXCEEDED` and does not rethrow exceptions if the query has failed on some hosts.
Default value: `throw`.
## flatten_nested {#flatten-nested}
Sets the data format of [nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns.
@ -3066,3 +3113,14 @@ SETTINGS index_granularity = 8192 │
**Usage**
If the setting is set to `0`, the table function does not make Nullable columns and inserts default values instead of NULL. This is also applicable for NULL values inside arrays.
## output_format_arrow_low_cardinality_as_dictionary {#output-format-arrow-low-cardinality-as-dictionary}
Allows converting the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) type to the `DICTIONARY` type of the [Arrow](../../interfaces/formats.md#data-format-arrow) format for `SELECT` queries.
Possible values:
- 0 — The `LowCardinality` type is not converted to the `DICTIONARY` type.
- 1 — The `LowCardinality` type is converted to the `DICTIONARY` type.
Default value: `0`.

View File

@ -15,7 +15,7 @@ LowCardinality(data_type)
**Parameters**
- `data_type` — [String](string.md), [FixedString](fixedstring.md), [Date](date.md), [DateTime](datetime.md), and numbers excepting the [Decimal](decimal.md) type. `LowCardinality` is not efficient for some data types; see the [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) setting description.
- `data_type` — [String](string.md), [FixedString](fixedstring.md), [Date](date.md), [DateTime](datetime.md), and numbers excepting the [Decimal](decimal.md) type. `LowCardinality` is not efficient for some data types; see the [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) setting description.
## Description {#lowcardinality-dscr}
@ -23,11 +23,11 @@ LowCardinality(data_type)
The efficiency of the `LowCardinality` data type depends on data diversity. If a dictionary contains less than 10,000 distinct values, then ClickHouse mostly shows higher efficiency of data reading and storing. If a dictionary contains more than 100,000 distinct values, then ClickHouse can perform worse in comparison with using ordinary data types.
When working with strings, using `LowCardinality` instead of [Enum](enum.md) provides more flexibility in use and often reveals the same or higher efficiency.
When working with strings, using `LowCardinality` instead of [Enum](enum.md) provides more flexibility in use and often reveals the same or higher efficiency.
## Example
Create a table with columns of type `LowCardinality`:
Creating a table with columns of type `LowCardinality`:
```sql
CREATE TABLE lc_t
@ -43,18 +43,18 @@ ORDER BY id
Settings:
- [low_cardinality_max_dictionary_size](../../operations/settings/settings.md#low_cardinality_max_dictionary_size)
- [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part)
- [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format)
- [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types)
- [low_cardinality_max_dictionary_size](../../operations/settings/settings.md#low_cardinality_max_dictionary_size)
- [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part)
- [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format)
- [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types)
- [output_format_arrow_low_cardinality_as_dictionary](../../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary)
Functions:
- [toLowCardinality](../functions/type-conversion-functions.md#tolowcardinality)
- [toLowCardinality](../functions/type-conversion-functions.md#tolowcardinality)
## See Also
- [A Magical Mystery Tour of the LowCardinality Data Type](https://www.altinity.com/blog/2019/3/27/low-cardinality).
- [Reducing Clickhouse Storage Cost with the Low Cardinality Type Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/).
- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/yandex/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf).
- [A Magical Mystery Tour of the LowCardinality Data Type](https://www.altinity.com/blog/2019/3/27/low-cardinality).
- [Reducing Clickhouse Storage Cost with the Low Cardinality Type Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/).
- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/yandex/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf).

View File

@ -462,27 +462,29 @@ SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint,
## CAST(x, T) {#type_conversion_function-cast}
Converts the input value `x` to the specified data type `T`. Unlike the `reinterpret` function, it uses the external representation of the value `x`.
The syntax `CAST(x AS t)` is also supported.
!!! warning "Warning"
    If the value `x` cannot be converted to the type `T`, overflow occurs. For example, `CAST(-1, 'UInt8')` returns 255.
Converts an input value to the specified data type. Unlike the [reinterpret](#type_conversion_function-reinterpret) function, `CAST` tries to present the same value using the new data type. If the conversion cannot be done, an exception is raised.
Several syntax variants are supported.
**Syntax**
``` sql
CAST(x, T)
CAST(x AS t)
x::t
```
**Arguments**
- `x` — any data type.
- `T` — the destination data type. [String](../../sql-reference/data-types/string.md).
- `x` — A value to convert. May be of any type.
- `T` — The name of the target data type. [String](../../sql-reference/data-types/string.md).
- `t` — The target data type.
**Returned value**
- A value of the destination data type.
- The converted value.
!!! note "Note"
    If the input value does not fit the bounds of the target type, the result overflows. For example, `CAST(-1, 'UInt8')` returns `255`.
**Examples**
@ -491,16 +493,16 @@ CAST(x, T)
```sql
SELECT
CAST(toInt8(-1), 'UInt8') AS cast_int_to_uint,
CAST(toInt8(1), 'Float32') AS cast_int_to_float,
CAST('1', 'UInt32') AS cast_string_to_int
CAST(1.5 AS Decimal(3,2)) AS cast_float_to_decimal,
'1'::Int32 AS cast_string_to_int;
```
Result:
```
┌─cast_int_to_uint─┬─cast_int_to_float─┬─cast_string_to_int─┐
│ 255 │ 1 │ 1 │
└──────────────────┴───────────────────┴────────────────────┘
┌─cast_int_to_uint─┬─cast_float_to_decimal─┬─cast_string_to_int─┐
│ 255 │ 1.50 │ 1 │
└──────────────────┴───────────────────────┴────────────────────┘
```
Query:
@ -524,7 +526,7 @@ SELECT
Conversion to FixedString(N) works only for arguments of type [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
Conversion to the [Nullable](../../sql-reference/functions/type-conversion-functions.md) type and back is supported.
Conversion to the [Nullable](../../sql-reference/data-types/nullable.md) type and back is supported.
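For instance, a minimal sketch of a conversion into `Nullable` (assuming standard CAST semantics):

```sql
SELECT toTypeName(CAST('42' AS Nullable(UInt32))) AS to_nullable;
-- Returns: Nullable(UInt32)
```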
**Examples**

View File

@ -17,7 +17,7 @@ toc_title: PARTITION
- [CLEAR INDEX IN PARTITION](#alter_clear-index-partition) — reset the built secondary indexes for the specified partition;
- [FREEZE PARTITION](#alter_freeze-partition) — create a backup of a partition;
- [UNFREEZE PARTITION](#alter_unfreeze-partition) — remove a backup of a partition;
- [FETCH PARTITION](#alter_fetch-partition) — download a partition from another server;
- [FETCH PARTITION\|PART](#alter_fetch-partition) — download a partition or part from another server;
- [MOVE PARTITION\|PART](#alter_move-partition) — move a partition or part to another disk or volume.
- [UPDATE IN PARTITION](#update-in-partition) — update data inside a partition by condition.
- [DELETE IN PARTITION](#delete-in-partition) — delete data inside a partition by condition.
@ -209,29 +209,35 @@ ALTER TABLE 'table_name' UNFREEZE [PARTITION 'part_expr'] WITH NAME 'backup_name
Removes "frozen" partitions with the specified name from the disk. If the `PARTITION` clause is omitted, the query removes the backups of all partitions at once.
## FETCH PARTITION {#alter_fetch-partition}
## FETCH PARTITION\|PART {#alter_fetch-partition}
``` sql
ALTER TABLE table_name FETCH PARTITION partition_expr FROM 'path-in-zookeeper'
ALTER TABLE table_name FETCH PARTITION|PART partition_expr FROM 'path-in-zookeeper'
```
Downloads a partition from another server. This query works only for replicated tables.
The query does the following:
1. Downloads the partition from the specified shard. The path to the shard is specified in the `FROM` clause (path-in-zookeeper). Note that the path to the shard in ZooKeeper must be specified.
1. Downloads the partition or part from the specified shard. The path to the shard is specified in the `FROM` clause (path-in-zookeeper). Note that the path to the shard in ZooKeeper must be specified.
2. Places the downloaded data into the `detached` directory of the `table_name` table. To attach the data to the table, use the [ATTACH PARTITION\|PART](#alter_attach-partition) query.
For example:
1. FETCH PARTITION
``` sql
ALTER TABLE users FETCH PARTITION 201902 FROM '/clickhouse/tables/01-01/visits';
ALTER TABLE users ATTACH PARTITION 201902;
```
2. FETCH PART
``` sql
ALTER TABLE users FETCH PART 201901_2_2_0 FROM '/clickhouse/tables/01-01/visits';
ALTER TABLE users ATTACH PART 201901_2_2_0;
```
Keep in mind:
- The `ALTER TABLE t FETCH PARTITION` query is not replicated. It downloads the partition into the `detached` directory only on the local server.
- The `ALTER TABLE t FETCH PARTITION|PART` query is not replicated. It downloads the partition into the `detached` directory only on the local server.
- The `ALTER TABLE t ATTACH` query is replicated: it adds the data to the table on all replicas at once. On one of the replicas the data is added from the `detached` directory, on the others from the neighboring replicas.
Before downloading, the system checks that the partition exists and that its structure matches the structure of the table. The most up-to-date replica is selected automatically among all the live replicas.
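A fetched partition or part lands in `detached` and can be inspected before attaching; a hedged sketch using the standard system table (the table name is the one from the examples above):

```sql
-- After FETCH, the downloaded data sits in 'detached' until ATTACH is run.
SELECT table, name, reason FROM system.detached_parts WHERE table = 'users';
```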

View File

@ -282,7 +282,7 @@ GRANT INSERT(x,y) ON db.table TO john
- `ALTER MATERIALIZE TTL`. Level: `TABLE`. Aliases: `MATERIALIZE TTL`
- `ALTER SETTINGS`. Level: `TABLE`. Aliases: `ALTER SETTING`, `ALTER MODIFY SETTING`, `MODIFY SETTING`
- `ALTER MOVE PARTITION`. Level: `TABLE`. Aliases: `ALTER MOVE PART`, `MOVE PARTITION`, `MOVE PART`
- `ALTER FETCH PARTITION`. Level: `TABLE`. Aliases: `FETCH PARTITION`
- `ALTER FETCH PARTITION`. Level: `TABLE`. Aliases: `ALTER FETCH PART`, `FETCH PARTITION`, `FETCH PART` (see the sketch after this list)
- `ALTER FREEZE PARTITION`. Level: `TABLE`. Aliases: `FREEZE PARTITION`
- `ALTER VIEW`. Level: `GROUP`
- `ALTER VIEW REFRESH`. Level: `VIEW`. Aliases: `ALTER LIVE VIEW REFRESH`, `REFRESH VIEW`
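To illustrate the new aliases, a hedged sketch reusing the example user `john` from the hunk header above; after this change the two statements should be equivalent:

```sql
GRANT ALTER FETCH PARTITION ON db.* TO john;
-- 'FETCH PART' is now an alias of the same privilege:
GRANT FETCH PART ON db.* TO john;
```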

View File

@ -6,12 +6,12 @@ toc_title: Atomic
# Atomic {#atomic}
It supports non-blocking `DROP` and `RENAME TABLE` queries and atomic `EXCHANGE TABLES t1 AND t2` queries. The Atomic database engine is used by default.
## Creating a Database {#creating-a-database}
```sql
CREATE DATABASE test ENGINE = Atomic;
```
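A quick way to confirm which engine a database uses (a hedged example against the standard system table):

```sql
SELECT name, engine FROM system.databases WHERE name = 'test';
-- engine: Atomic
```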
[Original article](https://clickhouse.tech/docs/en/engines/database_engines/atomic/) <!--hide-->

View File

@ -1,4 +1,4 @@
# CollapsingMergeTree {#table_engine-collapsingmergetree}
The engine inherits from [MergeTree](mergetree.md) and adds the logic of collapsing rows to the data parts merge algorithm.
@ -203,4 +203,4 @@ SELECT * FROM UAct FINAL
This method of querying the data is very inefficient. Do not use it with large tables.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/collapsingmergetree/) <!--hide-->

View File

@ -3,7 +3,7 @@ toc_priority: 37
toc_title: "VersionedCollapsingMergeTree"
---
# VersionedCollapsingMergeTree {#versionedcollapsingmergetree}
This engine:

View File

@ -5,6 +5,6 @@ toc_title: Native Interface (TCP)
# Native Interface (TCP) {#native-interface-tcp}
The native protocol is used in the [command-line client](cli.md), for inter-server communication during distributed query processing, and also in other C++ programs. Unfortunately, the native ClickHouse protocol does not have a formal specification yet, but it can be reverse-engineered from the ClickHouse source code ([start here](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)) and/or by intercepting and analyzing TCP traffic.
[Original article](https://clickhouse.tech/docs/en/interfaces/tcp/) <!--hide-->

View File

@ -57,9 +57,9 @@ ClickHouse Web interface [Tabix](https://github.com/tabixio/tabix).
- Table preview.
- Autocompletion.
### clickhouse-cli {#clickhouse-cli}
[clickhouse-cli](https://github.com/hatarist/clickhouse-cli) is an alternative command-line client for ClickHouse, written in Python 3.
Features:
@ -68,15 +68,15 @@ ClickHouse Web interface [Tabix](https://github.com/tabixio/tabix).
- Pager support for the data output.
- Custom PostgreSQL-like commands.
### clickhouse-flamegraph {#clickhouse-flamegraph}
[clickhouse-flamegraph](https://github.com/Slach/clickhouse-flamegraph) is a specialized tool to visualize the `system.trace_log` as a [flamegraph](http://www.brendangregg.com/flamegraphs.html).
## Commercial {#shang-ye}
### Holistics {#holistics-software}
[Holistics](https://www.holistics.io/) was listed by Gartner's FrontRunners in 2019 as one of the two highest-ranked commercial BI tools for usability. Holistics is a full-stack, SQL-based data platform and business intelligence tool for setting up analytics processes.
Features:

View File

@ -5,9 +5,21 @@ toc_title: "Operations"
# Operations {#operations}
The ClickHouse operations manual consists of the following main sections:
- [Requirements](../operations/requirements.md)
- [Monitoring](../operations/monitoring.md)
- [Troubleshooting](../operations/troubleshooting.md)
- [Usage Recommendations](../operations/tips.md)
- [ClickHouse Upgrade](../operations/update.md)
- [Access Rights](../operations/access-rights.md)
- [Data Backup](../operations/backup.md)
- [Configuration Files](../operations/configuration-files.md)
- [Quotas](../operations/quotas.md)
- [System Tables](../operations/system-tables/index.md)
- [Server Configuration Parameters](../operations/server-configuration-parameters/index.md)
- [How to Test Your Hardware with ClickHouse](../operations/performance-test.md)
- [Settings](../operations/settings/index.md)
- [Utilities](../operations/utilities/index.md)
[Original article](https://clickhouse.tech/docs/en/operations/) <!--hide-->

View File

@ -9,7 +9,7 @@
Do not use any JavaScript or CSS frameworks or preprocessors.
This HTML page should not require any build systems (node.js, npm, gulp, etc.)
This HTML page should not be minified; instead it should be reasonably minimalistic by itself.
This HTML page should not load any external resources
This HTML page should not load any external resources on load.
(CSS and JavaScript must be embedded directly to the page. No external fonts or images should be loaded).
This UI should look as lightweight, clean and fast as possible.
All UI elements must be aligned in pixel-perfect way.
@ -343,13 +343,18 @@
/// Save query in history only if it is different.
let previous_query = '';
/// Substitute the address of the server where the page is served.
if (location.protocol != 'file:') {
const current_url = new URL(window.location);
const server_address = current_url.searchParams.get('url');
if (server_address) {
document.getElementById('url').value = server_address;
} else if (location.protocol != 'file:') {
/// Substitute the address of the server where the page is served.
document.getElementById('url').value = location.origin;
}
/// Substitute user name if it's specified in the query string
let user_from_url = (new URL(window.location)).searchParams.get('user');
const user_from_url = current_url.searchParams.get('user');
if (user_from_url) {
document.getElementById('user').value = user_from_url;
}
@ -361,7 +366,9 @@
let user = document.getElementById('user').value;
let password = document.getElementById('password').value;
let url = document.getElementById('url').value +
let server_address = document.getElementById('url').value;
let url = server_address +
/// Ask server to allow cross-domain requests.
'?add_http_cors_header=1' +
'&user=' + encodeURIComponent(user) +
@ -390,11 +397,18 @@
response: this.response.length > 100000 ? null : this.response /// Lower than the browser's limit.
};
let title = "ClickHouse Query: " + query;
let url = window.location.pathname + '?user=' + encodeURIComponent(user) + '#' + window.btoa(query);
let history_url = window.location.pathname + '?user=' + encodeURIComponent(user);
if (server_address != location.origin) {
/// Save server's address in URL if it's not identical to the address of the play UI.
history_url += '&url=' + encodeURIComponent(server_address);
}
history_url += '#' + window.btoa(query);
if (previous_query == '') {
history.replaceState(state, title, url);
history.replaceState(state, title, history_url);
} else {
history.pushState(state, title, url);
history.pushState(state, title, history_url);
}
document.title = title;
previous_query = query;
@ -599,10 +613,16 @@
}
/// Huge JS libraries should be loaded only if needed.
function loadJS(src) {
function loadJS(src, integrity) {
return new Promise((resolve, reject) => {
const script = document.createElement('script');
script.src = src;
if (integrity) {
script.crossOrigin = 'anonymous';
script.integrity = integrity;
} else {
console.warn('no integrity for', src)
}
script.addEventListener('load', function() { resolve(true); });
document.head.appendChild(script);
});
@ -613,10 +633,14 @@
if (load_dagre_promise) { return load_dagre_promise; }
load_dagre_promise = Promise.all([
loadJS('https://dagrejs.github.io/project/dagre/v0.8.5/dagre.min.js'),
loadJS('https://dagrejs.github.io/project/graphlib-dot/v0.6.4/graphlib-dot.min.js'),
loadJS('https://dagrejs.github.io/project/dagre-d3/v0.6.4/dagre-d3.min.js'),
loadJS('https://cdn.jsdelivr.net/npm/d3@7.0.0'),
loadJS('https://dagrejs.github.io/project/dagre/v0.8.5/dagre.min.js',
'sha384-2IH3T69EIKYC4c+RXZifZRvaH5SRUdacJW7j6HtE5rQbvLhKKdawxq6vpIzJ7j9M'),
loadJS('https://dagrejs.github.io/project/graphlib-dot/v0.6.4/graphlib-dot.min.js',
'sha384-Q7oatU+b+y0oTkSoiRH9wTLH6sROySROCILZso/AbMMm9uKeq++r8ujD4l4f+CWj'),
loadJS('https://dagrejs.github.io/project/dagre-d3/v0.6.4/dagre-d3.min.js',
'sha384-9N1ty7Yz7VKL3aJbOk+8ParYNW8G5W+MvxEfFL9G7CRYPmkHI9gJqyAfSI/8190W'),
loadJS('https://cdn.jsdelivr.net/npm/d3@7.0.0',
'sha384-S+Kf0r6YzKIhKA8d1k2/xtYv+j0xYUU3E7+5YLrcPVab6hBh/r1J6cq90OXhw80u'),
]);
return load_dagre_promise;

View File

@ -64,7 +64,12 @@ public:
std::lock_guard lock{mutex};
auto x = cache.get(params);
if (x)
return *x;
{
if ((*x)->getUser())
return *x;
/// No user, probably the user has been dropped while it was in the cache.
cache.remove(params);
}
auto res = std::shared_ptr<ContextAccess>(new ContextAccess(manager, params));
cache.add(params, res);
return res;

View File

@ -655,7 +655,7 @@ private:
for (auto & [lhs_childname, lhs_child] : *children)
{
if (!rhs.tryGetChild(lhs_childname))
lhs_child.flags |= rhs.flags & lhs_child.getAllGrantableFlags();
lhs_child.addGrantsRec(rhs.flags);
}
}
}
@ -673,7 +673,7 @@ private:
for (auto & [lhs_childname, lhs_child] : *children)
{
if (!rhs.tryGetChild(lhs_childname))
lhs_child.flags &= rhs.flags;
lhs_child.removeGrantsRec(~rhs.flags);
}
}
}
@ -1041,17 +1041,15 @@ void AccessRights::makeIntersection(const AccessRights & other)
auto helper = [](std::unique_ptr<Node> & root_node, const std::unique_ptr<Node> & other_root_node)
{
if (!root_node)
return;
if (!other_root_node)
{
if (other_root_node)
root_node = std::make_unique<Node>(*other_root_node);
root_node = nullptr;
return;
}
if (other_root_node)
{
root_node->makeIntersection(*other_root_node);
if (!root_node->flags && !root_node->children)
root_node = nullptr;
}
root_node->makeIntersection(*other_root_node);
if (!root_node->flags && !root_node->children)
root_node = nullptr;
};
helper(root, other.root);
helper(root_with_grant_option, other.root_with_grant_option);

View File

@ -163,11 +163,10 @@ void ContextAccess::setUser(const UserPtr & user_) const
if (!user)
{
/// User has been dropped.
auto nothing_granted = std::make_shared<AccessRights>();
access = nothing_granted;
access_with_implicit = nothing_granted;
subscription_for_user_change = {};
subscription_for_roles_changes = {};
access = nullptr;
access_with_implicit = nullptr;
enabled_roles = nullptr;
roles_info = nullptr;
enabled_row_policies = nullptr;
@ -252,32 +251,45 @@ String ContextAccess::getUserName() const
std::shared_ptr<const EnabledRolesInfo> ContextAccess::getRolesInfo() const
{
std::lock_guard lock{mutex};
return roles_info;
if (roles_info)
return roles_info;
static const auto no_roles = std::make_shared<EnabledRolesInfo>();
return no_roles;
}
std::shared_ptr<const EnabledRowPolicies> ContextAccess::getEnabledRowPolicies() const
{
std::lock_guard lock{mutex};
return enabled_row_policies;
if (enabled_row_policies)
return enabled_row_policies;
static const auto no_row_policies = std::make_shared<EnabledRowPolicies>();
return no_row_policies;
}
ASTPtr ContextAccess::getRowPolicyCondition(const String & database, const String & table_name, RowPolicy::ConditionType index, const ASTPtr & extra_condition) const
{
std::lock_guard lock{mutex};
return enabled_row_policies ? enabled_row_policies->getCondition(database, table_name, index, extra_condition) : nullptr;
if (enabled_row_policies)
return enabled_row_policies->getCondition(database, table_name, index, extra_condition);
return nullptr;
}
std::shared_ptr<const EnabledQuota> ContextAccess::getQuota() const
{
std::lock_guard lock{mutex};
return enabled_quota;
if (enabled_quota)
return enabled_quota;
static const auto unlimited_quota = EnabledQuota::getUnlimitedQuota();
return unlimited_quota;
}
std::optional<QuotaUsage> ContextAccess::getQuotaUsage() const
{
std::lock_guard lock{mutex};
return enabled_quota ? enabled_quota->getUsage() : std::optional<QuotaUsage>{};
if (enabled_quota)
return enabled_quota->getUsage();
return {};
}
@ -288,7 +300,7 @@ std::shared_ptr<const ContextAccess> ContextAccess::getFullAccess()
auto full_access = std::shared_ptr<ContextAccess>(new ContextAccess);
full_access->is_full_access = true;
full_access->access = std::make_shared<AccessRights>(AccessRights::getFullAccess());
full_access->enabled_quota = EnabledQuota::getUnlimitedQuota();
full_access->access_with_implicit = std::make_shared<AccessRights>(addImplicitAccessRights(*full_access->access));
return full_access;
}();
return res;
@ -298,28 +310,40 @@ std::shared_ptr<const ContextAccess> ContextAccess::getFullAccess()
std::shared_ptr<const Settings> ContextAccess::getDefaultSettings() const
{
std::lock_guard lock{mutex};
return enabled_settings ? enabled_settings->getSettings() : nullptr;
if (enabled_settings)
return enabled_settings->getSettings();
static const auto everything_by_default = std::make_shared<Settings>();
return everything_by_default;
}
std::shared_ptr<const SettingsConstraints> ContextAccess::getSettingsConstraints() const
{
std::lock_guard lock{mutex};
return enabled_settings ? enabled_settings->getConstraints() : nullptr;
if (enabled_settings)
return enabled_settings->getConstraints();
static const auto no_constraints = std::make_shared<SettingsConstraints>();
return no_constraints;
}
std::shared_ptr<const AccessRights> ContextAccess::getAccessRights() const
{
std::lock_guard lock{mutex};
return access;
if (access)
return access;
static const auto nothing_granted = std::make_shared<AccessRights>();
return nothing_granted;
}
std::shared_ptr<const AccessRights> ContextAccess::getAccessRightsWithImplicit() const
{
std::lock_guard lock{mutex};
return access_with_implicit;
if (access_with_implicit)
return access_with_implicit;
static const auto nothing_granted = std::make_shared<AccessRights>();
return nothing_granted;
}
@ -551,7 +575,7 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const
for (auto it = std::begin(role_ids); it != std::end(role_ids); ++it, ++i)
{
const UUID & role_id = *it;
if (info && info->enabled_roles_with_admin_option.count(role_id))
if (info->enabled_roles_with_admin_option.count(role_id))
continue;
if (throw_if_denied)
@ -560,7 +584,7 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const
if (!role_name)
role_name = "ID {" + toString(role_id) + "}";
if (info && info->enabled_roles.count(role_id))
if (info->enabled_roles.count(role_id))
show_error("Not enough privileges. "
"Role " + backQuote(*role_name) + " is granted, but without ADMIN option. "
"To execute this query it's necessary to have the role " + backQuoteIfNeed(*role_name) + " granted with ADMIN option.",

View File

@ -71,11 +71,9 @@ public:
String getUserName() const;
/// Returns information about current and enabled roles.
/// The function can return nullptr.
std::shared_ptr<const EnabledRolesInfo> getRolesInfo() const;
/// Returns information about enabled row policies.
/// The function can return nullptr.
std::shared_ptr<const EnabledRowPolicies> getEnabledRowPolicies() const;
/// Returns the row policy filter for a specified table.
@ -83,16 +81,13 @@ public:
ASTPtr getRowPolicyCondition(const String & database, const String & table_name, RowPolicy::ConditionType index, const ASTPtr & extra_condition = nullptr) const;
/// Returns the quota to track resource consumption.
/// The function returns nullptr if no tracking or limitation is needed.
std::shared_ptr<const EnabledQuota> getQuota() const;
std::optional<QuotaUsage> getQuotaUsage() const;
/// Returns the default settings, i.e. the settings to apply on user's login.
/// The function returns nullptr if it's no need to apply settings.
std::shared_ptr<const Settings> getDefaultSettings() const;
/// Returns the settings' constraints.
/// The function returns nullptr if there are no constraints.
std::shared_ptr<const SettingsConstraints> getSettingsConstraints() const;
/// Returns the current access rights.

View File

@ -12,8 +12,11 @@ size_t EnabledRowPolicies::Hash::operator()(const MixedConditionKey & key) const
}
EnabledRowPolicies::EnabledRowPolicies(const Params & params_)
: params(params_)
EnabledRowPolicies::EnabledRowPolicies() : params()
{
}
EnabledRowPolicies::EnabledRowPolicies(const Params & params_) : params(params_)
{
}

View File

@ -32,6 +32,7 @@ public:
friend bool operator >=(const Params & lhs, const Params & rhs) { return !(lhs < rhs); }
};
EnabledRowPolicies();
~EnabledRowPolicies();
using ConditionType = RowPolicy::ConditionType;

View File

@ -18,6 +18,8 @@ namespace ErrorCodes
}
SettingsConstraints::SettingsConstraints() = default;
SettingsConstraints::SettingsConstraints(const AccessControlManager & manager_) : manager(&manager_)
{
}
@ -199,10 +201,13 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh
}
};
if (reaction == THROW_ON_VIOLATION)
manager->checkSettingNameIsAllowed(setting_name);
else if (!manager->isSettingNameAllowed(setting_name))
return false;
if (manager)
{
if (reaction == THROW_ON_VIOLATION)
manager->checkSettingNameIsAllowed(setting_name);
else if (!manager->isSettingNameAllowed(setting_name))
return false;
}
Field current_value, new_value;
if (current_settings.tryGet(setting_name, current_value))

View File

@ -51,6 +51,7 @@ class AccessControlManager;
class SettingsConstraints
{
public:
SettingsConstraints();
SettingsConstraints(const AccessControlManager & manager_);
SettingsConstraints(const SettingsConstraints & src);
SettingsConstraints & operator =(const SettingsConstraints & src);

View File

@ -0,0 +1,94 @@
#include <gtest/gtest.h>
#include <Access/AccessRights.h>
using namespace DB;
TEST(AccessRights, Union)
{
AccessRights lhs, rhs;
lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1");
rhs.grant(AccessType::SELECT, "db2");
lhs.makeUnion(rhs);
ASSERT_EQ(lhs.toString(), "GRANT CREATE TABLE ON db1.tb1, GRANT SELECT ON db2.*");
lhs.clear();
rhs.clear();
rhs.grant(AccessType::SELECT, "db2");
lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1");
lhs.makeUnion(rhs);
ASSERT_EQ(lhs.toString(), "GRANT CREATE TABLE ON db1.tb1, GRANT SELECT ON db2.*");
lhs = {};
rhs = {};
lhs.grant(AccessType::SELECT);
rhs.grant(AccessType::SELECT, "db1", "tb1");
lhs.makeUnion(rhs);
ASSERT_EQ(lhs.toString(), "GRANT SELECT ON *.*");
lhs = {};
rhs = {};
lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"});
rhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"});
lhs.makeUnion(rhs);
ASSERT_EQ(lhs.toString(), "GRANT SELECT(col1, col2, col3) ON db1.tb1");
lhs = {};
rhs = {};
lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"});
rhs.grantWithGrantOption(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"});
lhs.makeUnion(rhs);
ASSERT_EQ(lhs.toString(), "GRANT SELECT(col1) ON db1.tb1, GRANT SELECT(col2, col3) ON db1.tb1 WITH GRANT OPTION");
lhs = {};
rhs = {};
lhs.grant(AccessType::INSERT);
rhs.grant(AccessType::ALL, "db1");
lhs.makeUnion(rhs);
ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, DROP, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
}
TEST(AccessRights, Intersection)
{
AccessRights lhs, rhs;
lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1");
rhs.grant(AccessType::SELECT, "db2");
lhs.makeIntersection(rhs);
ASSERT_EQ(lhs.toString(), "GRANT USAGE ON *.*");
lhs.clear();
rhs.clear();
lhs.grant(AccessType::SELECT, "db2");
rhs.grant(AccessType::CREATE_TABLE, "db1", "tb1");
lhs.makeIntersection(rhs);
ASSERT_EQ(lhs.toString(), "GRANT USAGE ON *.*");
lhs = {};
rhs = {};
lhs.grant(AccessType::SELECT);
rhs.grant(AccessType::SELECT, "db1", "tb1");
lhs.makeIntersection(rhs);
ASSERT_EQ(lhs.toString(), "GRANT SELECT ON db1.tb1");
lhs = {};
rhs = {};
lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"});
rhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"});
lhs.makeIntersection(rhs);
ASSERT_EQ(lhs.toString(), "GRANT SELECT(col2) ON db1.tb1");
lhs = {};
rhs = {};
lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"});
rhs.grantWithGrantOption(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"});
lhs.makeIntersection(rhs);
ASSERT_EQ(lhs.toString(), "GRANT SELECT(col2) ON db1.tb1");
lhs = {};
rhs = {};
lhs.grant(AccessType::INSERT);
rhs.grant(AccessType::ALL, "db1");
lhs.makeIntersection(rhs);
ASSERT_EQ(lhs.toString(), "GRANT INSERT ON db1.*");
}

View File

@ -3,7 +3,6 @@
#include <AggregateFunctions/AggregateFunctionSequenceMatch.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <common/range.h>

View File

@ -4,7 +4,6 @@
#include <AggregateFunctions/FactoryHelpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeUUID.h>
@ -50,8 +49,6 @@ AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const
return res;
else if (which.isDate())
return std::make_shared<AggregateFunctionUniq<DataTypeDate::FieldType, Data>>(argument_types);
else if (which.isDate32())
return std::make_shared<AggregateFunctionUniq<DataTypeDate32::FieldType, Data>>(argument_types);
else if (which.isDateTime())
return std::make_shared<AggregateFunctionUniq<DataTypeDateTime::FieldType, Data>>(argument_types);
else if (which.isStringOrFixedString())
@ -98,8 +95,6 @@ AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const
return res;
else if (which.isDate())
return std::make_shared<AggregateFunctionUniq<DataTypeDate::FieldType, Data<DataTypeDate::FieldType>>>(argument_types);
else if (which.isDate32())
return std::make_shared<AggregateFunctionUniq<DataTypeDate32::FieldType, Data<DataTypeDate32::FieldType>>>(argument_types);
else if (which.isDateTime())
return std::make_shared<AggregateFunctionUniq<DataTypeDateTime::FieldType, Data<DataTypeDateTime::FieldType>>>(argument_types);
else if (which.isStringOrFixedString())

View File

@ -6,7 +6,6 @@
#include <Common/FieldVisitorConvertToNumber.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <functional>
@ -52,8 +51,6 @@ namespace
return res;
else if (which.isDate())
return std::make_shared<typename WithK<K, HashValueType>::template AggregateFunction<DataTypeDate::FieldType>>(argument_types, params);
else if (which.isDate32())
return std::make_shared<typename WithK<K, HashValueType>::template AggregateFunction<DataTypeDate32::FieldType>>(argument_types, params);
else if (which.isDateTime())
return std::make_shared<typename WithK<K, HashValueType>::template AggregateFunction<DataTypeDateTime::FieldType>>(argument_types, params);
else if (which.isStringOrFixedString())

View File

@ -3,7 +3,6 @@
#include <AggregateFunctions/AggregateFunctionUniqUpTo.h>
#include <Common/FieldVisitorConvertToNumber.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFixedString.h>
@ -62,8 +61,6 @@ AggregateFunctionPtr createAggregateFunctionUniqUpTo(const std::string & name, c
return res;
else if (which.isDate())
return std::make_shared<AggregateFunctionUniqUpTo<DataTypeDate::FieldType>>(threshold, argument_types, params);
else if (which.isDate32())
return std::make_shared<AggregateFunctionUniqUpTo<DataTypeDate32::FieldType>>(threshold, argument_types, params);
else if (which.isDateTime())
return std::make_shared<AggregateFunctionUniqUpTo<DataTypeDateTime::FieldType>>(threshold, argument_types, params);
else if (which.isStringOrFixedString())

View File

@ -4,7 +4,6 @@
#include <AggregateFunctions/Helpers.h>
#include <Core/Settings.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <common/range.h>

View File

@ -109,11 +109,23 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host)
/// It should not affect client address checking, since client cannot connect from IPv6 address
/// if server has no IPv6 addresses.
flags |= Poco::Net::DNS::DNS_HINT_AI_ADDRCONFIG;
DNSResolver::IPAddresses addresses;
try
{
#if defined(ARCADIA_BUILD)
auto addresses = Poco::Net::DNS::hostByName(host, &Poco::Net::DNS::DEFAULT_DNS_TIMEOUT, flags).addresses();
addresses = Poco::Net::DNS::hostByName(host, &Poco::Net::DNS::DEFAULT_DNS_TIMEOUT, flags).addresses();
#else
auto addresses = Poco::Net::DNS::hostByName(host, flags).addresses();
addresses = Poco::Net::DNS::hostByName(host, flags).addresses();
#endif
}
catch (const Poco::Net::DNSException & e)
{
LOG_ERROR(&Poco::Logger::get("DNSResolver"), "Cannot resolve host ({}), error {}: {}.", host, e.code(), e.message());
addresses.clear();
}
if (addresses.empty())
throw Exception("Not found address of host: " + host, ErrorCodes::DNS_ERROR);

View File

@ -224,7 +224,7 @@
M(PerfLocalMemoryReferences, "Local NUMA node memory reads") \
M(PerfLocalMemoryMisses, "Local NUMA node memory read misses") \
\
M(CreatedHTTPConnections, "Total amount of created HTTP connections (closed or opened).") \
M(CreatedHTTPConnections, "Total amount of created HTTP connections (counter increase every time connection is created).") \
\
M(CannotWriteToWriteBufferDiscard, "Number of stack traces dropped by query profiler or signal handler because pipe is full or cannot write to pipe.") \
M(QueryProfilerSignalOverruns, "Number of times we drop processing of a signal due to overrun plus the number of signals that OS has not delivered due to overrun.") \

View File

@ -375,9 +375,13 @@ void Block::setColumn(size_t position, ColumnWithTypeAndName && column)
throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Position {} out of bound in Block::setColumn(), max position {}",
position, toString(data.size()));
data[position].name = std::move(column.name);
data[position].type = std::move(column.type);
data[position].column = std::move(column.column);
if (data[position].name != column.name)
{
index_by_name.erase(data[position].name);
index_by_name.emplace(column.name, position);
}
data[position] = std::move(column);
}
@ -436,7 +440,7 @@ Block Block::sortColumns() const
Block sorted_block;
/// std::unordered_map (index_by_name) cannot be used to guarantee the sort order
std::vector<decltype(index_by_name.begin())> sorted_index_by_name(index_by_name.size());
std::vector<IndexByName::const_iterator> sorted_index_by_name(index_by_name.size());
{
size_t i = 0;
for (auto it = index_by_name.begin(); it != index_by_name.end(); ++it)

View File

@ -68,7 +68,7 @@ public:
const_cast<const Block *>(this)->findByName(name));
}
const ColumnWithTypeAndName* findByName(const std::string & name) const;
const ColumnWithTypeAndName * findByName(const std::string & name) const;
ColumnWithTypeAndName & getByName(const std::string & name)
{

View File

@ -62,8 +62,6 @@ void ExternalResultDescription::init(const Block & sample_block_)
types.emplace_back(ValueType::vtString, is_nullable);
else if (which.isDate())
types.emplace_back(ValueType::vtDate, is_nullable);
else if (which.isDate32())
types.emplace_back(ValueType::vtDate32, is_nullable);
else if (which.isDateTime())
types.emplace_back(ValueType::vtDateTime, is_nullable);
else if (which.isUUID())

View File

@ -26,7 +26,6 @@ struct ExternalResultDescription
vtEnum16,
vtString,
vtDate,
vtDate32,
vtDateTime,
vtUUID,
vtDateTime64,

View File

@ -9,6 +9,7 @@
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Interpreters/convertFieldToType.h>
#include <IO/ReadHelpers.h>
@ -89,9 +90,6 @@ void insertPostgreSQLValue(
case ExternalResultDescription::ValueType::vtDate:
assert_cast<ColumnUInt16 &>(column).insertValue(UInt16{LocalDate{std::string(value)}.getDayNum()});
break;
case ExternalResultDescription::ValueType::vtDate32:
assert_cast<ColumnInt32 &>(column).insertValue(Int32{LocalDate{std::string(value)}.getExtenedDayNum()});
break;
case ExternalResultDescription::ValueType::vtDateTime:
{
ReadBufferFromString in(value);
@ -102,7 +100,16 @@ void insertPostgreSQLValue(
assert_cast<ColumnUInt32 &>(column).insertValue(time);
break;
}
case ExternalResultDescription::ValueType::vtDateTime64:[[fallthrough]];
case ExternalResultDescription::ValueType::vtDateTime64:
{
ReadBufferFromString in(value);
DateTime64 time = 0;
readDateTime64Text(time, 6, in, assert_cast<const DataTypeDateTime64 *>(data_type.get())->getTimeZone());
if (time < 0)
time = 0;
assert_cast<ColumnDecimal<Decimal64> &>(column).insertValue(time);
break;
}
case ExternalResultDescription::ValueType::vtDecimal32: [[fallthrough]];
case ExternalResultDescription::ValueType::vtDecimal64: [[fallthrough]];
case ExternalResultDescription::ValueType::vtDecimal128: [[fallthrough]];
@ -204,6 +211,18 @@ void preparePostgreSQLArrayInfo(
ReadBufferFromString in(field);
time_t time = 0;
readDateTimeText(time, in, assert_cast<const DataTypeDateTime *>(nested.get())->getTimeZone());
if (time < 0)
time = 0;
return time;
};
else if (which.isDateTime64())
parser = [nested](std::string & field) -> Field
{
ReadBufferFromString in(field);
DateTime64 time = 0;
readDateTime64Text(time, 6, in, assert_cast<const DataTypeDateTime64 *>(nested.get())->getTimeZone());
if (time < 0)
time = 0;
return time;
};
else if (which.isDecimal32())

View File

@ -482,6 +482,8 @@ class IColumn;
M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \
M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \
\
M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \
\
/** Experimental functions */ \
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
\
@ -524,6 +526,7 @@ class IColumn;
M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \
M(Bool, input_format_avro_allow_missing_fields, false, "For Avro/AvroConfluent format: when field is not found in schema use default value instead of error", 0) \
M(URI, format_avro_schema_registry_url, "", "For AvroConfluent format: Confluent Schema Registry URL.", 0) \
M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \
\
M(Bool, output_format_json_quote_64bit_integers, true, "Controls quoting of 64-bit integers in JSON output format.", 0) \
\
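For orientation, a sketch of the function this new threshold guards (standard `range` behavior; the default limit only matters for far larger arrays):

```sql
SELECT range(5) AS r;  -- [0, 1, 2, 3, 4]
-- A query whose per-block sum of generated array sizes exceeds
-- function_range_max_elements_in_block is rejected.
SELECT range(number) FROM numbers(10);
```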

View File

@ -39,7 +39,6 @@ enum class TypeIndex
Float32,
Float64,
Date,
Date32,
DateTime,
DateTime64,
String,
@ -258,7 +257,6 @@ inline constexpr const char * getTypeName(TypeIndex idx)
case TypeIndex::Float32: return "Float32";
case TypeIndex::Float64: return "Float64";
case TypeIndex::Date: return "Date";
case TypeIndex::Date32: return "Date32";
case TypeIndex::DateTime: return "DateTime";
case TypeIndex::DateTime64: return "DateTime64";
case TypeIndex::String: return "String";

View File

@ -192,7 +192,6 @@ bool callOnIndexAndDataType(TypeIndex number, F && f, ExtraArgs && ... args)
case TypeIndex::Decimal256: return f(TypePair<DataTypeDecimal<Decimal256>, T>(), std::forward<ExtraArgs>(args)...);
case TypeIndex::Date: return f(TypePair<DataTypeDate, T>(), std::forward<ExtraArgs>(args)...);
case TypeIndex::Date32: return f(TypePair<DataTypeDate, T>(), std::forward<ExtraArgs>(args)...);
case TypeIndex::DateTime: return f(TypePair<DataTypeDateTime, T>(), std::forward<ExtraArgs>(args)...);
case TypeIndex::DateTime64: return f(TypePair<DataTypeDateTime64, T>(), std::forward<ExtraArgs>(args)...);

View File

@ -1,23 +0,0 @@
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/Serializations/SerializationDate32.h>
namespace DB
{
bool DataTypeDate32::equals(const IDataType & rhs) const
{
return typeid(rhs) == typeid(*this);
}
SerializationPtr DataTypeDate32::doGetDefaultSerialization() const
{
return std::make_shared<SerializationDate32>();
}
void registerDataTypeDate32(DataTypeFactory & factory)
{
factory.registerSimpleDataType(
"Date32", [] { return DataTypePtr(std::make_shared<DataTypeDate32>()); }, DataTypeFactory::CaseInsensitive);
}
}

View File

@ -1,23 +0,0 @@
#pragma once
#include <DataTypes/DataTypeNumberBase.h>
namespace DB
{
class DataTypeDate32 final : public DataTypeNumberBase<Int32>
{
public:
static constexpr auto family_name = "Date32";
TypeIndex getTypeId() const override { return TypeIndex::Date32; }
const char * getFamilyName() const override { return family_name; }
bool canBeUsedAsVersion() const override { return true; }
bool canBeInsideNullable() const override { return true; }
bool equals(const IDataType & rhs) const override;
protected:
SerializationPtr doGetDefaultSerialization() const override;
};
}

View File

@ -194,7 +194,6 @@ DataTypeFactory::DataTypeFactory()
registerDataTypeNumbers(*this);
registerDataTypeDecimal(*this);
registerDataTypeDate(*this);
registerDataTypeDate32(*this);
registerDataTypeDateTime(*this);
registerDataTypeString(*this);
registerDataTypeFixedString(*this);

View File

@ -69,7 +69,6 @@ private:
void registerDataTypeNumbers(DataTypeFactory & factory);
void registerDataTypeDecimal(DataTypeFactory & factory);
void registerDataTypeDate(DataTypeFactory & factory);
void registerDataTypeDate32(DataTypeFactory & factory);
void registerDataTypeDateTime(DataTypeFactory & factory);
void registerDataTypeString(DataTypeFactory & factory);
void registerDataTypeFixedString(DataTypeFactory & factory);

View File

@ -78,8 +78,6 @@ MutableColumnUniquePtr DataTypeLowCardinality::createColumnUniqueImpl(const IDat
return creator(static_cast<ColumnFixedString *>(nullptr));
else if (which.isDate())
return creator(static_cast<ColumnVector<UInt16> *>(nullptr));
else if (which.isDate32())
return creator(static_cast<ColumnVector<Int32> *>(nullptr));
else if (which.isDateTime())
return creator(static_cast<ColumnVector<UInt32> *>(nullptr));
else if (which.isUUID())

View File

@ -322,10 +322,8 @@ struct WhichDataType
constexpr bool isEnum() const { return isEnum8() || isEnum16(); }
constexpr bool isDate() const { return idx == TypeIndex::Date; }
constexpr bool isDate32() const { return idx == TypeIndex::Date32; }
constexpr bool isDateTime() const { return idx == TypeIndex::DateTime; }
constexpr bool isDateTime64() const { return idx == TypeIndex::DateTime64; }
constexpr bool isDateOrDate32() const { return isDate() || isDate32(); }
constexpr bool isString() const { return idx == TypeIndex::String; }
constexpr bool isFixedString() const { return idx == TypeIndex::FixedString; }
@ -349,10 +347,6 @@ struct WhichDataType
template <typename T>
inline bool isDate(const T & data_type) { return WhichDataType(data_type).isDate(); }
template <typename T>
inline bool isDate32(const T & data_type) { return WhichDataType(data_type).isDate32(); }
template <typename T>
inline bool isDateOrDate32(const T & data_type) { return WhichDataType(data_type).isDateOrDate32(); }
template <typename T>
inline bool isDateTime(const T & data_type) { return WhichDataType(data_type).isDateTime(); }
template <typename T>
inline bool isDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTime64(); }

View File

@ -1,78 +0,0 @@
#include <DataTypes/Serializations/SerializationDate32.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Columns/ColumnsNumber.h>
#include <Common/assert_cast.h>
namespace DB
{
void SerializationDate32::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeDateText(ExtendedDayNum(assert_cast<const ColumnInt32 &>(column).getData()[row_num]), ostr);
}
void SerializationDate32::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextEscaped(column, istr, settings);
}
void SerializationDate32::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
ExtendedDayNum x;
readDateText(x, istr);
assert_cast<ColumnInt32 &>(column).getData().push_back(x);
}
void SerializationDate32::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeText(column, row_num, ostr, settings);
}
void SerializationDate32::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('\'', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('\'', ostr);
}
void SerializationDate32::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
ExtendedDayNum x;
assertChar('\'', istr);
readDateText(x, istr);
assertChar('\'', istr);
assert_cast<ColumnInt32 &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
}
void SerializationDate32::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('"', ostr);
}
void SerializationDate32::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
ExtendedDayNum x;
assertChar('"', istr);
readDateText(x, istr);
assertChar('"', istr);
assert_cast<ColumnInt32 &>(column).getData().push_back(x);
}
void SerializationDate32::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('"', ostr);
}
void SerializationDate32::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
LocalDate value;
readCSV(value, istr);
assert_cast<ColumnInt32 &>(column).getData().push_back(value.getExtenedDayNum());
}
}

View File

@ -1,21 +0,0 @@
#pragma once
#include <DataTypes/Serializations/SerializationNumber.h>
namespace DB
{
class SerializationDate32 final : public SerializationNumber<Int32>
{
public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
};
}

View File

@ -16,7 +16,6 @@ SRCS(
DataTypeCustomIPv4AndIPv6.cpp
DataTypeCustomSimpleAggregateFunction.cpp
DataTypeDate.cpp
DataTypeDate32.cpp
DataTypeDateTime.cpp
DataTypeDateTime64.cpp
DataTypeDecimalBase.cpp
@ -46,7 +45,6 @@ SRCS(
Serializations/SerializationArray.cpp
Serializations/SerializationCustomSimpleText.cpp
Serializations/SerializationDate.cpp
Serializations/SerializationDate32.cpp
Serializations/SerializationDateTime.cpp
Serializations/SerializationDateTime64.cpp
Serializations/SerializationDecimal.cpp

View File

@ -9,7 +9,7 @@
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <Common/quoteString.h>
@ -71,7 +71,7 @@ static DataTypePtr convertPostgreSQLDataType(String & type, const std::function<
else if (type == "bigserial")
res = std::make_shared<DataTypeUInt64>();
else if (type.starts_with("timestamp"))
res = std::make_shared<DataTypeDateTime>();
res = std::make_shared<DataTypeDateTime64>(6);
else if (type == "date")
res = std::make_shared<DataTypeDate>();
else if (type.starts_with("numeric"))

View File

@ -10,6 +10,7 @@
#include <Parsers/ASTColumnDeclaration.h>
#include <Interpreters/Context.h>
#include <Storages/StorageSQLite.h>
#include <Databases/SQLite/SQLiteUtils.h>
namespace DB
@ -30,17 +31,7 @@ DatabaseSQLite::DatabaseSQLite(
, database_engine_define(database_engine_define_->clone())
, log(&Poco::Logger::get("DatabaseSQLite"))
{
sqlite3 * tmp_sqlite_db = nullptr;
int status = sqlite3_open(database_path_.c_str(), &tmp_sqlite_db);
if (status != SQLITE_OK)
{
throw Exception(ErrorCodes::SQLITE_ENGINE_ERROR,
"Cannot access sqlite database. Error status: {}. Message: {}",
status, sqlite3_errstr(status));
}
sqlite_db = std::shared_ptr<sqlite3>(tmp_sqlite_db, sqlite3_close);
sqlite_db = openSQLiteDB(database_path_, context_);
}

View File

@ -0,0 +1,57 @@
#include "SQLiteUtils.h"
#if USE_SQLITE
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
namespace ErrorCodes
{
extern const int PATH_ACCESS_DENIED;
}
String validateSQLiteDatabasePath(const String & path, const String & user_files_path)
{
String canonical_user_files_path = fs::canonical(user_files_path);
String canonical_path;
std::error_code err;
if (fs::path(path).is_relative())
canonical_path = fs::canonical(fs::path(user_files_path) / path, err);
else
canonical_path = fs::canonical(path, err);
if (err)
throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "SQLite database path '{}' is invalid. Error: {}", path, err.message());
if (!canonical_path.starts_with(canonical_user_files_path))
throw Exception(ErrorCodes::PATH_ACCESS_DENIED,
"SQLite database file path '{}' must be inside 'user_files' directory", path);
return canonical_path;
}
SQLitePtr openSQLiteDB(const String & database_path, ContextPtr context)
{
auto validated_path = validateSQLiteDatabasePath(database_path, context->getUserFilesPath());
sqlite3 * tmp_sqlite_db = nullptr;
int status = sqlite3_open(validated_path.c_str(), &tmp_sqlite_db);
if (status != SQLITE_OK)
throw Exception(ErrorCodes::PATH_ACCESS_DENIED,
"Cannot access sqlite database. Error status: {}. Message: {}",
status, sqlite3_errstr(status));
return std::shared_ptr<sqlite3>(tmp_sqlite_db, sqlite3_close);
}
}
#endif
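For context, a hedged sketch of how this validation surfaces at the SQL level, using the SQLite database engine wired up in DatabaseSQLite above (the file name is illustrative):

```sql
-- The path resolves relative to the server's user_files directory;
-- a path escaping that directory now fails with PATH_ACCESS_DENIED.
CREATE DATABASE sqlite_db ENGINE = SQLite('db_file.sqlite3');
SHOW TABLES FROM sqlite_db;
```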

View File

@ -0,0 +1,22 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include "config_core.h"
#endif
#if USE_SQLITE
#include <Core/Types.h>
#include <Interpreters/Context.h>
#include <sqlite3.h> // Y_IGNORE
namespace DB
{
using SQLitePtr = std::shared_ptr<sqlite3>;
SQLitePtr openSQLiteDB(const String & database_path, ContextPtr context);
}
#endif

View File

@ -28,6 +28,7 @@ SRCS(
MySQL/MaterializeMySQLSettings.cpp
MySQL/MaterializeMySQLSyncThread.cpp
SQLite/DatabaseSQLite.cpp
SQLite/SQLiteUtils.cpp
SQLite/fetchSQLiteTableStructure.cpp
)

View File

@ -5,6 +5,7 @@
#include <IO/FileEncryptionCommon.h>
#include <IO/ReadBufferFromEncryptedFile.h>
#include <IO/WriteBufferFromEncryptedFile.h>
#include <boost/algorithm/hex.hpp>
namespace DB
@ -12,13 +13,81 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int INCORRECT_DISK_INDEX;
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
extern const int LOGICAL_ERROR;
}
using DiskEncryptedPtr = std::shared_ptr<DiskEncrypted>;
using namespace FileEncryption;
namespace
{
using DiskEncryptedPtr = std::shared_ptr<DiskEncrypted>;
using namespace FileEncryption;
String unhexKey(const String & hex, const String & disk_name)
{
try
{
return boost::algorithm::unhex(hex);
}
catch (const std::exception &)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read key_hex for disk {}, check for valid characters [0-9a-fA-F] and length", disk_name);
}
}
struct DiskEncryptedSettings
{
String key;
DiskPtr wrapped_disk;
String path_on_wrapped_disk;
DiskEncryptedSettings(
const String & disk_name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const DisksMap & map)
{
String wrapped_disk_name = config.getString(config_prefix + ".disk", "");
if (wrapped_disk_name.empty())
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Name of the wrapped disk must not be empty. An encrypted disk is a wrapper over another disk. "
"Disk {}", disk_name);
key = config.getString(config_prefix + ".key", "");
String key_hex = config.getString(config_prefix + ".key_hex", "");
if (!key.empty() && !key_hex.empty())
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Both 'key' and 'key_hex' are specified. There should be only one. Disk {}", disk_name);
if (!key_hex.empty())
{
assert(key.empty());
key = unhexKey(key_hex, disk_name);
}
if (key.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Key of the encrypted disk must not be empty. Disk {}", disk_name);
if (!FileEncryption::isKeyLengthSupported(key.length()))
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Key length is not supported, supported only keys of length 16, 24, or 32 bytes. Disk {}", disk_name);
auto wrapped_disk_it = map.find(wrapped_disk_name);
if (wrapped_disk_it == map.end())
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"The wrapped disk must have been announced earlier. No disk with name {}. Disk {}",
wrapped_disk_name, disk_name);
wrapped_disk = wrapped_disk_it->second;
path_on_wrapped_disk = config.getString(config_prefix + ".path", "");
}
};
bool inline isSameDiskType(const IDisk & one, const IDisk & another)
{
return typeid(one) == typeid(another);
}
}
class DiskEncryptedReservation : public IReservation
{
@ -57,23 +126,45 @@ ReservationPtr DiskEncrypted::reserve(UInt64 bytes)
DiskEncrypted::DiskEncrypted(const String & name_, DiskPtr disk_, const String & key_, const String & path_)
: DiskDecorator(disk_)
, name(name_), key(key_), disk_path(path_)
, disk_absolute_path(delegate->getPath() + disk_path)
{
initialize();
}
void DiskEncrypted::initialize()
{
disk_absolute_path = delegate->getPath() + disk_path;
// use wrapped_disk as an EncryptedDisk store
if (disk_path.empty())
return;
if (disk_path.back() != '/')
throw Exception("Disk path must ends with '/', but '" + disk_path + "' doesn't.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Disk path must ends with '/', but '" + disk_path + "' doesn't.", ErrorCodes::BAD_ARGUMENTS);
delegate->createDirectories(disk_path);
}
void DiskEncrypted::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
{
/// Check if we can copy the file without deciphering.
if (isSameDiskType(*this, *to_disk))
{
/// Disk type is the same, check if the key is the same too.
if (auto * to_encrypted_disk = typeid_cast<DiskEncrypted *>(to_disk.get()))
{
if (key == to_encrypted_disk->key)
{
/// Key is the same so we can simply copy the encrypted file.
delegate->copy(wrappedPath(from_path), to_encrypted_disk->delegate, wrappedPath(to_path));
return;
}
}
}
/// Copy the file through buffers with deciphering.
copyThroughBuffers(from_path, to_disk, to_path);
}
std::unique_ptr<ReadBufferFromFileBase> DiskEncrypted::readFile(
const String & path,
size_t buf_size,
@ -85,37 +176,28 @@ std::unique_ptr<ReadBufferFromFileBase> DiskEncrypted::readFile(
auto wrapped_path = wrappedPath(path);
auto buffer = delegate->readFile(wrapped_path, buf_size, estimated_size, aio_threshold, mmap_threshold, mmap_cache);
String iv;
size_t offset = 0;
if (exists(path) && getFileSize(path))
{
iv = readIV(kIVSize, *buffer);
offset = kIVSize;
}
else
iv = randomString(kIVSize);
return std::make_unique<ReadBufferFromEncryptedFile>(buf_size, std::move(buffer), iv, key, offset);
InitVector iv;
iv.read(*buffer);
return std::make_unique<ReadBufferFromEncryptedFile>(buf_size, std::move(buffer), key, iv);
}
std::unique_ptr<WriteBufferFromFileBase> DiskEncrypted::writeFile(const String & path, size_t buf_size, WriteMode mode)
{
String iv;
size_t start_offset = 0;
InitVector iv;
UInt64 old_file_size = 0;
auto wrapped_path = wrappedPath(path);
if (mode == WriteMode::Append && exists(path) && getFileSize(path))
{
auto read_buffer = delegate->readFile(wrapped_path, kIVSize);
iv = readIV(kIVSize, *read_buffer);
start_offset = getFileSize(path);
auto read_buffer = delegate->readFile(wrapped_path, InitVector::kSize);
iv.read(*read_buffer);
old_file_size = getFileSize(path);
}
else
iv = randomString(kIVSize);
iv = InitVector::random();
auto buffer = delegate->writeFile(wrapped_path, buf_size, mode);
return std::make_unique<WriteBufferFromEncryptedFile>(buf_size, std::move(buffer), iv, key, start_offset);
return std::make_unique<WriteBufferFromEncryptedFile>(buf_size, std::move(buffer), key, iv, old_file_size);
}
@ -123,13 +205,13 @@ size_t DiskEncrypted::getFileSize(const String & path) const
{
auto wrapped_path = wrappedPath(path);
size_t size = delegate->getFileSize(wrapped_path);
return size > kIVSize ? (size - kIVSize) : 0;
return size > InitVector::kSize ? (size - InitVector::kSize) : 0;
}
void DiskEncrypted::truncateFile(const String & path, size_t size)
{
auto wrapped_path = wrappedPath(path);
delegate->truncateFile(wrapped_path, size ? (size + kIVSize) : 0);
delegate->truncateFile(wrapped_path, size ? (size + InitVector::kSize) : 0);
}
SyncGuardPtr DiskEncrypted::getDirectorySyncGuard(const String & path) const
@ -144,22 +226,10 @@ void DiskEncrypted::applyNewSettings(
const String & config_prefix,
const DisksMap & map)
{
String wrapped_disk_name = config.getString(config_prefix + ".disk", "");
if (wrapped_disk_name.empty())
throw Exception("The wrapped disk name can not be empty. An encrypted disk is a wrapper over another disk. "
"Disk " + name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
key = config.getString(config_prefix + ".key", "");
if (key.empty())
throw Exception("Encrypted disk key can not be empty. Disk " + name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
auto wrapped_disk = map.find(wrapped_disk_name);
if (wrapped_disk == map.end())
throw Exception("The wrapped disk must have been announced earlier. No disk with name " + wrapped_disk_name + ". Disk " + name,
ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
delegate = wrapped_disk->second;
disk_path = config.getString(config_prefix + ".path", "");
DiskEncryptedSettings settings{name, config, config_prefix, map};
key = settings.key;
delegate = settings.wrapped_disk;
disk_path = settings.path_on_wrapped_disk;
initialize();
}
@ -169,28 +239,10 @@ void registerDiskEncrypted(DiskFactory & factory)
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
ContextPtr /*context*/,
const DisksMap & map) -> DiskPtr {
String wrapped_disk_name = config.getString(config_prefix + ".disk", "");
if (wrapped_disk_name.empty())
throw Exception("The wrapped disk name can not be empty. An encrypted disk is a wrapper over another disk. "
"Disk " + name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
String key = config.getString(config_prefix + ".key", "");
if (key.empty())
throw Exception("Encrypted disk key can not be empty. Disk " + name, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
if (key.size() != cipherKeyLength(defaultCipher()))
throw Exception("Expected key with size " + std::to_string(cipherKeyLength(defaultCipher())) + ", got key with size " + std::to_string(key.size()),
ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
auto wrapped_disk = map.find(wrapped_disk_name);
if (wrapped_disk == map.end())
throw Exception("The wrapped disk must have been announced earlier. No disk with name " + wrapped_disk_name + ". Disk " + name,
ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
String relative_path = config.getString(config_prefix + ".path", "");
return std::make_shared<DiskEncrypted>(name, wrapped_disk->second, key, relative_path);
const DisksMap & map) -> DiskPtr
{
DiskEncryptedSettings settings{name, config, config_prefix, map};
return std::make_shared<DiskEncrypted>(name, settings.wrapped_disk, settings.key, settings.path_on_wrapped_disk);
};
factory.registerDiskType("encrypted", creator);
}
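Both the reconfiguration path and the factory above now funnel through DiskEncryptedSettings, whose definition is not part of these hunks. A plausible reconstruction of what it must do, pieced together from the removed validation code (the three field names match the usage above; the constructor shape is an assumption, and the real struct presumably extracts the disk/key/path strings from the Poco configuration itself):

#include <map>
#include <memory>
#include <stdexcept>
#include <string>

struct IDiskStub {};  /// stand-in for IDisk
using DiskPtr = std::shared_ptr<IDiskStub>;
using DisksMap = std::map<std::string, DiskPtr>;

struct DiskEncryptedSettingsSketch
{
    std::string key;
    DiskPtr wrapped_disk;
    std::string path_on_wrapped_disk;

    DiskEncryptedSettingsSketch(
        const std::string & disk_name,
        const std::string & wrapped_disk_name,
        const std::string & key_,
        const std::string & path,
        const DisksMap & map)
    {
        /// Same checks the two removed blocks used to do inline.
        if (wrapped_disk_name.empty())
            throw std::runtime_error("The wrapped disk name can not be empty. Disk " + disk_name);
        if (key_.empty())
            throw std::runtime_error("Encrypted disk key can not be empty. Disk " + disk_name);
        auto it = map.find(wrapped_disk_name);
        if (it == map.end())
            throw std::runtime_error("The wrapped disk must have been announced earlier. Disk " + disk_name);
        key = key_;
        wrapped_disk = it->second;
        path_on_wrapped_disk = path;
    }
};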

View File

@ -14,6 +14,9 @@ namespace DB
class ReadBufferFromFileBase;
class WriteBufferFromFileBase;
/// An encrypted disk ciphers all written files on the fly and writes the encrypted files to an underlying (normal) disk.
/// When we read files from an encrypted disk it deciphers them automatically,
/// so we can work with an encrypted disk as if it were a normal disk.
class DiskEncrypted : public DiskDecorator
{
public:
@ -102,10 +105,7 @@ public:
delegate->listFiles(wrapped_path, file_names);
}
void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override
{
IDisk::copy(from_path, to_disk, to_path);
}
void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override;
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,

View File

@ -309,7 +309,7 @@ void DiskLocal::copy(const String & from_path, const std::shared_ptr<IDisk> & to
fs::copy(from, to, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way.
}
else
IDisk::copy(from_path, to_disk, to_path); /// Copy files through buffers.
copyThroughBuffers(from_path, to_disk, to_path); /// Base implementation.
}
SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const

View File

@ -58,7 +58,7 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p
}
}
void IDisk::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
{
auto & exec = to_disk->getExecutor();
ResultsCollector results;
@ -71,6 +71,11 @@ void IDisk::copy(const String & from_path, const std::shared_ptr<IDisk> & to_dis
result.get();
}
void IDisk::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
{
copyThroughBuffers(from_path, to_disk, to_path);
}
void IDisk::truncateFile(const String &, size_t)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Truncate operation is not implemented for disk of type {}", getType());

View File

@ -246,6 +246,11 @@ protected:
/// Returns executor to perform asynchronous operations.
virtual Executor & getExecutor() { return *executor; }
/// Base implementation of the function copy().
/// It just opens two files, reads data in portions from the first file, and writes it to the second one.
/// A derived class may override copy() to provide a faster implementation.
void copyThroughBuffers(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path);
private:
std::unique_ptr<Executor> executor;
};
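DiskLocal above falls back to exactly this routine whenever fs::copy cannot be used. Reduced to the two buffers involved, the portion-by-portion loop the comment describes looks roughly like this (a sketch of the idea only; the real copyThroughBuffers() also walks directories and schedules each file on the disk's executor):

#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>

/// Pump all remaining bytes from `from` to `to`, one buffered portion at a time.
void copyPortions(DB::ReadBuffer & from, DB::WriteBuffer & to)
{
    while (!from.eof())  /// eof() refills the read buffer as a side effect
    {
        size_t portion = from.available();    /// bytes currently in the read buffer
        to.write(from.position(), portion);   /// hand them to the write buffer
        from.position() += portion;           /// mark the portion as consumed
    }
}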

View File

@ -60,6 +60,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.avro.output_codec = settings.output_format_avro_codec;
format_settings.avro.output_sync_interval = settings.output_format_avro_sync_interval;
format_settings.avro.schema_registry_url = settings.format_avro_schema_registry_url.toString();
format_settings.avro.string_column_pattern = settings.output_format_avro_string_column_pattern.toString();
format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes;
format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes;
format_settings.csv.crlf_end_of_line = settings.output_format_csv_crlf_end_of_line;

View File

@ -61,6 +61,7 @@ struct FormatSettings
String output_codec;
UInt64 output_sync_interval = 16 * 1024;
bool allow_missing_fields = false;
String string_column_pattern;
} avro;
struct CSV
@ -169,4 +170,3 @@ struct FormatSettings
};
}

View File

@ -42,11 +42,6 @@ struct ToYearWeekImpl
YearWeek yw = time_zone.toYearWeek(time_zone.toDayNum(t), week_mode | static_cast<UInt32>(WeekModeFlag::YEAR));
return yw.first * 100 + yw.second;
}
static inline UInt32 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(ExtendedDayNum(d), week_mode | static_cast<UInt32>(WeekModeFlag::YEAR));
return yw.first * 100 + yw.second;
}
static inline UInt32 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(DayNum(d), week_mode | static_cast<UInt32>(WeekModeFlag::YEAR));
@ -70,10 +65,6 @@ struct ToStartOfWeekImpl
return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode);
// return time_zone.toFirstDayNumOfWeek(t, week_mode);
}
static inline UInt16 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(ExtendedDayNum(d), week_mode);
}
static inline UInt16 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(ExtendedDayNum(d), week_mode);
@ -97,11 +88,6 @@ struct ToWeekImpl
YearWeek yw = time_zone.toYearWeek(time_zone.toDayNum(t), week_mode);
return yw.second;
}
static inline UInt8 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(ExtendedDayNum(d), week_mode);
return yw.second;
}
static inline UInt8 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone)
{
YearWeek yw = time_zone.toYearWeek(DayNum(d), week_mode);

View File

@ -46,7 +46,6 @@ struct ZeroTransform
{
static inline UInt16 execute(Int64, const DateLUTImpl &) { return 0; }
static inline UInt16 execute(UInt32, const DateLUTImpl &) { return 0; }
static inline UInt16 execute(Int32, const DateLUTImpl &) { return 0; }
static inline UInt16 execute(UInt16, const DateLUTImpl &) { return 0; }
};
@ -62,10 +61,6 @@ struct ToDateImpl
{
return UInt16(time_zone.toDayNum(t));
}
static inline UInt16 execute(Int32, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
static inline UInt16 execute(UInt16 d, const DateLUTImpl &)
{
return d;
@ -87,10 +82,6 @@ struct ToStartOfDayImpl
{
return time_zone.toDate(t);
}
static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toDate(ExtendedDayNum(d));
}
static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toDate(ExtendedDayNum(d));
@ -113,10 +104,6 @@ struct ToMondayImpl
//return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t));
return time_zone.toFirstDayNumOfWeek(t);
}
static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(ExtendedDayNum(d));
}
static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfWeek(ExtendedDayNum(d));
@ -137,10 +124,6 @@ struct ToStartOfMonthImpl
{
return time_zone.toFirstDayNumOfMonth(time_zone.toDayNum(t));
}
static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfMonth(ExtendedDayNum(d));
}
static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfMonth(ExtendedDayNum(d));
@ -161,10 +144,6 @@ struct ToStartOfQuarterImpl
{
return time_zone.toFirstDayNumOfQuarter(time_zone.toDayNum(t));
}
static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfQuarter(ExtendedDayNum(d));
}
static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfQuarter(ExtendedDayNum(d));
@ -185,10 +164,6 @@ struct ToStartOfYearImpl
{
return time_zone.toFirstDayNumOfYear(time_zone.toDayNum(t));
}
static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfYear(ExtendedDayNum(d));
}
static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfYear(ExtendedDayNum(d));
@ -211,10 +186,7 @@ struct ToTimeImpl
{
return time_zone.toTime(t) + 86400;
}
static inline UInt32 execute(Int32, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
static inline UInt32 execute(UInt16, const DateLUTImpl &)
{
return dateIsNotSupported(name);
@ -235,10 +207,6 @@ struct ToStartOfMinuteImpl
{
return time_zone.toStartOfMinute(t);
}
static inline UInt32 execute(Int32, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
static inline UInt32 execute(UInt16, const DateLUTImpl &)
{
return dateIsNotSupported(name);
@ -274,10 +242,6 @@ struct ToStartOfSecondImpl
{
throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
static inline UInt32 execute(Int32, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
static inline UInt32 execute(UInt16, const DateLUTImpl &)
{
return dateIsNotSupported(name);
@ -298,10 +262,6 @@ struct ToStartOfFiveMinuteImpl
{
return time_zone.toStartOfFiveMinute(t);
}
static inline UInt32 execute(Int32, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
static inline UInt32 execute(UInt16, const DateLUTImpl &)
{
return dateIsNotSupported(name);
@ -322,10 +282,6 @@ struct ToStartOfTenMinutesImpl
{
return time_zone.toStartOfTenMinutes(t);
}
static inline UInt32 execute(Int32, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
static inline UInt32 execute(UInt16, const DateLUTImpl &)
{
return dateIsNotSupported(name);
@ -346,10 +302,6 @@ struct ToStartOfFifteenMinutesImpl
{
return time_zone.toStartOfFifteenMinutes(t);
}
static inline UInt32 execute(Int32, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
static inline UInt32 execute(UInt16, const DateLUTImpl &)
{
return dateIsNotSupported(name);
@ -374,11 +326,6 @@ struct TimeSlotImpl
return t / 1800 * 1800;
}
static inline UInt32 execute(Int32, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
static inline UInt32 execute(UInt16, const DateLUTImpl &)
{
return dateIsNotSupported(name);
@ -401,11 +348,6 @@ struct ToStartOfHourImpl
return time_zone.toStartOfHour(t);
}
static inline UInt32 execute(Int32, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
static inline UInt32 execute(UInt16, const DateLUTImpl &)
{
return dateIsNotSupported(name);
@ -426,10 +368,6 @@ struct ToYearImpl
{
return time_zone.toYear(t);
}
static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toYear(ExtendedDayNum(d));
}
static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toYear(ExtendedDayNum(d));
@ -450,10 +388,6 @@ struct ToQuarterImpl
{
return time_zone.toQuarter(t);
}
static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toQuarter(ExtendedDayNum(d));
}
static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toQuarter(ExtendedDayNum(d));
@ -474,10 +408,6 @@ struct ToMonthImpl
{
return time_zone.toMonth(t);
}
static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toMonth(ExtendedDayNum(d));
}
static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toMonth(ExtendedDayNum(d));
@ -498,10 +428,6 @@ struct ToDayOfMonthImpl
{
return time_zone.toDayOfMonth(t);
}
static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toDayOfMonth(ExtendedDayNum(d));
}
static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toDayOfMonth(ExtendedDayNum(d));
@ -522,10 +448,6 @@ struct ToDayOfWeekImpl
{
return time_zone.toDayOfWeek(t);
}
static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toDayOfWeek(ExtendedDayNum(d));
}
static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toDayOfWeek(ExtendedDayNum(d));
@ -546,10 +468,6 @@ struct ToDayOfYearImpl
{
return time_zone.toDayOfYear(t);
}
static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toDayOfYear(ExtendedDayNum(d));
}
static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toDayOfYear(ExtendedDayNum(d));
@ -570,10 +488,7 @@ struct ToHourImpl
{
return time_zone.toHour(t);
}
static inline UInt8 execute(Int32, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
static inline UInt8 execute(UInt16, const DateLUTImpl &)
{
return dateIsNotSupported(name);
@ -596,11 +511,6 @@ struct TimezoneOffsetImpl
return time_zone.timezoneOffset(t);
}
static inline time_t execute(Int32, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
static inline time_t execute(UInt16, const DateLUTImpl &)
{
return dateIsNotSupported(name);
@ -621,10 +531,6 @@ struct ToMinuteImpl
{
return time_zone.toMinute(t);
}
static inline UInt8 execute(Int32, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
static inline UInt8 execute(UInt16, const DateLUTImpl &)
{
return dateIsNotSupported(name);
@ -645,10 +551,6 @@ struct ToSecondImpl
{
return time_zone.toSecond(t);
}
static inline UInt8 execute(Int32, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
static inline UInt8 execute(UInt16, const DateLUTImpl &)
{
return dateIsNotSupported(name);
@ -669,10 +571,6 @@ struct ToISOYearImpl
{
return time_zone.toISOYear(time_zone.toDayNum(t));
}
static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toISOYear(ExtendedDayNum(d));
}
static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toISOYear(ExtendedDayNum(d));
@ -693,10 +591,6 @@ struct ToStartOfISOYearImpl
{
return time_zone.toFirstDayNumOfISOYear(time_zone.toDayNum(t));
}
static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfISOYear(ExtendedDayNum(d));
}
static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toFirstDayNumOfISOYear(ExtendedDayNum(d));
@ -717,10 +611,6 @@ struct ToISOWeekImpl
{
return time_zone.toISOWeek(time_zone.toDayNum(t));
}
static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toISOWeek(ExtendedDayNum(d));
}
static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toISOWeek(ExtendedDayNum(d));
@ -741,10 +631,6 @@ struct ToRelativeYearNumImpl
{
return time_zone.toYear(static_cast<time_t>(t));
}
static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toYear(ExtendedDayNum(d));
}
static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toYear(ExtendedDayNum(d));
@ -765,10 +651,6 @@ struct ToRelativeQuarterNumImpl
{
return time_zone.toRelativeQuarterNum(static_cast<time_t>(t));
}
static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toRelativeQuarterNum(ExtendedDayNum(d));
}
static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toRelativeQuarterNum(ExtendedDayNum(d));
@ -789,10 +671,6 @@ struct ToRelativeMonthNumImpl
{
return time_zone.toRelativeMonthNum(static_cast<time_t>(t));
}
static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toRelativeMonthNum(ExtendedDayNum(d));
}
static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toRelativeMonthNum(ExtendedDayNum(d));
@ -813,10 +691,6 @@ struct ToRelativeWeekNumImpl
{
return time_zone.toRelativeWeekNum(static_cast<time_t>(t));
}
static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toRelativeWeekNum(ExtendedDayNum(d));
}
static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toRelativeWeekNum(ExtendedDayNum(d));
@ -837,10 +711,6 @@ struct ToRelativeDayNumImpl
{
return time_zone.toDayNum(static_cast<time_t>(t));
}
static inline UInt16 execute(Int32 d, const DateLUTImpl &)
{
return static_cast<ExtendedDayNum>(d);
}
static inline UInt16 execute(UInt16 d, const DateLUTImpl &)
{
return static_cast<DayNum>(d);
@ -862,10 +732,6 @@ struct ToRelativeHourNumImpl
{
return time_zone.toRelativeHourNum(static_cast<time_t>(t));
}
static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toRelativeHourNum(ExtendedDayNum(d));
}
static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toRelativeHourNum(ExtendedDayNum(d));
@ -886,10 +752,6 @@ struct ToRelativeMinuteNumImpl
{
return time_zone.toRelativeMinuteNum(static_cast<time_t>(t));
}
static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toRelativeMinuteNum(ExtendedDayNum(d));
}
static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toRelativeMinuteNum(ExtendedDayNum(d));
@ -910,10 +772,6 @@ struct ToRelativeSecondNumImpl
{
return t;
}
static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.fromDayNum(ExtendedDayNum(d));
}
static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.fromDayNum(ExtendedDayNum(d));
@ -934,10 +792,6 @@ struct ToYYYYMMImpl
{
return time_zone.toNumYYYYMM(t);
}
static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toNumYYYYMM(static_cast<ExtendedDayNum>(d));
}
static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toNumYYYYMM(static_cast<DayNum>(d));
@ -958,10 +812,6 @@ struct ToYYYYMMDDImpl
{
return time_zone.toNumYYYYMMDD(t);
}
static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toNumYYYYMMDD(static_cast<ExtendedDayNum>(d));
}
static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toNumYYYYMMDD(static_cast<DayNum>(d));
@ -982,10 +832,6 @@ struct ToYYYYMMDDhhmmssImpl
{
return time_zone.toNumYYYYMMDDhhmmss(t);
}
static inline UInt64 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toNumYYYYMMDDhhmmss(time_zone.toDate(static_cast<ExtendedDayNum>(d)));
}
static inline UInt64 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toNumYYYYMMDDhhmmss(time_zone.toDate(static_cast<DayNum>(d)));

View File

@ -1,6 +1,5 @@
#pragma once
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <Functions/CustomWeekTransforms.h>
@ -36,7 +35,7 @@ public:
{
if (arguments.size() == 1)
{
if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
throw Exception(
"Illegal type " + arguments[0].type->getName() + " of argument of function " + getName()
+ ". Should be a date or a date with time",
@ -44,7 +43,7 @@ public:
}
else if (arguments.size() == 2)
{
if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
throw Exception(
"Illegal type " + arguments[0].type->getName() + " of argument of function " + getName()
+ ". Should be a date or a date with time",
@ -60,7 +59,7 @@ public:
}
else if (arguments.size() == 3)
{
if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
throw Exception(
"Illegal type " + arguments[0].type->getName() + " of argument of function " + getName()
+ ". Should be a date or a date with time",
@ -106,9 +105,6 @@ public:
if (which.isDate())
return CustomWeekTransformImpl<DataTypeDate, ToDataType>::execute(
arguments, result_type, input_rows_count, Transform{});
else if (which.isDate32())
return CustomWeekTransformImpl<DataTypeDate32, ToDataType>::execute(
arguments, result_type, input_rows_count, Transform{});
else if (which.isDateTime())
return CustomWeekTransformImpl<DataTypeDateTime, ToDataType>::execute(
arguments, result_type, input_rows_count, Transform{});

View File

@ -2,7 +2,6 @@
#include <common/DateLUTImpl.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
@ -51,11 +50,7 @@ struct AddSecondsImpl
{
return t + delta;
}
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
{
// use default datetime64 scale
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000;
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
{
return time_zone.fromDayNum(ExtendedDayNum(d)) + delta;
@ -76,11 +71,7 @@ struct AddMinutesImpl
{
return t + delta * 60;
}
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
{
// use default datetime64 scale
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000;
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
{
return time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60;
@ -100,11 +91,7 @@ struct AddHoursImpl
{
return t + delta * 3600;
}
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
{
// use default datetime64 scale
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000;
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
{
return time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600;
@ -130,11 +117,6 @@ struct AddDaysImpl
{
return d + delta;
}
static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &)
{
return d + delta;
}
};
struct AddWeeksImpl
@ -142,22 +124,17 @@ struct AddWeeksImpl
static constexpr auto name = "addWeeks";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone)
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
{
return {time_zone.addWeeks(t.whole, delta), t.fractional};
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone)
{
return time_zone.addWeeks(t, delta);
}
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &)
{
return d + delta * 7;
}
static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &)
{
return d + delta * 7;
}
@ -182,11 +159,6 @@ struct AddMonthsImpl
{
return time_zone.addMonths(ExtendedDayNum(d), delta);
}
static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
{
return time_zone.addMonths(ExtendedDayNum(d), delta);
}
};
struct AddQuartersImpl
@ -194,22 +166,17 @@ struct AddQuartersImpl
static constexpr auto name = "addQuarters";
static inline DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone)
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
{
return {time_zone.addQuarters(t.whole, delta), t.fractional};
}
static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone)
static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone)
{
return time_zone.addQuarters(t, delta);
}
static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl & time_zone)
{
return time_zone.addQuarters(ExtendedDayNum(d), delta);
}
static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl & time_zone)
static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
{
return time_zone.addQuarters(ExtendedDayNum(d), delta);
}
@ -234,11 +201,6 @@ struct AddYearsImpl
{
return time_zone.addYears(ExtendedDayNum(d), delta);
}
static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
{
return time_zone.addYears(ExtendedDayNum(d), delta);
}
};
template <typename Transform>
@ -380,7 +342,7 @@ template <typename FieldType> struct ResultDataTypeMap {};
template <> struct ResultDataTypeMap<UInt16> { using ResultDataType = DataTypeDate; };
template <> struct ResultDataTypeMap<Int16> { using ResultDataType = DataTypeDate; };
template <> struct ResultDataTypeMap<UInt32> { using ResultDataType = DataTypeDateTime; };
template <> struct ResultDataTypeMap<Int32> { using ResultDataType = DataTypeDate32; };
template <> struct ResultDataTypeMap<Int32> { using ResultDataType = DataTypeDateTime; };
template <> struct ResultDataTypeMap<DateTime64> { using ResultDataType = DataTypeDateTime64; };
template <> struct ResultDataTypeMap<Int64> { using ResultDataType = DataTypeDateTime64; };
}
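With Date32 gone, Int32 transform results map back to DataTypeDateTime. The map itself is a plain compile-time trait table; a standalone miniature of the technique (hypothetical types):

#include <type_traits>

struct DateType {};      /// stands in for DataTypeDate
struct DateTimeType {};  /// stands in for DataTypeDateTime

template <typename FieldType> struct ResultMap {};
template <> struct ResultMap<unsigned short> { using ResultDataType = DateType; };
template <> struct ResultMap<unsigned int>   { using ResultDataType = DateTimeType; };

/// The transform's return field type selects the SQL-facing result type at compile time.
static_assert(std::is_same_v<ResultMap<unsigned short>::ResultDataType, DateType>);
static_assert(std::is_same_v<ResultMap<unsigned int>::ResultDataType, DateTimeType>);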
@ -413,7 +375,7 @@ public:
if (arguments.size() == 2)
{
if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() +
". Should be a date or a date with time", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
@ -436,8 +398,6 @@ public:
{
case TypeIndex::Date:
return resolveReturnType<DataTypeDate>(arguments);
case TypeIndex::Date32:
return resolveReturnType<DataTypeDate32>(arguments);
case TypeIndex::DateTime:
return resolveReturnType<DataTypeDateTime>(arguments);
case TypeIndex::DateTime64:
@ -477,23 +437,16 @@ public:
if constexpr (std::is_same_v<ResultDataType, DataTypeDate>)
return std::make_shared<DataTypeDate>();
else if constexpr (std::is_same_v<ResultDataType, DataTypeDate32>)
return std::make_shared<DataTypeDate32>();
else if constexpr (std::is_same_v<ResultDataType, DataTypeDateTime>)
{
return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
}
else if constexpr (std::is_same_v<ResultDataType, DataTypeDateTime64>)
{
if (typeid_cast<const DataTypeDateTime64 *>(arguments[0].type.get()))
{
const auto & datetime64_type = assert_cast<const DataTypeDateTime64 &>(*arguments[0].type);
return std::make_shared<DataTypeDateTime64>(datetime64_type.getScale(), extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
}
else
{
return std::make_shared<DataTypeDateTime64>(DataTypeDateTime64::default_scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
}
// TODO (vnemkov): what if there is an overload of Transform::execute() that returns DateTime64 from DateTime or Date?
// Shall we use the default scale or one from the optional argument?
const auto & datetime64_type = assert_cast<const DataTypeDateTime64 &>(*arguments[0].type);
return std::make_shared<DataTypeDateTime64>(datetime64_type.getScale(), extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
}
else
{
@ -517,11 +470,6 @@ public:
return DateTimeAddIntervalImpl<DataTypeDate, TransformResultDataType<DataTypeDate>, Transform>::execute(
Transform{}, arguments, result_type);
}
else if (which.isDate32())
{
return DateTimeAddIntervalImpl<DataTypeDate32, TransformResultDataType<DataTypeDate32>, Transform>::execute(
Transform{}, arguments, result_type);
}
else if (which.isDateTime())
{
return DateTimeAddIntervalImpl<DataTypeDateTime, TransformResultDataType<DataTypeDateTime>, Transform>::execute(

View File

@ -1,6 +1,5 @@
#pragma once
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Functions/IFunction.h>
#include <DataTypes/DataTypeDateTime64.h>
@ -39,7 +38,7 @@ public:
{
if (arguments.size() == 1)
{
if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
throw Exception(
"Illegal type " + arguments[0].type->getName() + " of argument of function " + getName()
+ ". Should be a date or a date with time",
@ -47,7 +46,7 @@ public:
}
else if (arguments.size() == 2)
{
if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
throw Exception(
"Illegal type " + arguments[0].type->getName() + " of argument of function " + getName()
+ ". Should be a date or a date with time",
@ -58,7 +57,7 @@ public:
"must be of type Date or DateTime. The 2nd argument (optional) must be "
"a constant string with timezone name",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if ((isDate(arguments[0].type) || isDate32(arguments[0].type)) && (std::is_same_v<ToDataType, DataTypeDate> || std::is_same_v<ToDataType, DataTypeDate32>))
if (isDate(arguments[0].type) && std::is_same_v<ToDataType, DataTypeDate>)
throw Exception(
"The timezone argument of function " + getName() + " is allowed only when the 1st argument has the type DateTime",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -104,8 +103,6 @@ public:
if (which.isDate())
return DateTimeTransformImpl<DataTypeDate, ToDataType, Transform>::execute(arguments, result_type, input_rows_count);
else if (which.isDate32())
return DateTimeTransformImpl<DataTypeDate32, ToDataType, Transform>::execute(arguments, result_type, input_rows_count);
else if (which.isDateTime())
return DateTimeTransformImpl<DataTypeDateTime, ToDataType, Transform>::execute(arguments, result_type, input_rows_count);
else if (which.isDateTime64())
@ -149,12 +146,6 @@ public:
== Transform::FactorTransform::execute(UInt16(right.get<UInt64>()), date_lut)
? is_monotonic : is_not_monotonic;
}
else if (checkAndGetDataType<DataTypeDate32>(&type))
{
return Transform::FactorTransform::execute(Int32(left.get<UInt64>()), date_lut)
== Transform::FactorTransform::execute(Int32(right.get<UInt64>()), date_lut)
? is_monotonic : is_not_monotonic;
}
else
{
return Transform::FactorTransform::execute(UInt32(left.get<UInt64>()), date_lut)

View File

@ -1081,7 +1081,7 @@ public:
const DataTypeTuple * right_tuple = checkAndGetDataType<DataTypeTuple>(arguments[1].get());
bool both_represented_by_number = arguments[0]->isValueRepresentedByNumber() && arguments[1]->isValueRepresentedByNumber();
bool has_date = left.isDateOrDate32() || right.isDateOrDate32();
bool has_date = left.isDate() || right.isDate();
if (!((both_represented_by_number && !has_date) /// Do not allow to compare date and number.
|| (left.isStringOrFixedString() || right.isStringOrFixedString()) /// Everything can be compared with string by conversion.

View File

@ -32,7 +32,7 @@ void registerFunctionsConversion(FunctionFactory & factory)
factory.registerFunction<FunctionToDate>();
/// MySQL compatibility alias.
factory.registerFunction<FunctionToDate>("DATE", FunctionFactory::CaseInsensitive);
factory.registerFunction<FunctionToDate32>();
factory.registerFunction<FunctionToDateTime>();
factory.registerFunction<FunctionToDateTime32>();
factory.registerFunction<FunctionToDateTime64>();
@ -62,7 +62,6 @@ void registerFunctionsConversion(FunctionFactory & factory)
factory.registerFunction<FunctionToFloat32OrZero>();
factory.registerFunction<FunctionToFloat64OrZero>();
factory.registerFunction<FunctionToDateOrZero>();
factory.registerFunction<FunctionToDate32OrZero>();
factory.registerFunction<FunctionToDateTimeOrZero>();
factory.registerFunction<FunctionToDateTime64OrZero>();
@ -88,7 +87,6 @@ void registerFunctionsConversion(FunctionFactory & factory)
factory.registerFunction<FunctionToFloat32OrNull>();
factory.registerFunction<FunctionToFloat64OrNull>();
factory.registerFunction<FunctionToDateOrNull>();
factory.registerFunction<FunctionToDate32OrNull>();
factory.registerFunction<FunctionToDateTimeOrNull>();
factory.registerFunction<FunctionToDateTime64OrNull>();

View File

@ -12,7 +12,6 @@
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeEnum.h>
@ -710,8 +709,6 @@ struct ConvertImpl<FromDataType, std::enable_if_t<!std::is_same_v<FromDataType,
if constexpr (std::is_same_v<FromDataType, DataTypeDate>)
data_to.resize(size * (strlen("YYYY-MM-DD") + 1));
else if constexpr (std::is_same_v<FromDataType, DataTypeDate32>)
data_to.resize(size * (strlen("YYYY-MM-DD") + 1));
else if constexpr (std::is_same_v<FromDataType, DataTypeDateTime>)
data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss") + 1));
else if constexpr (std::is_same_v<FromDataType, DataTypeDateTime64>)
@ -814,14 +811,6 @@ inline void parseImpl<DataTypeDate>(DataTypeDate::FieldType & x, ReadBuffer & rb
x = tmp;
}
template <>
inline void parseImpl<DataTypeDate32>(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl *)
{
ExtendedDayNum tmp(0);
readDateText(tmp, rb);
x = tmp;
}
// NOTE: no need of extra overload of DateTime64, since readDateTimeText64 has different signature and that case is explicitly handled in the calling code.
template <>
inline void parseImpl<DataTypeDateTime>(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone)
@ -862,16 +851,6 @@ inline bool tryParseImpl<DataTypeDate>(DataTypeDate::FieldType & x, ReadBuffer &
return true;
}
template <>
inline bool tryParseImpl<DataTypeDate32>(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl *)
{
ExtendedDayNum tmp(0);
if (!tryReadDateText(tmp, rb))
return false;
x = tmp;
return true;
}
template <>
inline bool tryParseImpl<DataTypeDateTime>(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone)
{
@ -1296,7 +1275,6 @@ struct ConvertImpl<DataTypeFixedString, DataTypeString, Name, ConvertDefaultBeha
/// Declared early because used below.
struct NameToDate { static constexpr auto name = "toDate"; };
struct NameToDate32 { static constexpr auto name = "toDate32"; };
struct NameToDateTime { static constexpr auto name = "toDateTime"; };
struct NameToDateTime32 { static constexpr auto name = "toDateTime32"; };
struct NameToDateTime64 { static constexpr auto name = "toDateTime64"; };
@ -1979,7 +1957,7 @@ struct ToDateMonotonicity
static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right)
{
auto which = WhichDataType(type);
if (which.isDate() || which.isDate32() || which.isDateTime() || which.isDateTime64() || which.isInt8() || which.isInt16() || which.isUInt8() || which.isUInt16())
if (which.isDate() || which.isDateTime() || which.isDateTime64() || which.isInt8() || which.isInt16() || which.isUInt8() || which.isUInt16())
return {true, true, true};
else if (
(which.isUInt() && ((left.isNull() || left.get<UInt64>() < 0xFFFF) && (right.isNull() || right.get<UInt64>() >= 0xFFFF)))
@ -2080,7 +2058,6 @@ using FunctionToInt256 = FunctionConvert<DataTypeInt256, NameToInt256, ToNumberM
using FunctionToFloat32 = FunctionConvert<DataTypeFloat32, NameToFloat32, ToNumberMonotonicity<Float32>>;
using FunctionToFloat64 = FunctionConvert<DataTypeFloat64, NameToFloat64, ToNumberMonotonicity<Float64>>;
using FunctionToDate = FunctionConvert<DataTypeDate, NameToDate, ToDateMonotonicity>;
using FunctionToDate32 = FunctionConvert<DataTypeDate32, NameToDate32, ToDateMonotonicity>;
using FunctionToDateTime = FunctionConvert<DataTypeDateTime, NameToDateTime, ToDateTimeMonotonicity>;
using FunctionToDateTime32 = FunctionConvert<DataTypeDateTime, NameToDateTime32, ToDateTimeMonotonicity>;
using FunctionToDateTime64 = FunctionConvert<DataTypeDateTime64, NameToDateTime64, UnknownMonotonicity>;
@ -2140,7 +2117,6 @@ struct NameToInt256OrZero { static constexpr auto name = "toInt256OrZero"; };
struct NameToFloat32OrZero { static constexpr auto name = "toFloat32OrZero"; };
struct NameToFloat64OrZero { static constexpr auto name = "toFloat64OrZero"; };
struct NameToDateOrZero { static constexpr auto name = "toDateOrZero"; };
struct NameToDate32OrZero { static constexpr auto name = "toDate32OrZero"; };
struct NameToDateTimeOrZero { static constexpr auto name = "toDateTimeOrZero"; };
struct NameToDateTime64OrZero { static constexpr auto name = "toDateTime64OrZero"; };
struct NameToDecimal32OrZero { static constexpr auto name = "toDecimal32OrZero"; };
@ -2164,7 +2140,6 @@ using FunctionToInt256OrZero = FunctionConvertFromString<DataTypeInt256, NameToI
using FunctionToFloat32OrZero = FunctionConvertFromString<DataTypeFloat32, NameToFloat32OrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToFloat64OrZero = FunctionConvertFromString<DataTypeFloat64, NameToFloat64OrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToDateOrZero = FunctionConvertFromString<DataTypeDate, NameToDateOrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToDate32OrZero = FunctionConvertFromString<DataTypeDate32, NameToDate32OrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToDateTimeOrZero = FunctionConvertFromString<DataTypeDateTime, NameToDateTimeOrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToDateTime64OrZero = FunctionConvertFromString<DataTypeDateTime64, NameToDateTime64OrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToDecimal32OrZero = FunctionConvertFromString<DataTypeDecimal<Decimal32>, NameToDecimal32OrZero, ConvertFromStringExceptionMode::Zero>;
@ -2188,7 +2163,6 @@ struct NameToInt256OrNull { static constexpr auto name = "toInt256OrNull"; };
struct NameToFloat32OrNull { static constexpr auto name = "toFloat32OrNull"; };
struct NameToFloat64OrNull { static constexpr auto name = "toFloat64OrNull"; };
struct NameToDateOrNull { static constexpr auto name = "toDateOrNull"; };
struct NameToDate32OrNull { static constexpr auto name = "toDate32OrNull"; };
struct NameToDateTimeOrNull { static constexpr auto name = "toDateTimeOrNull"; };
struct NameToDateTime64OrNull { static constexpr auto name = "toDateTime64OrNull"; };
struct NameToDecimal32OrNull { static constexpr auto name = "toDecimal32OrNull"; };
@ -2212,7 +2186,6 @@ using FunctionToInt256OrNull = FunctionConvertFromString<DataTypeInt256, NameToI
using FunctionToFloat32OrNull = FunctionConvertFromString<DataTypeFloat32, NameToFloat32OrNull, ConvertFromStringExceptionMode::Null>;
using FunctionToFloat64OrNull = FunctionConvertFromString<DataTypeFloat64, NameToFloat64OrNull, ConvertFromStringExceptionMode::Null>;
using FunctionToDateOrNull = FunctionConvertFromString<DataTypeDate, NameToDateOrNull, ConvertFromStringExceptionMode::Null>;
using FunctionToDate32OrNull = FunctionConvertFromString<DataTypeDate32, NameToDate32OrNull, ConvertFromStringExceptionMode::Null>;
using FunctionToDateTimeOrNull = FunctionConvertFromString<DataTypeDateTime, NameToDateTimeOrNull, ConvertFromStringExceptionMode::Null>;
using FunctionToDateTime64OrNull = FunctionConvertFromString<DataTypeDateTime64, NameToDateTime64OrNull, ConvertFromStringExceptionMode::Null>;
using FunctionToDecimal32OrNull = FunctionConvertFromString<DataTypeDecimal<Decimal32>, NameToDecimal32OrNull, ConvertFromStringExceptionMode::Null>;

View File

@ -683,8 +683,6 @@ public:
return executeType<Int64>(arguments);
else if (which.isDate())
return executeType<UInt16>(arguments);
else if (which.isDate32())
return executeType<Int32>(arguments);
else if (which.isDateTime())
return executeType<UInt32>(arguments);
else if (which.isDecimal32())
@ -988,7 +986,6 @@ private:
else if (which.isEnum8()) executeIntType<Int8, first>(icolumn, vec_to);
else if (which.isEnum16()) executeIntType<Int16, first>(icolumn, vec_to);
else if (which.isDate()) executeIntType<UInt16, first>(icolumn, vec_to);
else if (which.isDate32()) executeIntType<Int32, first>(icolumn, vec_to);
else if (which.isDateTime()) executeIntType<UInt32, first>(icolumn, vec_to);
/// TODO: executeIntType() for Decimal32/64 leads to incompatible result
else if (which.isDecimal32()) executeBigIntType<Decimal32, first>(icolumn, vec_to);

View File

@ -58,10 +58,10 @@ struct CountEqualAction
namespace Impl
{
template <
class ConcreteAction,
typename ConcreteAction,
bool RightArgIsConstant = false,
class IntegralInitial = UInt64,
class IntegralResult = UInt64>
typename IntegralInitial = UInt64,
typename IntegralResult = UInt64>
struct Main
{
private:
@ -94,13 +94,13 @@ private:
}
/// LowCardinality
static bool compare(const IColumn & left, const Result& right, size_t i, size_t)
static bool compare(const IColumn & left, const Result & right, size_t i, size_t)
{
return left.getUInt(i) == right;
}
/// Generic
static bool compare(const IColumn& left, const IColumn& right, size_t i, size_t j)
static bool compare(const IColumn & left, const IColumn & right, size_t i, size_t j)
{
return 0 == left.compareAt(i, RightArgIsConstant ? 0 : j, right, 1);
}
@ -109,7 +109,7 @@ private:
static constexpr bool hasNull(const NullMap * const null_map, size_t i) noexcept { return (*null_map)[i]; }
template <size_t Case, class Data, class Target>
template <size_t Case, typename Data, typename Target>
static void process(
const Data & data, const ArrOffsets & offsets, const Target & target, ResultArr & result,
[[maybe_unused]] const NullMap * const null_map_data,
@ -148,7 +148,7 @@ private:
continue;
}
else if (!compare(data, target, current_offset + j, i))
continue;
continue;
ConcreteAction::apply(current, j);
@ -162,7 +162,7 @@ private:
}
public:
template <class Data, class Target>
template <typename Data, typename Target>
static void vector(
const Data & data,
const ArrOffsets & offsets,
@ -183,7 +183,7 @@ public:
};
/// When the 2nd function argument is a NULL value.
template <class ConcreteAction>
template <typename ConcreteAction>
struct Null
{
using ResultType = typename ConcreteAction::ResultType;
@ -227,7 +227,7 @@ struct Null
}
};
template <class ConcreteAction>
template <typename ConcreteAction>
struct String
{
private:
@ -350,7 +350,7 @@ public:
};
}
template <class ConcreteAction, class Name>
template <typename ConcreteAction, typename Name>
class FunctionArrayIndex : public IFunction
{
public:
@ -565,7 +565,7 @@ private:
* Integral s = {s1, s2, ...}
* (s1, s1, s2, ...), (s2, s1, s2, ...), (s3, s1, s2, ...)
*/
template <class ...Integral>
template <typename... Integral>
static inline ColumnPtr executeIntegral(const ColumnsWithTypeAndName & arguments)
{
const ColumnArray * const left = checkAndGetColumn<ColumnArray>(arguments[0].column.get());
@ -590,14 +590,14 @@ private:
return nullptr;
}
template <class ...Integral>
template <typename... Integral>
static inline bool executeIntegral(ExecutionData& data)
{
return (executeIntegralExpanded<Integral, Integral...>(data) || ...);
}
/// Invoke executeIntegralImpl with such parameters: (A, other1), (A, other2), ...
template <class A, class ...Other>
template <typename A, typename... Other>
static inline bool executeIntegralExpanded(ExecutionData& data)
{
return (executeIntegralImpl<A, Other>(data) || ...);
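The pair of helpers above realizes the comment's expansion scheme with C++17 fold expressions: the outer fold fixes the first type of the pair, the inner fold tries it against every candidate, and the short-circuiting || stops at the first handler that succeeds. A self-contained miniature of the same trick (hypothetical names):

#include <iostream>
#include <type_traits>

/// Pretend handler: succeeds only for the (int, int) pair.
template <typename A, typename B>
bool tryPair()
{
    return std::is_same_v<A, int> && std::is_same_v<B, int>;
}

/// Pair a fixed first type A with every candidate; stop at the first success.
template <typename A, typename... Others>
bool expandSecond()
{
    return (tryPair<A, Others>() || ...);
}

/// Try every candidate as the first element of the pair.
template <typename... Ts>
bool dispatch()
{
    return (expandSecond<Ts, Ts...>() || ...);
}

int main()
{
    std::cout << dispatch<float, int>() << '\n';  /// prints 1: the (int, int) pair matched
}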
@ -608,7 +608,7 @@ private:
* second argument, namely, the @e value, so it's possible to invoke the <tt>has(Array(Int8), UInt64)</tt> e.g.
* so we have to check all possible variants for #Initial and #Resulting types.
*/
template <class Initial, class Resulting>
template <typename Initial, typename Resulting>
static bool executeIntegralImpl(ExecutionData& data)
{
const ColumnVector<Initial> * col_nested = checkAndGetColumn<ColumnVector<Initial>>(&data.left);
@ -647,7 +647,7 @@ private:
}
/**
* Catches arguments of type LC(T) (left) and U (right).
* Catches arguments of type LowCardinality(T) (left) and U (right).
*
* The perftests
* https://clickhouse-test-reports.s3.yandex.net/12550/2d27fa0fa8c198a82bf1fe3625050ccf56695976/integration_tests_(release).html
@ -726,7 +726,7 @@ private:
return col_result;
}
else if (col_lc->nestedIsNullable()) // LC(Nullable(T)) and U
else if (col_lc->nestedIsNullable()) // LowCardinality(Nullable(T)) and U
{
const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality(); // Nullable(T)
const ColumnNullable& left_nullable = *checkAndGetColumn<ColumnNullable>(left_casted.get());
@ -746,16 +746,17 @@ private:
? right_nullable->getNestedColumn()
: *right_casted.get();
ExecutionData data = {
ExecutionData data =
{
left_ptr, right_ptr,
col_array->getOffsets(),
nullptr,
{null_map_left_casted, null_map_right_casted}};
if (dispatchConvertedLCColumns(data))
if (dispatchConvertedLowCardinalityColumns(data))
return data.result_column;
}
else // LC(T) and U, T not Nullable
else // LowCardinality(T) and U, T not Nullable
{
if (col_arg.isNullable())
return nullptr;
@ -764,24 +765,25 @@ private:
arg_lc && arg_lc->isNullable())
return nullptr;
// LC(T) and U (possibly LC(V))
// LowCardinality(T) and U (possibly LowCardinality(V))
const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality();
const ColumnPtr right_casted = col_arg.convertToFullColumnIfLowCardinality();
ExecutionData data = {
ExecutionData data =
{
*left_casted.get(), *right_casted.get(), col_array->getOffsets(),
nullptr, {null_map_data, null_map_item}
};
if (dispatchConvertedLCColumns(data))
if (dispatchConvertedLowCardinalityColumns(data))
return data.result_column;
}
return nullptr;
}
static bool dispatchConvertedLCColumns(ExecutionData& data)
static bool dispatchConvertedLowCardinalityColumns(ExecutionData & data)
{
if (data.left.isNumeric() && data.right.isNumeric()) // ColumnArrays
return executeIntegral<INTEGRAL_TPL_PACK>(data);

View File

@ -6,7 +6,6 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeNullable.h>
@ -408,9 +407,6 @@ ColumnPtr FunctionArrayIntersect::executeImpl(const ColumnsWithTypeAndName & arg
using DateMap = ClearableHashMapWithStackMemory<DataTypeDate::FieldType,
size_t, DefaultHash<DataTypeDate::FieldType>, INITIAL_SIZE_DEGREE>;
using Date32Map = ClearableHashMapWithStackMemory<DataTypeDate32::FieldType,
size_t, DefaultHash<DataTypeDate32::FieldType>, INITIAL_SIZE_DEGREE>;
using DateTimeMap = ClearableHashMapWithStackMemory<
DataTypeDateTime::FieldType, size_t,
DefaultHash<DataTypeDateTime::FieldType>, INITIAL_SIZE_DEGREE>;
@ -425,8 +421,6 @@ ColumnPtr FunctionArrayIntersect::executeImpl(const ColumnsWithTypeAndName & arg
if (which.isDate())
result_column = execute<DateMap, ColumnVector<DataTypeDate::FieldType>, true>(arrays, std::move(column));
else if (which.isDate32())
result_column = execute<Date32Map, ColumnVector<DataTypeDate32::FieldType>, true>(arrays, std::move(column));
else if (which.isDateTime())
result_column = execute<DateTimeMap, ColumnVector<DataTypeDateTime::FieldType>, true>(arrays, std::move(column));
else if (which.isString())

View File

@ -7,6 +7,7 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnVector.h>
#include <Interpreters/castColumn.h>
#include <Interpreters/Context.h>
#include <numeric>
@ -31,8 +32,10 @@ class FunctionRange : public IFunction
{
public:
static constexpr auto name = "range";
static constexpr size_t max_elements = 100'000'000;
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionRange>(); }
const size_t max_elements;
static FunctionPtr create(ContextPtr context_) { return std::make_shared<FunctionRange>(std::move(context_)); }
explicit FunctionRange(ContextPtr context) : max_elements(context->getSettingsRef().function_range_max_elements_in_block) {}
private:
String getName() const override { return name; }

View File

@ -5,7 +5,6 @@
#include <Columns/ColumnNullable.h>
#include <Common/assert_cast.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypesNumber.h>
@ -116,8 +115,6 @@ private:
f(Float64());
else if (which.isDate())
f(DataTypeDate::FieldType());
else if (which.isDate32())
f(DataTypeDate::FieldType());
else if (which.isDateTime())
f(DataTypeDateTime::FieldType());
else

View File

@ -1,7 +1,6 @@
#include <common/DateLUTImpl.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeInterval.h>
@ -40,11 +39,6 @@ namespace
return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years);
}
static UInt16 execute(Int32 d, UInt64 years, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years);
}
static UInt16 execute(UInt32 t, UInt64 years, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years);
@ -66,11 +60,6 @@ namespace
return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters);
}
static UInt16 execute(Int32 d, UInt64 quarters, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters);
}
static UInt16 execute(UInt32 t, UInt64 quarters, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters);
@ -92,11 +81,6 @@ namespace
return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months);
}
static UInt16 execute(Int32 d, UInt64 months, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months);
}
static UInt16 execute(UInt32 t, UInt64 months, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months);
@ -118,11 +102,6 @@ namespace
return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks);
}
static UInt16 execute(Int32 d, UInt64 weeks, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks);
}
static UInt16 execute(UInt32 t, UInt64 weeks, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks);
@ -144,11 +123,6 @@ namespace
return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days);
}
static UInt32 execute(Int32 d, UInt64 days, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days);
}
static UInt32 execute(UInt32 t, UInt64 days, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days);
@ -166,7 +140,6 @@ namespace
static constexpr auto name = function_name;
static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt32 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); }
static UInt32 execute(Int64 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); }
};
@ -178,8 +151,6 @@ namespace
static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt32 t, UInt64 minutes, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfMinuteInterval(t, minutes);
@ -198,8 +169,6 @@ namespace
static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfSecondInterval(t, seconds);
@ -330,12 +299,6 @@ private:
if (time_column_vec)
return dispatchForIntervalColumn(assert_cast<const DataTypeDate&>(from_datatype), *time_column_vec, interval_column, time_zone);
}
if (which_type.isDate32())
{
const auto * time_column_vec = checkAndGetColumn<ColumnInt32>(time_column.column.get());
if (time_column_vec)
return dispatchForIntervalColumn(assert_cast<const DataTypeDate32&>(from_datatype), *time_column_vec, interval_column, time_zone);
}
if (which_type.isDateTime64())
{
const auto * time_column_vec = checkAndGetColumn<DataTypeDateTime64::ColumnType>(time_column.column.get());

View File

@ -19,20 +19,70 @@ namespace ErrorCodes
namespace
{
class ExecutableFunctionToTimeZone : public IExecutableFunction
{
public:
explicit ExecutableFunctionToTimeZone() = default;
String getName() const override { return "toTimezone"; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override
{
return arguments[0].column;
}
};
class FunctionBaseToTimeZone : public IFunctionBase
{
public:
FunctionBaseToTimeZone(
bool is_constant_timezone_,
DataTypes argument_types_,
DataTypePtr return_type_)
: is_constant_timezone(is_constant_timezone_)
, argument_types(std::move(argument_types_))
, return_type(std::move(return_type_)) {}
String getName() const override { return "toTimezone"; }
const DataTypes & getArgumentTypes() const override
{
return argument_types;
}
const DataTypePtr & getResultType() const override
{
return return_type;
}
ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName & /*arguments*/) const override
{
return std::make_unique<ExecutableFunctionToTimeZone>();
}
bool hasInformationAboutMonotonicity() const override { return is_constant_timezone; }
Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const override
{
return {is_constant_timezone, is_constant_timezone, is_constant_timezone};
}
private:
bool is_constant_timezone;
DataTypes argument_types;
DataTypePtr return_type;
};
/// Just changes the time zone information of the data type. The calculation is free.
class FunctionToTimezone : public IFunction
class ToTimeZoneOverloadResolver : public IFunctionOverloadResolver
{
public:
static constexpr auto name = "toTimezone";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionToTimezone>(); }
String getName() const override
{
return name;
}
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
static FunctionOverloadResolverPtr create(ContextPtr) { return std::make_unique<ToTimeZoneOverloadResolver>(); }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
@ -54,9 +104,17 @@ public:
return std::make_shared<DataTypeDateTime64>(date_time64->getScale(), time_zone_name);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override
{
return arguments[0].column;
bool is_constant_timezone = false;
if (arguments[1].column)
is_constant_timezone = isColumnConst(*arguments[1].column);
DataTypes data_types(arguments.size());
for (size_t i = 0; i < arguments.size(); ++i)
data_types[i] = arguments[i].type;
return std::make_unique<FunctionBaseToTimeZone>(is_constant_timezone, data_types, result_type);
}
};
@ -64,7 +122,7 @@ public:
void registerFunctionToTimeZone(FunctionFactory & factory)
{
factory.registerFunction<FunctionToTimezone>();
factory.registerFunction<ToTimeZoneOverloadResolver>();
factory.registerAlias("toTimeZone", "toTimezone");
}

View File

@ -1,8 +1,8 @@
#include <IO/FileEncryptionCommon.h>
#if USE_SSL
#include <IO/ReadHelpers.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
@ -23,244 +23,281 @@ namespace FileEncryption
namespace
{
String toBigEndianString(UInt128 value)
constexpr const size_t kBlockSize = 16;
size_t blockOffset(size_t pos) { return pos % kBlockSize; }
size_t blocks(size_t pos) { return pos / kBlockSize; }
size_t partBlockSize(size_t size, size_t off)
{
WriteBufferFromOwnString out;
writeBinaryBigEndian(value, out);
return std::move(out.str());
assert(off < kBlockSize);
/// If the offset is block-aligned there is no partial head block: the data can be written as usual blocks.
if (off == 0)
return 0;
return off + size <= kBlockSize ? size : (kBlockSize - off) % kBlockSize;
}
UInt128 fromBigEndianString(const String & str)
size_t encryptBlocks(EVP_CIPHER_CTX * evp_ctx, const char * data, size_t size, WriteBuffer & out)
{
ReadBufferFromMemory in{str.data(), str.length()};
UInt128 result;
readBinaryBigEndian(result, in);
return result;
const uint8_t * in = reinterpret_cast<const uint8_t *>(data);
size_t in_size = 0;
size_t out_size = 0;
while (in_size < size)
{
out.nextIfAtEnd();
size_t part_size = std::min(size - in_size, out.available());
uint8_t * ciphertext = reinterpret_cast<uint8_t *>(out.position());
int ciphertext_size = 0;
if (!EVP_EncryptUpdate(evp_ctx, ciphertext, &ciphertext_size, &in[in_size], part_size))
throw Exception("Failed to encrypt", ErrorCodes::DATA_ENCRYPTION_ERROR);
in_size += part_size;
if (ciphertext_size)
{
out.position() += ciphertext_size;
out_size += ciphertext_size;
}
}
return out_size;
}
size_t encryptBlockWithPadding(EVP_CIPHER_CTX * evp_ctx, const char * data, size_t size, size_t pad_left, WriteBuffer & out)
{
assert((size <= kBlockSize) && (size + pad_left <= kBlockSize));
uint8_t padded_data[kBlockSize] = {};
memcpy(&padded_data[pad_left], data, size);
size_t padded_data_size = pad_left + size;
uint8_t ciphertext[kBlockSize];
int ciphertext_size = 0;
if (!EVP_EncryptUpdate(evp_ctx, ciphertext, &ciphertext_size, padded_data, padded_data_size))
throw Exception("Failed to encrypt", ErrorCodes::DATA_ENCRYPTION_ERROR);
if (!ciphertext_size)
return 0;
if (static_cast<size_t>(ciphertext_size) < pad_left)
throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Unexpected size of encrypted data: {} < {}", ciphertext_size, pad_left);
uint8_t * ciphertext_begin = &ciphertext[pad_left];
ciphertext_size -= pad_left;
out.write(reinterpret_cast<const char *>(ciphertext_begin), ciphertext_size);
return ciphertext_size;
}
size_t encryptFinal(EVP_CIPHER_CTX * evp_ctx, WriteBuffer & out)
{
uint8_t ciphertext[kBlockSize];
int ciphertext_size = 0;
if (!EVP_EncryptFinal_ex(evp_ctx, ciphertext, &ciphertext_size))
throw Exception("Failed to finalize encrypting", ErrorCodes::DATA_ENCRYPTION_ERROR);
if (ciphertext_size)
out.write(reinterpret_cast<const char *>(ciphertext), ciphertext_size);
return ciphertext_size;
}
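
A property this code relies on (this follows from CTR being a stream mode, not from anything in the patch itself): AES-CTR applies no padding, so EVP_EncryptFinal_ex contributes zero bytes and the ciphertext length always equals the plaintext length, which is what makes the out_size == in_size checks further down valid. A standalone check:

// Illustrative check (assumes OpenSSL is available): CTR is a stream mode,
// so Final emits 0 bytes and ciphertext size == plaintext size.
#include <openssl/evp.h>
#include <cassert>

int main()
{
    unsigned char key[16] = {}, iv[16] = {};
    unsigned char in[5] = {'h', 'e', 'l', 'l', 'o'}, out[16];
    int n = 0, fin = 0;
    EVP_CIPHER_CTX * ctx = EVP_CIPHER_CTX_new();
    EVP_EncryptInit_ex(ctx, EVP_aes_128_ctr(), nullptr, key, iv);
    EVP_EncryptUpdate(ctx, out, &n, in, static_cast<int>(sizeof(in)));
    EVP_EncryptFinal_ex(ctx, out + n, &fin);
    assert(n == 5 && fin == 0);   /// no padding bytes from Final in CTR mode
    EVP_CIPHER_CTX_free(ctx);
}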
size_t decryptBlocks(EVP_CIPHER_CTX * evp_ctx, const char * data, size_t size, char * out)
{
const uint8_t * in = reinterpret_cast<const uint8_t *>(data);
uint8_t * plaintext = reinterpret_cast<uint8_t *>(out);
int plaintext_size = 0;
if (!EVP_DecryptUpdate(evp_ctx, plaintext, &plaintext_size, in, size))
throw Exception("Failed to decrypt", ErrorCodes::DATA_ENCRYPTION_ERROR);
return plaintext_size;
}
size_t decryptBlockWithPadding(EVP_CIPHER_CTX * evp_ctx, const char * data, size_t size, size_t pad_left, char * out)
{
assert((size <= kBlockSize) && (size + pad_left <= kBlockSize));
uint8_t padded_data[kBlockSize] = {};
memcpy(&padded_data[pad_left], data, size);
size_t padded_data_size = pad_left + size;
uint8_t plaintext[kBlockSize];
int plaintext_size = 0;
if (!EVP_DecryptUpdate(evp_ctx, plaintext, &plaintext_size, padded_data, padded_data_size))
throw Exception("Failed to decrypt", ErrorCodes::DATA_ENCRYPTION_ERROR);
if (!plaintext_size)
return 0;
if (static_cast<size_t>(plaintext_size) < pad_left)
throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Unexpected size of decrypted data: {} < {}", plaintext_size, pad_left);
const uint8_t * plaintext_begin = &plaintext[pad_left];
plaintext_size -= pad_left;
memcpy(out, plaintext_begin, plaintext_size);
return plaintext_size;
}
size_t decryptFinal(EVP_CIPHER_CTX * evp_ctx, char * out)
{
uint8_t plaintext[kBlockSize];
int plaintext_size = 0;
if (!EVP_DecryptFinal_ex(evp_ctx, plaintext, &plaintext_size))
throw Exception("Failed to finalize decrypting", ErrorCodes::DATA_ENCRYPTION_ERROR);
if (plaintext_size)
memcpy(out, plaintext, plaintext_size);
return plaintext_size;
}
}
InitVector::InitVector(const String & iv_) : iv(fromBigEndianString(iv_)) {}
const String & InitVector::str() const
String InitVector::toString() const
{
local = toBigEndianString(iv + counter);
return local;
static_assert(sizeof(counter) == InitVector::kSize);
WriteBufferFromOwnString out;
writeBinaryBigEndian(counter, out);
return std::move(out.str());
}
Encryption::Encryption(const String & iv_, const EncryptionKey & key_, size_t offset_)
: evp_cipher(defaultCipher())
, init_vector(iv_)
, key(key_)
, block_size(cipherIVLength(evp_cipher))
InitVector InitVector::fromString(const String & str)
{
if (iv_.size() != cipherIVLength(evp_cipher))
throw DB::Exception("Expected iv with size " + std::to_string(cipherIVLength(evp_cipher)) + ", got iv with size " + std::to_string(iv_.size()),
DB::ErrorCodes::DATA_ENCRYPTION_ERROR);
if (key_.size() != cipherKeyLength(evp_cipher))
throw DB::Exception("Expected key with size " + std::to_string(cipherKeyLength(evp_cipher)) + ", got iv with size " + std::to_string(key_.size()),
DB::ErrorCodes::DATA_ENCRYPTION_ERROR);
offset = offset_;
if (str.length() != InitVector::kSize)
throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Expected iv with size {}, got iv with size {}", InitVector::kSize, str.length());
ReadBufferFromMemory in{str.data(), str.length()};
UInt128 counter;
readBinaryBigEndian(counter, in);
return InitVector{counter};
}
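
The toString/fromString pair is a plain big-endian (de)serialization of the 128-bit counter; big endian matters because AES-CTR increments the IV as a big-endian counter, so counter arithmetic on UInt128 matches block arithmetic in the ciphertext. A hedged round-trip illustration using the types defined above:

/// Illustration (assumes ClickHouse's UInt128 and the methods above).
InitVector iv{UInt128{258}};                     /// counter = 0x0102
String s = iv.toString();                        /// 16 bytes: 14 zero bytes, then 0x01, 0x02
assert(s.size() == InitVector::kSize);
assert(InitVector::fromString(s).get() == iv.get());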
size_t Encryption::partBlockSize(size_t size, size_t off) const
void InitVector::read(ReadBuffer & in)
{
assert(off < block_size);
/// If the offset is block-aligned, there is no partial head block: the data is written as usual blocks.
if (off == 0)
return 0;
return off + size <= block_size ? size : (block_size - off) % block_size;
readBinaryBigEndian(counter, in);
}
void Encryptor::encrypt(const char * plaintext, WriteBuffer & buf, size_t size)
void InitVector::write(WriteBuffer & out) const
{
if (!size)
return;
auto iv = InitVector(init_vector);
auto off = blockOffset(offset);
iv.set(blocks(offset));
size_t part_size = partBlockSize(size, off);
if (off)
{
buf.write(encryptPartialBlock(plaintext, part_size, iv, off).data(), part_size);
offset += part_size;
size -= part_size;
iv.inc();
}
if (size)
{
buf.write(encryptNBytes(plaintext + part_size, size, iv).data(), size);
offset += size;
}
writeBinaryBigEndian(counter, out);
}
String Encryptor::encryptPartialBlock(const char * partial_block, size_t size, const InitVector & iv, size_t off) const
InitVector InitVector::random()
{
if (size > block_size)
throw Exception("Expected partial block, got block with size > block_size: size = " + std::to_string(size) + " and offset = " + std::to_string(off),
ErrorCodes::DATA_ENCRYPTION_ERROR);
String plaintext(block_size, '\0');
for (size_t i = 0; i < size; ++i)
plaintext[i + off] = partial_block[i];
return String(encryptNBytes(plaintext.data(), block_size, iv), off, size);
}
String Encryptor::encryptNBytes(const char * data, size_t bytes, const InitVector & iv) const
{
String ciphertext(bytes, '\0');
auto * ciphertext_ref = ciphertext.data();
auto evp_ctx_ptr = std::unique_ptr<EVP_CIPHER_CTX, decltype(&::EVP_CIPHER_CTX_free)>(EVP_CIPHER_CTX_new(), &EVP_CIPHER_CTX_free);
auto * evp_ctx = evp_ctx_ptr.get();
if (EVP_EncryptInit_ex(evp_ctx, evp_cipher, nullptr, nullptr, nullptr) != 1)
throw Exception("Failed to initialize encryption context with cipher", ErrorCodes::DATA_ENCRYPTION_ERROR);
if (EVP_EncryptInit_ex(evp_ctx, nullptr, nullptr,
reinterpret_cast<const unsigned char*>(key.str().data()),
reinterpret_cast<const unsigned char*>(iv.str().data())) != 1)
throw Exception("Failed to set key and IV for encryption", ErrorCodes::DATA_ENCRYPTION_ERROR);
int output_len = 0;
if (EVP_EncryptUpdate(evp_ctx,
reinterpret_cast<unsigned char*>(ciphertext_ref), &output_len,
reinterpret_cast<const unsigned char*>(data), static_cast<int>(bytes)) != 1)
throw Exception("Failed to encrypt", ErrorCodes::DATA_ENCRYPTION_ERROR);
ciphertext_ref += output_len;
int final_output_len = 0;
if (EVP_EncryptFinal_ex(evp_ctx,
reinterpret_cast<unsigned char*>(ciphertext_ref), &final_output_len) != 1)
throw Exception("Failed to fetch ciphertext", ErrorCodes::DATA_ENCRYPTION_ERROR);
if (output_len < 0 || final_output_len < 0 || static_cast<size_t>(output_len) + static_cast<size_t>(final_output_len) != bytes)
throw Exception("Only part of the data was encrypted", ErrorCodes::DATA_ENCRYPTION_ERROR);
return ciphertext;
}
void Decryptor::decrypt(const char * ciphertext, BufferBase::Position buf, size_t size, size_t off)
{
if (!size)
return;
auto iv = InitVector(init_vector);
iv.set(blocks(off));
off = blockOffset(off);
size_t part_size = partBlockSize(size, off);
if (off)
{
decryptPartialBlock(buf, ciphertext, part_size, iv, off);
size -= part_size;
if (part_size + off == block_size)
iv.inc();
}
if (size)
decryptNBytes(buf, ciphertext + part_size, size, iv);
}
void Decryptor::decryptPartialBlock(BufferBase::Position & to, const char * partial_block, size_t size, const InitVector & iv, size_t off) const
{
if (size > block_size)
throw Exception("Expecter partial block, got block with size > block_size: size = " + std::to_string(size) + " and offset = " + std::to_string(off),
ErrorCodes::DATA_ENCRYPTION_ERROR);
String ciphertext(block_size, '\0');
String plaintext(block_size, '\0');
for (size_t i = 0; i < size; ++i)
ciphertext[i + off] = partial_block[i];
auto * plaintext_ref = plaintext.data();
decryptNBytes(plaintext_ref, ciphertext.data(), off + size, iv);
for (size_t i = 0; i < size; ++i)
*(to++) = plaintext[i + off];
}
void Decryptor::decryptNBytes(BufferBase::Position & to, const char * data, size_t bytes, const InitVector & iv) const
{
auto evp_ctx_ptr = std::unique_ptr<EVP_CIPHER_CTX, decltype(&::EVP_CIPHER_CTX_free)>(EVP_CIPHER_CTX_new(), &EVP_CIPHER_CTX_free);
auto * evp_ctx = evp_ctx_ptr.get();
if (EVP_DecryptInit_ex(evp_ctx, evp_cipher, nullptr, nullptr, nullptr) != 1)
throw Exception("Failed to initialize decryption context with cipher", ErrorCodes::DATA_ENCRYPTION_ERROR);
if (EVP_DecryptInit_ex(evp_ctx, nullptr, nullptr,
reinterpret_cast<const unsigned char*>(key.str().data()),
reinterpret_cast<const unsigned char*>(iv.str().data())) != 1)
throw Exception("Failed to set key and IV for decryption", ErrorCodes::DATA_ENCRYPTION_ERROR);
int output_len = 0;
if (EVP_DecryptUpdate(evp_ctx,
reinterpret_cast<unsigned char*>(to), &output_len,
reinterpret_cast<const unsigned char*>(data), static_cast<int>(bytes)) != 1)
throw Exception("Failed to decrypt", ErrorCodes::DATA_ENCRYPTION_ERROR);
to += output_len;
int final_output_len = 0;
if (EVP_DecryptFinal_ex(evp_ctx,
reinterpret_cast<unsigned char*>(to), &final_output_len) != 1)
throw Exception("Failed to fetch plaintext", ErrorCodes::DATA_ENCRYPTION_ERROR);
if (output_len < 0 || final_output_len < 0 || static_cast<size_t>(output_len) + static_cast<size_t>(final_output_len) != bytes)
throw Exception("Only part of the data was decrypted", ErrorCodes::DATA_ENCRYPTION_ERROR);
}
String readIV(size_t size, ReadBuffer & in)
{
String iv(size, 0);
in.readStrict(reinterpret_cast<char *>(iv.data()), size);
return iv;
}
String randomString(size_t size)
{
String iv(size, 0);
std::random_device rd;
std::mt19937 gen{rd()};
std::uniform_int_distribution<size_t> dis;
std::uniform_int_distribution<UInt128::base_type> dis;
UInt128 counter;
for (size_t i = 0; i != std::size(counter.items); ++i)
counter.items[i] = dis(gen);
return InitVector{counter};
}
char * ptr = iv.data();
while (size)
Encryptor::Encryptor(const String & key_, const InitVector & iv_)
: key(key_)
, init_vector(iv_)
{
if (key_.length() == 16)
evp_cipher = EVP_aes_128_ctr();
else if (key_.length() == 24)
evp_cipher = EVP_aes_192_ctr();
else if (key_.length() == 32)
evp_cipher = EVP_aes_256_ctr();
else
throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Key length {} is not supported, supported only keys of length 128, 192, or 256 bits", key_.length());
size_t cipher_key_length = static_cast<size_t>(EVP_CIPHER_key_length(evp_cipher));
if (cipher_key_length != key_.length())
throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Got unexpected key length from cipher: {} != {}", cipher_key_length, key_.length());
size_t cipher_iv_length = static_cast<size_t>(EVP_CIPHER_iv_length(evp_cipher));
if (cipher_iv_length != InitVector::kSize)
throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Got unexpected init vector's length from cipher: {} != {}", cipher_iv_length, InitVector::kSize);
}
void Encryptor::encrypt(const char * data, size_t size, WriteBuffer & out)
{
if (!size)
return;
auto current_iv = (init_vector + blocks(offset)).toString();
auto evp_ctx_ptr = std::unique_ptr<EVP_CIPHER_CTX, decltype(&::EVP_CIPHER_CTX_free)>(EVP_CIPHER_CTX_new(), &EVP_CIPHER_CTX_free);
auto * evp_ctx = evp_ctx_ptr.get();
if (!EVP_EncryptInit_ex(evp_ctx, evp_cipher, nullptr, nullptr, nullptr))
throw Exception("Failed to initialize encryption context with cipher", ErrorCodes::DATA_ENCRYPTION_ERROR);
if (!EVP_EncryptInit_ex(evp_ctx, nullptr, nullptr,
reinterpret_cast<const uint8_t*>(key.c_str()), reinterpret_cast<const uint8_t*>(current_iv.c_str())))
throw Exception("Failed to set key and IV for encryption", ErrorCodes::DATA_ENCRYPTION_ERROR);
size_t in_size = 0;
size_t out_size = 0;
auto off = blockOffset(offset);
if (off)
{
auto value = dis(gen);
size_t n = std::min(size, sizeof(value));
memcpy(ptr, &value, n);
ptr += n;
size -= n;
size_t in_part_size = partBlockSize(size, off);
size_t out_part_size = encryptBlockWithPadding(evp_ctx, &data[in_size], in_part_size, off, out);
in_size += in_part_size;
out_size += out_part_size;
}
return iv;
if (in_size < size)
{
size_t in_part_size = size - in_size;
size_t out_part_size = encryptBlocks(evp_ctx, &data[in_size], in_part_size, out);
in_size += in_part_size;
out_size += out_part_size;
}
out_size += encryptFinal(evp_ctx, out);
if (out_size != in_size)
throw Exception("Only part of the data was encrypted", ErrorCodes::DATA_ENCRYPTION_ERROR);
offset += in_size;
}
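
Putting Encryptor together, a usage sketch (illustrative: a 16-byte key selects AES-128-CTR per the constructor above, and WriteBufferFromOwnString stands in for a real file buffer):

String plaintext = "some data to protect";
FileEncryption::Encryptor encryptor(String(16, 'k'), FileEncryption::InitVector::random());
WriteBufferFromOwnString out;
encryptor.encrypt(plaintext.data(), plaintext.size(), out);
/// CTR mode: the ciphertext has exactly the plaintext's size, and the internal
/// offset advanced by plaintext.size(), ready for an appended write.
assert(out.str().size() == plaintext.size());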
void writeIV(const String & iv, WriteBuffer & out)
void Encryptor::decrypt(const char * data, size_t size, char * out)
{
out.write(iv.data(), iv.length());
if (!size)
return;
auto current_iv = (init_vector + blocks(offset)).toString();
auto evp_ctx_ptr = std::unique_ptr<EVP_CIPHER_CTX, decltype(&::EVP_CIPHER_CTX_free)>(EVP_CIPHER_CTX_new(), &EVP_CIPHER_CTX_free);
auto * evp_ctx = evp_ctx_ptr.get();
if (!EVP_DecryptInit_ex(evp_ctx, evp_cipher, nullptr, nullptr, nullptr))
throw Exception("Failed to initialize decryption context with cipher", ErrorCodes::DATA_ENCRYPTION_ERROR);
if (!EVP_DecryptInit_ex(evp_ctx, nullptr, nullptr,
reinterpret_cast<const uint8_t*>(key.c_str()), reinterpret_cast<const uint8_t*>(current_iv.c_str())))
throw Exception("Failed to set key and IV for decryption", ErrorCodes::DATA_ENCRYPTION_ERROR);
size_t in_size = 0;
size_t out_size = 0;
auto off = blockOffset(offset);
if (off)
{
size_t in_part_size = partBlockSize(size, off);
size_t out_part_size = decryptBlockWithPadding(evp_ctx, &data[in_size], in_part_size, off, &out[out_size]);
in_size += in_part_size;
out_size += out_part_size;
}
if (in_size < size)
{
size_t in_part_size = size - in_size;
size_t out_part_size = decryptBlocks(evp_ctx, &data[in_size], in_part_size, &out[out_size]);
in_size += in_part_size;
out_size += out_part_size;
}
out_size += decryptFinal(evp_ctx, &out[out_size]);
if (out_size != in_size)
throw Exception("Only part of the data was decrypted", ErrorCodes::DATA_ENCRYPTION_ERROR);
offset += in_size;
}
size_t cipherKeyLength(const EVP_CIPHER * evp_cipher)
bool isKeyLengthSupported(size_t key_length)
{
return static_cast<size_t>(EVP_CIPHER_key_length(evp_cipher));
}
size_t cipherIVLength(const EVP_CIPHER * evp_cipher)
{
return static_cast<size_t>(EVP_CIPHER_iv_length(evp_cipher));
}
const EVP_CIPHER * defaultCipher()
{
return EVP_aes_128_ctr();
return (key_length == 16) || (key_length == 24) || (key_length == 32);
}
}


@ -16,87 +16,82 @@ class WriteBuffer;
namespace FileEncryption
{
constexpr size_t kIVSize = sizeof(UInt128);
/// Initialization vector. Its size is always 16 bytes.
class InitVector
{
public:
InitVector(const String & iv_);
const String & str() const;
void inc() { ++counter; }
void inc(size_t n) { counter += n; }
void set(size_t n) { counter = n; }
static constexpr const size_t kSize = 16;
InitVector() = default;
explicit InitVector(const UInt128 & counter_) { set(counter_); }
void set(const UInt128 & counter_) { counter = counter_; }
UInt128 get() const { return counter; }
void read(ReadBuffer & in);
void write(WriteBuffer & out) const;
/// Write 16 bytes of the counter to a string in big endian order.
/// We need big endian because the used cipher algorithms treat an initialization vector as a counter in big endian.
String toString() const;
/// Converts a string of 16 bytes length in big endian order to a counter.
static InitVector fromString(const String & str_);
/// Adds a specified offset to the counter.
InitVector & operator++() { ++counter; return *this; }
InitVector operator++(int) { InitVector res = *this; ++counter; return res; }
InitVector & operator+=(size_t offset) { counter += offset; return *this; }
InitVector operator+(size_t offset) const { InitVector res = *this; return res += offset; }
/// Generates a random initialization vector.
static InitVector random();
private:
UInt128 iv;
UInt128 counter = 0;
mutable String local;
};
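
The operator overloads give the counter pointer-like arithmetic: `iv + n` is exactly the IV the cipher would have reached after encrypting n whole blocks, which is what Encryptor uses to resume at an arbitrary offset. A short illustration (hypothetical values):

/// Resuming mid-stream (sketch): the IV for the block containing byte `offset`.
size_t offset = 40;                                        /// third 16-byte block
InitVector iv = InitVector::random();
InitVector block_iv = iv + (offset / InitVector::kSize);   /// iv advanced by 2
assert(block_iv.get() == iv.get() + 2);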
class EncryptionKey
/// Encrypts or decrypts data.
class Encryptor
{
public:
EncryptionKey(const String & key_) : key(key_) { }
size_t size() const { return key.size(); }
const String & str() const { return key; }
/// The `key` must be 16, 24, or 32 bytes long (128, 192, or 256 bits).
/// Depending on the key's length, aes_128_ctr, aes_192_ctr or aes_256_ctr will be used for encryption.
/// We chose CTR cipher algorithms because they have the following properties which are important for us:
/// - No right padding, so we can append to encrypted files without deciphering;
/// - One byte is always ciphered as one byte, so we get random access to encrypted files easily.
Encryptor(const String & key_, const InitVector & iv_);
/// Sets the current position in the data stream from the very beginning of data.
/// It affects how the data will be encrypted or decrypted: the initialization
/// vector is advanced by the index of the current block, and that block index
/// is calculated from this offset.
void setOffset(size_t offset_) { offset = offset_; }
/// Encrypts some data.
/// The function also advances `offset` by `size` (for successive encryptions).
void encrypt(const char * data, size_t size, WriteBuffer & out);
/// Decrypts some data.
/// The used cipher algorithms generate the same number of output bytes as there were input bytes,
/// so the function always writes `size` bytes of the plaintext to `out`.
/// The function also advances `offset` by `size` (for successive decryptions).
void decrypt(const char * data, size_t size, char * out);
private:
String key;
};
class Encryption
{
public:
Encryption(const String & iv_, const EncryptionKey & key_, size_t offset_);
protected:
size_t blockOffset(size_t pos) const { return pos % block_size; }
size_t blocks(size_t pos) const { return pos / block_size; }
size_t partBlockSize(size_t size, size_t off) const;
const EVP_CIPHER * get() const { return evp_cipher; }
const String key;
const InitVector init_vector;
const EVP_CIPHER * evp_cipher;
const String init_vector;
const EncryptionKey key;
size_t block_size;
/// absolute offset
/// The current position in the data stream from the very beginning of data.
size_t offset = 0;
};
class Encryptor : public Encryption
{
public:
using Encryption::Encryption;
void encrypt(const char * plaintext, WriteBuffer & buf, size_t size);
private:
String encryptPartialBlock(const char * partial_block, size_t size, const InitVector & iv, size_t off) const;
String encryptNBytes(const char * data, size_t bytes, const InitVector & iv) const;
};
class Decryptor : public Encryption
{
public:
Decryptor(const String & iv_, const EncryptionKey & key_) : Encryption(iv_, key_, 0) { }
void decrypt(const char * ciphertext, char * buf, size_t size, size_t off);
private:
void decryptPartialBlock(char *& to, const char * partial_block, size_t size, const InitVector & iv, size_t off) const;
void decryptNBytes(char *& to, const char * data, size_t bytes, const InitVector & iv) const;
};
String readIV(size_t size, ReadBuffer & in);
String randomString(size_t size);
void writeIV(const String & iv, WriteBuffer & out);
size_t cipherKeyLength(const EVP_CIPHER * evp_cipher);
size_t cipherIVLength(const EVP_CIPHER * evp_cipher);
const EVP_CIPHER * defaultCipher();
/// Checks whether a passed key length is supported, i.e.
/// whether it is 128, 192, or 256 bits (16, 24, or 32 bytes).
bool isKeyLengthSupported(size_t key_length);
}
}


@ -9,91 +9,95 @@ namespace ErrorCodes
extern const int ARGUMENT_OUT_OF_BOUND;
}
using InitVector = FileEncryption::InitVector;
ReadBufferFromEncryptedFile::ReadBufferFromEncryptedFile(
size_t buf_size_,
size_t buffer_size_,
std::unique_ptr<ReadBufferFromFileBase> in_,
const String & init_vector_,
const FileEncryption::EncryptionKey & key_,
const size_t iv_offset_)
: ReadBufferFromFileBase(buf_size_, nullptr, 0)
const String & key_,
const InitVector & init_vector_)
: ReadBufferFromFileBase(buffer_size_, nullptr, 0)
, in(std::move(in_))
, buf_size(buf_size_)
, decryptor(FileEncryption::Decryptor(init_vector_, key_))
, iv_offset(iv_offset_)
, encrypted_buffer(buffer_size_)
, encryptor(key_, init_vector_)
{
/// We should start reading from `in` at the offset == InitVector::kSize.
need_seek = true;
}
off_t ReadBufferFromEncryptedFile::seek(off_t off, int whence)
{
if (whence == SEEK_CUR)
{
if (off < 0 && -off > getPosition())
throw Exception("SEEK_CUR shift out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
if (!working_buffer.empty() && static_cast<size_t>(offset() + off) < working_buffer.size())
{
pos += off;
return getPosition();
}
else
start_pos = off + getPosition();
}
else if (whence == SEEK_SET)
off_t new_pos;
if (whence == SEEK_SET)
{
if (off < 0)
throw Exception("SEEK_SET underflow: off = " + std::to_string(off), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
if (!working_buffer.empty() && static_cast<size_t>(off) >= start_pos
&& static_cast<size_t>(off) < (start_pos + working_buffer.size()))
{
pos = working_buffer.begin() + (off - start_pos);
return getPosition();
}
else
start_pos = off;
new_pos = off;
}
else if (whence == SEEK_CUR)
{
if (off < 0 && -off > getPosition())
throw Exception("SEEK_CUR shift out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
new_pos = getPosition() + off;
}
else
throw Exception("ReadBufferFromEncryptedFile::seek expects SEEK_SET or SEEK_CUR as whence", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
throw Exception("ReadBufferFromFileEncrypted::seek expects SEEK_SET or SEEK_CUR as whence", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
initialize();
return start_pos;
if ((offset - static_cast<off_t>(working_buffer.size()) <= new_pos) && (new_pos <= offset) && !need_seek)
{
/// Position is still inside buffer.
pos = working_buffer.end() - offset + new_pos;
assert(pos >= working_buffer.begin());
assert(pos <= working_buffer.end());
}
else
{
need_seek = true;
offset = new_pos;
/// No more reading from the current working buffer until next() is called.
pos = working_buffer.end();
assert(!hasPendingData());
}
/// The encryptor always needs to know what the current offset is.
encryptor.setOffset(new_pos);
return new_pos;
}
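
Two invariants of the new seek are worth spelling out: a logical (decrypted) position p lives at raw position p + InitVector::kSize in the file, because the IV occupies the first 16 bytes; and the encryptor must be repositioned on every seek so decryption restarts with the right counter. A sketch of the offset mapping (the assumption is drawn from nextImpl below):

/// Logical-to-raw mapping used when seeking the underlying buffer.
static off_t rawOffset(off_t logical_pos)
{
    return logical_pos + FileEncryption::InitVector::kSize;   /// skip the stored IV
}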
off_t ReadBufferFromEncryptedFile::getPosition()
{
return offset - available();
}
bool ReadBufferFromEncryptedFile::nextImpl()
{
if (need_seek)
{
off_t raw_offset = offset + InitVector::kSize;
if (in->seek(raw_offset, SEEK_SET) != raw_offset)
return false;
need_seek = false;
}
if (in->eof())
return false;
if (initialized)
start_pos += working_buffer.size();
initialize();
return true;
}
void ReadBufferFromEncryptedFile::initialize()
{
size_t in_pos = start_pos + iv_offset;
String data;
data.resize(buf_size);
size_t data_size = 0;
in->seek(in_pos, SEEK_SET);
while (data_size < buf_size && !in->eof())
/// Read up to the size of `encrypted_buffer`.
size_t bytes_read = 0;
while (bytes_read < encrypted_buffer.size() && !in->eof())
{
auto size = in->read(data.data() + data_size, buf_size - data_size);
data_size += size;
in_pos += size;
in->seek(in_pos, SEEK_SET);
bytes_read += in->read(encrypted_buffer.data() + bytes_read, encrypted_buffer.size() - bytes_read);
}
data.resize(data_size);
working_buffer.resize(data_size);
decryptor.decrypt(data.data(), working_buffer.begin(), data_size, start_pos);
/// The used cipher algorithms generate the same number of output bytes as there were input bytes,
/// so after deciphering the number of bytes will still be `bytes_read`.
working_buffer.resize(bytes_read);
encryptor.decrypt(encrypted_buffer.data(), bytes_read, working_buffer.begin());
pos = working_buffer.begin();
initialized = true;
return true;
}
}


@ -12,39 +12,33 @@
namespace DB
{
/// Reads data from the underlying read buffer and decrypts it.
class ReadBufferFromEncryptedFile : public ReadBufferFromFileBase
{
public:
ReadBufferFromEncryptedFile(
size_t buf_size_,
size_t buffer_size_,
std::unique_ptr<ReadBufferFromFileBase> in_,
const String & init_vector_,
const FileEncryption::EncryptionKey & key_,
const size_t iv_offset_);
const String & key_,
const FileEncryption::InitVector & init_vector_);
off_t seek(off_t off, int whence) override;
off_t getPosition() override { return start_pos + offset(); }
off_t getPosition() override;
std::string getFileName() const override { return in->getFileName(); }
private:
bool nextImpl() override;
void initialize();
std::unique_ptr<ReadBufferFromFileBase> in;
size_t buf_size;
FileEncryption::Decryptor decryptor;
bool initialized = false;
off_t offset = 0;
bool need_seek = false;
// current working_buffer.begin() offset from decrypted file
size_t start_pos = 0;
size_t iv_offset = 0;
Memory<> encrypted_buffer;
FileEncryption::Encryptor encryptor;
};
}
#endif


@ -327,6 +327,7 @@ static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf)
&& decoded_char != '"'
&& decoded_char != '`' /// MySQL style identifiers
&& decoded_char != '/' /// JavaScript in HTML
&& decoded_char != '=' /// Yandex's TSKV
&& !isControlASCII(decoded_char))
{
s.push_back('\\');
