Merge branch 'master' into revert-34211-revert-34153-add_func_tests_over_s3

This commit is contained in:
alesapin 2022-02-08 19:42:27 +03:00
commit 02a93cb852
72 changed files with 1303 additions and 174 deletions

View File

@ -142,6 +142,7 @@ Checks: '-*,
clang-analyzer-cplusplus.PlacementNewChecker,
clang-analyzer-cplusplus.SelfAssignment,
clang-analyzer-deadcode.DeadStores,
clang-analyzer-cplusplus.Move,
clang-analyzer-optin.cplusplus.VirtualCall,
clang-analyzer-security.insecureAPI.UncheckedReturn,
clang-analyzer-security.insecureAPI.bcmp,

View File

@ -6,8 +6,8 @@ name: TagsStableWorkflow
on: # yamllint disable-line rule:truthy
push:
tags:
- 'v*-stable'
- 'v*-lts'
- 'v*-stable'
- 'v*-lts'
jobs:

View File

@ -108,7 +108,13 @@ Examples of configuration for quorum with three nodes can be found in [integrati
ClickHouse Keeper is bundled into the ClickHouse server package; just add the `<keeper_server>` configuration and start the ClickHouse server as usual. If you want to run standalone ClickHouse Keeper, you can start it in a similar way with:
```bash
clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon
clickhouse-keeper --config /etc/your_path_to_config/config.xml
```
If you don't have the symlink (`clickhouse-keeper`), you can create it or specify `keeper` as an argument:
```bash
clickhouse keeper --config /etc/your_path_to_config/config.xml
```
## Four Letter Word Commands {#four-letter-word-commands}

View File

@ -1 +0,0 @@
../../../en/engines/database-engines/sqlite.md

View File

@ -0,0 +1,80 @@
---
toc_priority: 32
toc_title: SQLite
---
# SQLite {#sqlite}
Allows connecting to a [SQLite](https://www.sqlite.org/index.html) database and exchanging data between ClickHouse and SQLite by executing `INSERT` and `SELECT` queries.
## Creating a Database {#creating-a-database}
``` sql
CREATE DATABASE sqlite_database
ENGINE = SQLite('db_path')
```
**Engine Parameters**
- `db_path` — Path to the SQLite database file.
## Data Types Support {#data_types-support}
| SQLite | ClickHouse |
|---------------|---------------------------------------------------------|
| INTEGER | [Int32](../../sql-reference/data-types/int-uint.md) |
| REAL | [Float32](../../sql-reference/data-types/float.md) |
| TEXT | [String](../../sql-reference/data-types/string.md) |
| BLOB | [String](../../sql-reference/data-types/string.md) |
## Specifics and Recommendations {#specifics-and-recommendations}
SQLite stores the entire database (definitions, tables, indexes, and the data itself) as a single cross-platform file on a host machine. During writes, SQLite locks the whole database file, so write operations are executed sequentially. Read operations can run concurrently.
SQLite does not require service management (such as startup scripts) or access control based on `GRANT` and passwords. Access control is handled by filesystem permissions granted on the database file itself.
## Usage Example {#usage-example}
Database in ClickHouse, connected to SQLite:
``` sql
CREATE DATABASE sqlite_db ENGINE = SQLite('sqlite.db');
SHOW TABLES FROM sqlite_db;
```
``` text
┌──name───┐
│ table1 │
│ table2 │
└─────────┘
```
Show the contents of the table:
``` sql
SELECT * FROM sqlite_db.table1;
```
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
└───────┴──────┘
```
Insert data into the SQLite table from a ClickHouse table:
``` sql
CREATE TABLE clickhouse_table(`col1` String,`col2` Int16) ENGINE = MergeTree() ORDER BY col2;
INSERT INTO clickhouse_table VALUES ('text',10);
INSERT INTO sqlite_db.table1 SELECT * FROM clickhouse_table;
SELECT * FROM sqlite_db.table1;
```
``` text
┌─col1──┬─col2─┐
│ line1 │ 1 │
│ line2 │ 2 │
│ line3 │ 3 │
│ text │ 10 │
└───────┴──────┘
```

View File

@ -0,0 +1,416 @@
---
toc_priority: 4
toc_title: Hive
---
# Hive {#hive}
The Hive engine allows you to perform `SELECT` queries on HDFS Hive tables. Currently it supports the following input formats:
- Text: supports only simple scalar column types, except `Binary`
- ORC: supports simple scalar column types, except `char`; the only supported complex type is `array`
- Parquet: supports all simple scalar column types; the only supported complex type is `array`
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [ALIAS expr1],
name2 [type2] [ALIAS expr2],
...
) ENGINE = Hive('thrift://host:port', 'database', 'table')
PARTITION BY expr
```
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
The table structure can differ from the structure of the original Hive table:
- Column names should be the same as in the original Hive table, but you can use just some of these columns, in any order; you can also use alias columns computed from other columns (see the sketch after the engine parameters below).
- Column types should be the same as in the original Hive table.
- The "partition by expression" should be consistent with the original Hive table, and its columns should be present in the table structure.
**Engine Parameters**
- `thrift://host:port` — Hive Metastore address.
- `database` — Remote database name.
- `table` — Remote table name.
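A minimal sketch of an alias column over a Hive table. All names here (`hive_sample`, `day_date`) are hypothetical, and `toDate(day)` assumes the partition column holds a date-formatted string:
``` sql
CREATE TABLE hive_sample
(
    `f_int` Int32,
    `day` String,
    `day_date` Date ALIAS toDate(day) -- alias column computed from the partition column
)
ENGINE = Hive('thrift://host:port', 'database', 'table')
PARTITION BY day
```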
## Usage Example {#usage-example}
### How to Use a Local Cache for the HDFS Filesystem
We strongly recommend enabling a local cache for remote filesystems. Benchmarks show that it is about twice as fast with the cache.
Before using the cache, add it to `config.xml`:
``` xml
<local_cache_for_remote_fs>
<enable>true</enable>
<root_dir>local_cache</root_dir>
<limit_size>559096952</limit_size>
<bytes_read_before_flush>1048576</bytes_read_before_flush>
</local_cache_for_remote_fs>
```
- enable: when true, ClickHouse maintains a local cache for HDFS (the remote filesystem).
- root_dir: required. The root directory for storing local cache files for the remote filesystem.
- limit_size: required. The maximum size of the local cache files, in bytes.
- bytes_read_before_flush: the number of bytes to read from the remote filesystem before flushing to the local filesystem when downloading a file. The default is 1 MB.
When ClickHouse has the local cache enabled for a remote filesystem, users can still choose not to use it for a given query with `use_local_cache_for_remote_fs = 0`; `use_local_cache_for_remote_fs` defaults to `false`.
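For example, to bypass the local cache for a single query (using the `test.test_orc` table defined later on this page):
``` sql
SELECT * FROM test.test_orc SETTINGS use_local_cache_for_remote_fs = 0;
```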
### Querying a Hive Table with the ORC Input Format
#### Create a Table in Hive
``` text
hive > CREATE TABLE `test`.`test_orc`(
`f_tinyint` tinyint,
`f_smallint` smallint,
`f_int` int,
`f_integer` int,
`f_bigint` bigint,
`f_float` float,
`f_double` double,
`f_decimal` decimal(10,0),
`f_timestamp` timestamp,
`f_date` date,
`f_string` string,
`f_varchar` varchar(100),
`f_bool` boolean,
`f_binary` binary,
`f_array_int` array<int>,
`f_array_string` array<string>,
`f_array_float` array<float>,
`f_array_array_int` array<array<int>>,
`f_array_array_string` array<array<string>>,
`f_array_array_float` array<array<float>>)
PARTITIONED BY (
`day` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'hdfs://testcluster/data/hive/test.db/test_orc'
OK
Time taken: 0.51 seconds
hive > insert into test.test_orc partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
OK
Time taken: 36.025 seconds
hive > select * from test.test_orc;
OK
1 2 3 4 5 6.11 7.22 8 2021-11-05 12:38:16.314 2021-11-05 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
Time taken: 0.295 seconds, Fetched: 1 row(s)
```
#### Create a Table in ClickHouse
A table in ClickHouse that retrieves data from the Hive table created above:
``` sql
CREATE TABLE test.test_orc
(
`f_tinyint` Int8,
`f_smallint` Int16,
`f_int` Int32,
`f_integer` Int32,
`f_bigint` Int64,
`f_float` Float32,
`f_double` Float64,
`f_decimal` Float64,
`f_timestamp` DateTime,
`f_date` Date,
`f_string` String,
`f_varchar` String,
`f_bool` Bool,
`f_binary` String,
`f_array_int` Array(Int32),
`f_array_string` Array(String),
`f_array_float` Array(Float32),
`f_array_array_int` Array(Array(Int32)),
`f_array_array_string` Array(Array(String)),
`f_array_array_float` Array(Array(Float32)),
`day` String
)
ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
PARTITION BY day
```
``` sql
SELECT * FROM test.test_orc settings input_format_orc_allow_missing_columns = 1\G
```
``` text
SELECT *
FROM test.test_orc
SETTINGS input_format_orc_allow_missing_columns = 1
Query id: c3eaffdc-78ab-43cd-96a4-4acc5b480658
Row 1:
──────
f_tinyint: 1
f_smallint: 2
f_int: 3
f_integer: 4
f_bigint: 5
f_float: 6.11
f_double: 7.22
f_decimal: 8
f_timestamp: 2021-12-04 04:00:44
f_date: 2021-12-03
f_string: hello world
f_varchar: hello world
f_bool: true
f_binary: hello world
f_array_int: [1,2,3]
f_array_string: ['hello world','hello world']
f_array_float: [1.1,1.2]
f_array_array_int: [[1,2],[3,4]]
f_array_array_string: [['a','b'],['c','d']]
f_array_array_float: [[1.11,2.22],[3.33,4.44]]
day: 2021-09-18
1 rows in set. Elapsed: 0.078 sec.
```
### Querying a Hive Table with the Parquet Input Format
#### Create a Table in Hive
``` text
hive >
CREATE TABLE `test`.`test_parquet`(
`f_tinyint` tinyint,
`f_smallint` smallint,
`f_int` int,
`f_integer` int,
`f_bigint` bigint,
`f_float` float,
`f_double` double,
`f_decimal` decimal(10,0),
`f_timestamp` timestamp,
`f_date` date,
`f_string` string,
`f_varchar` varchar(100),
`f_char` char(100),
`f_bool` boolean,
`f_binary` binary,
`f_array_int` array<int>,
`f_array_string` array<string>,
`f_array_float` array<float>,
`f_array_array_int` array<array<int>>,
`f_array_array_string` array<array<string>>,
`f_array_array_float` array<array<float>>)
PARTITIONED BY (
`day` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'hdfs://testcluster/data/hive/test.db/test_parquet'
OK
Time taken: 0.51 seconds
hive > insert into test.test_parquet partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
OK
Time taken: 36.025 seconds
hive > select * from test.test_parquet;
OK
1 2 3 4 5 6.11 7.22 8 2021-12-14 17:54:56.743 2021-12-14 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
Time taken: 0.766 seconds, Fetched: 1 row(s)
```
#### Create a Table in ClickHouse
A table in ClickHouse that retrieves data from the Hive table created above:
``` sql
CREATE TABLE test.test_parquet
(
`f_tinyint` Int8,
`f_smallint` Int16,
`f_int` Int32,
`f_integer` Int32,
`f_bigint` Int64,
`f_float` Float32,
`f_double` Float64,
`f_decimal` Float64,
`f_timestamp` DateTime,
`f_date` Date,
`f_string` String,
`f_varchar` String,
`f_char` String,
`f_bool` Bool,
`f_binary` String,
`f_array_int` Array(Int32),
`f_array_string` Array(String),
`f_array_float` Array(Float32),
`f_array_array_int` Array(Array(Int32)),
`f_array_array_string` Array(Array(String)),
`f_array_array_float` Array(Array(Float32)),
`day` String
)
ENGINE = Hive('thrift://localhost:9083', 'test', 'test_parquet')
PARTITION BY day
```
``` sql
SELECT * FROM test.test_parquet settings input_format_parquet_allow_missing_columns = 1\G
```
``` text
SELECT *
FROM test_parquet
SETTINGS input_format_parquet_allow_missing_columns = 1
Query id: 4e35cf02-c7b2-430d-9b81-16f438e5fca9
Row 1:
──────
f_tinyint: 1
f_smallint: 2
f_int: 3
f_integer: 4
f_bigint: 5
f_float: 6.11
f_double: 7.22
f_decimal: 8
f_timestamp: 2021-12-14 17:54:56
f_date: 2021-12-14
f_string: hello world
f_varchar: hello world
f_char: hello world
f_bool: true
f_binary: hello world
f_array_int: [1,2,3]
f_array_string: ['hello world','hello world']
f_array_float: [1.1,1.2]
f_array_array_int: [[1,2],[3,4]]
f_array_array_string: [['a','b'],['c','d']]
f_array_array_float: [[1.11,2.22],[3.33,4.44]]
day: 2021-09-18
1 rows in set. Elapsed: 0.357 sec.
```
### Querying a Hive Table with the Text Input Format
#### Create a Table in Hive
``` text
hive >
CREATE TABLE `test`.`test_text`(
`f_tinyint` tinyint,
`f_smallint` smallint,
`f_int` int,
`f_integer` int,
`f_bigint` bigint,
`f_float` float,
`f_double` double,
`f_decimal` decimal(10,0),
`f_timestamp` timestamp,
`f_date` date,
`f_string` string,
`f_varchar` varchar(100),
`f_char` char(100),
`f_bool` boolean,
`f_binary` binary,
`f_array_int` array<int>,
`f_array_string` array<string>,
`f_array_float` array<float>,
`f_array_array_int` array<array<int>>,
`f_array_array_string` array<array<string>>,
`f_array_array_float` array<array<float>>)
PARTITIONED BY (
`day` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'hdfs://testcluster/data/hive/test.db/test_text'
Time taken: 0.1 seconds, Fetched: 34 row(s)
hive > insert into test.test_text partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
OK
Time taken: 36.025 seconds
hive > select * from test.test_text;
OK
1 2 3 4 5 6.11 7.22 8 2021-12-14 18:11:17.239 2021-12-14 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
Time taken: 0.624 seconds, Fetched: 1 row(s)
```
#### Create a Table in ClickHouse
A table in ClickHouse that retrieves data from the Hive table created above:
``` sql
CREATE TABLE test.test_text
(
`f_tinyint` Int8,
`f_smallint` Int16,
`f_int` Int32,
`f_integer` Int32,
`f_bigint` Int64,
`f_float` Float32,
`f_double` Float64,
`f_decimal` Float64,
`f_timestamp` DateTime,
`f_date` Date,
`f_string` String,
`f_varchar` String,
`f_char` String,
`f_bool` Bool,
`day` String
)
ENGINE = Hive('thrift://localhost:9083', 'test', 'test_text')
PARTITION BY day
```
``` sql
SELECT * FROM test.test_text settings input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort'\G
```
``` text
SELECT *
FROM test.test_text
SETTINGS input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort'
Query id: 55b79d35-56de-45b9-8be6-57282fbf1f44
Row 1:
──────
f_tinyint: 1
f_smallint: 2
f_int: 3
f_integer: 4
f_bigint: 5
f_float: 6.11
f_double: 7.22
f_decimal: 8
f_timestamp: 2021-12-14 18:11:17
f_date: 2021-12-14
f_string: hello world
f_varchar: hello world
f_char: hello world
f_bool: true
day: 2021-09-18
```

View File

@ -19,3 +19,5 @@ ClickHouse provides various ways to integrate with external systems, including table engines. Like
- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
- [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md)
- [SQLite](../../../engines/table-engines/integrations/sqlite.md)
- [Hive](../../../engines/table-engines/integrations/hive.md)

View File

@ -57,8 +57,16 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
("seed", po::value<std::string>(), "seed (arbitrary string) that determines the result of obfuscation")
;
Settings cmd_settings;
for (const auto & field : cmd_settings.all())
{
if (field.getName() == "max_parser_depth" || field.getName() == "max_query_size")
cmd_settings.addProgramOption(desc, field);
}
boost::program_options::variables_map options;
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
po::notify(options);
if (options.count("help"))
{
@ -149,7 +157,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
ParserQuery parser(end);
do
{
ASTPtr res = parseQueryAndMovePosition(parser, pos, end, "query", multiple, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
ASTPtr res = parseQueryAndMovePosition(
parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth);
/// An insert query with inline data (INSERT INTO ... VALUES ...) would make formatting fail,
/// so we should throw an exception early and make the exception message more readable.
if (const auto * insert_query = res->as<ASTInsertQuery>(); insert_query && insert_query->data)
@ -222,6 +231,5 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
std::cerr << getCurrentExceptionMessage(true) << '\n';
return getCurrentExceptionCode();
}
return 0;
}

View File

@ -106,7 +106,7 @@ void CachedCompressedReadBuffer::seek(size_t offset_in_compressed_file, size_t o
/// We will discard our working_buffer, but have to account for the remaining bytes
bytes += offset();
/// No data, everything discarded
pos = working_buffer.end();
resetWorkingBuffer();
owned_cell.reset();
/// Remember required offset in decompressed block which will be set in

View File

@ -80,7 +80,7 @@ void CompressedReadBufferFromFile::seek(size_t offset_in_compressed_file, size_t
/// We will discard our working_buffer, but have to account for the remaining bytes
bytes += offset();
/// No data, everything discarded
pos = working_buffer.end();
resetWorkingBuffer();
size_compressed = 0;
/// Remember required offset in decompressed block which will be set in
/// the next ReadBuffer::next() call

View File

@ -755,4 +755,30 @@ void materializeBlockInplace(Block & block)
block.getByPosition(i).column = recursiveRemoveSparse(block.getByPosition(i).column->convertToFullColumnIfConst());
}
Block concatenateBlocks(const std::vector<Block> & blocks)
{
if (blocks.empty())
return {};
size_t num_rows = 0;
for (const auto & block : blocks)
num_rows += block.rows();
Block out = blocks[0].cloneEmpty();
MutableColumns columns = out.mutateColumns();
for (size_t i = 0; i < columns.size(); ++i)
{
columns[i]->reserve(num_rows);
for (const auto & block : blocks)
{
const auto & tmp_column = *block.getByPosition(i).column;
columns[i]->insertRangeFrom(tmp_column, 0, block.rows());
}
}
out.setColumns(std::move(columns));
return out;
}
}

View File

@ -203,4 +203,6 @@ ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & column
Block materializeBlock(const Block & block);
void materializeBlockInplace(Block & block);
Block concatenateBlocks(const std::vector<Block> & blocks);
}

View File

@ -85,16 +85,18 @@ void Settings::addProgramOptions(boost::program_options::options_description & o
{
for (const auto & field : all())
{
const std::string_view name = field.getName();
auto on_program_option
= boost::function1<void, const std::string &>([this, name](const std::string & value) { set(name, value); });
options.add(boost::shared_ptr<boost::program_options::option_description>(new boost::program_options::option_description(
name.data(),
boost::program_options::value<std::string>()->composing()->notifier(on_program_option),
field.getDescription())));
addProgramOption(options, field);
}
}
void Settings::addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field)
{
const std::string_view name = field.getName();
auto on_program_option = boost::function1<void, const std::string &>([this, name](const std::string & value) { set(name, value); });
options.add(boost::shared_ptr<boost::program_options::option_description>(new boost::program_options::option_description(
name.data(), boost::program_options::value<std::string>()->composing()->notifier(on_program_option), field.getDescription())));
}
void Settings::checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path)
{
if (config.getBool("skip_check_for_incorrect_settings", false))

View File

@ -610,6 +610,7 @@ class IColumn;
M(Char, input_format_hive_text_collection_items_delimiter, '\x02', "Delimiter between collection(array or map) items in Hive Text File", 0) \
M(Char, input_format_hive_text_map_keys_delimiter, '\x03', "Delimiter between a pair of map key/values in Hive Text File", 0) \
M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. Used for automatic schema inference from data.", 0) \
M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \
M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum rows of data to read for automatic schema inference", 0) \
\
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \
@ -717,6 +718,8 @@ struct Settings : public BaseSettings<SettingsTraits>, public IHints<2, Settings
static void checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path);
std::vector<String> getAllRegisteredNames() const override;
void addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field);
};
/*

View File

@ -130,4 +130,10 @@ IMPLEMENT_SETTING_ENUM(EscapingRule, ErrorCodes::BAD_ARGUMENTS,
{"JSON", FormatSettings::EscapingRule::JSON},
{"XML", FormatSettings::EscapingRule::XML},
{"Raw", FormatSettings::EscapingRule::Raw}})
IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS,
{{"bin", FormatSettings::MsgPackUUIDRepresentation::BIN},
{"str", FormatSettings::MsgPackUUIDRepresentation::STR},
{"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}})
}

View File

@ -172,4 +172,6 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EnumComparingMode, FormatSettings::EnumComparin
DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule)
DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation)
}

View File

@ -167,8 +167,10 @@ String getNameForSubstreamPath(
/// Because nested data may be represented not by Array of Tuple,
/// but by separate Array columns with names in a form of a.b,
/// and name is encoded as a whole.
stream_name += (escape_tuple_delimiter && it->escape_tuple_delimiter ?
escapeForFileName(".") : ".") + escapeForFileName(it->tuple_element_name);
if (escape_tuple_delimiter && it->escape_tuple_delimiter)
stream_name += escapeForFileName("." + it->tuple_element_name);
else
stream_name += "." + it->tuple_element_name;
}
}

View File

@ -78,7 +78,9 @@ ASTPtr DatabaseMemory::getCreateDatabaseQuery() const
auto create_query = std::make_shared<ASTCreateQuery>();
create_query->setDatabase(getDatabaseName());
create_query->set(create_query->storage, std::make_shared<ASTStorage>());
create_query->storage->set(create_query->storage->engine, makeASTFunction(getEngineName()));
auto engine = makeASTFunction(getEngineName());
engine->no_empty_args = true;
create_query->storage->set(create_query->storage->engine, engine);
if (const auto comment_value = getDatabaseComment(); !comment_value.empty())
create_query->set(create_query->comment, std::make_shared<ASTLiteral>(comment_value));

View File

@ -243,7 +243,7 @@ off_t AsynchronousReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence
prefetch_future = {};
}
pos = working_buffer.end();
resetWorkingBuffer();
/**
* Lazy ignore. Save number of bytes to ignore and ignore it either for prefetch buffer or current buffer.

View File

@ -64,7 +64,7 @@ off_t ReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence)
throw Exception("Only SEEK_SET or SEEK_CUR modes are allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
impl->reset();
pos = working_buffer.end();
resetWorkingBuffer();
return impl->file_offset_of_buffer_end;
}

View File

@ -129,6 +129,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
format_settings.seekable_read = settings.input_format_allow_seeks;
format_settings.msgpack.number_of_columns = settings.input_format_msgpack_number_of_columns;
format_settings.msgpack.output_uuid_representation = settings.output_format_msgpack_uuid_representation;
format_settings.max_rows_to_read_for_schema_inference = settings.input_format_max_rows_to_read_for_schema_inference;
/// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context

View File

@ -231,9 +231,17 @@ struct FormatSettings
EnumComparingMode enum_comparing_mode = EnumComparingMode::BY_VALUES;
} capn_proto;
enum class MsgPackUUIDRepresentation
{
STR, // Output UUID as a string of 36 characters.
BIN, // Output UUID as 16-bytes binary.
EXT, // Output UUID as ExtType = 2
};
struct
{
UInt64 number_of_columns = 0;
MsgPackUUIDRepresentation output_uuid_representation = MsgPackUUIDRepresentation::EXT;
} msgpack;
};
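A hedged usage sketch of the corresponding output setting; the setting name and its 'bin'/'str'/'ext' values appear elsewhere in this commit, while table `t` and UUID column `u` are hypothetical:
``` sql
-- Output UUIDs as 36-character strings instead of the default ExtType = 2:
SELECT u FROM t SETTINGS output_format_msgpack_uuid_representation = 'str' FORMAT MsgPack;
```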

View File

@ -0,0 +1,11 @@
#pragma once
namespace DB
{
enum class MsgPackExtensionTypes
{
UUID = 0x02,
};
}

View File

@ -181,8 +181,8 @@ off_t AsynchronousReadBufferFromFileDescriptor::seek(off_t offset, int whence)
off_t offset_after_seek_pos = new_pos - seek_pos;
/// First put position at the end of the buffer so the next read will fetch new data to the buffer.
pos = working_buffer.end();
/// First reset the buffer so the next read will fetch new data to the buffer.
resetWorkingBuffer();
/// Just update the info about the next position in file.

View File

@ -97,6 +97,15 @@ public:
bool isPadded() const { return padded; }
protected:
void resetWorkingBuffer()
{
/// Move the position to the end of the buffer to trigger a call of 'next' on the next read.
/// Discard all data in the current working buffer to prevent wrong assumptions about its
/// content, e.g. for seek optimizations in seekable buffers.
working_buffer.resize(0);
pos = working_buffer.end();
}
/// Read/write position.
Position pos;

View File

@ -56,7 +56,7 @@ off_t ReadBufferFromEncryptedFile::seek(off_t off, int whence)
offset = new_pos;
/// No more reading from the current working buffer until next() is called.
pos = working_buffer.end();
resetWorkingBuffer();
assert(!hasPendingData());
}

View File

@ -111,7 +111,6 @@ bool ReadBufferFromFileDescriptor::nextImpl()
ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadBytes, bytes_read);
working_buffer = internal_buffer;
working_buffer.resize(bytes_read);
buffer_is_dirty = false;
}
else
return false;
@ -153,10 +152,10 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence)
}
/// Position is unchanged.
if (!buffer_is_dirty && (new_pos + (working_buffer.end() - pos) == file_offset_of_buffer_end))
if (new_pos + (working_buffer.end() - pos) == file_offset_of_buffer_end)
return new_pos;
if (!buffer_is_dirty && file_offset_of_buffer_end - working_buffer.size() <= static_cast<size_t>(new_pos)
if (file_offset_of_buffer_end - working_buffer.size() <= static_cast<size_t>(new_pos)
&& new_pos <= file_offset_of_buffer_end)
{
/// Position is still inside the buffer.
@ -177,12 +176,8 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence)
off_t offset_after_seek_pos = new_pos - seek_pos;
/// First put position at the end of the buffer so the next read will fetch new data to the buffer.
pos = working_buffer.end();
/// Mark buffer as dirty to disallow further seek optimizations, because fetching data to the buffer
/// is delayed to the next call of 'nextImpl', but it may be not called before next seek.
buffer_is_dirty = true;
/// First reset the buffer so the next read will fetch new data to the buffer.
resetWorkingBuffer();
/// In case of using 'pread' we just update the info about the next position in file.
/// In case of using 'read' we call 'lseek'.
@ -234,7 +229,6 @@ void ReadBufferFromFileDescriptor::rewind()
working_buffer.resize(0);
pos = working_buffer.begin();
file_offset_of_buffer_end = 0;
buffer_is_dirty = true;
}

View File

@ -62,9 +62,6 @@ public:
private:
/// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout.
bool poll(size_t timeout_microseconds);
/// If it's true then we cannot assume on content of buffer to optimize seek calls.
bool buffer_is_dirty = true;
};

View File

@ -187,7 +187,7 @@ off_t ReadBufferFromS3::seek(off_t offset_, int whence)
}
}
pos = working_buffer.end();
resetWorkingBuffer();
if (impl)
{
ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection);

View File

@ -498,7 +498,7 @@ namespace detail
impl.reset();
}
pos = working_buffer.end();
resetWorkingBuffer();
read_range.begin = offset_;
read_range.end = std::nullopt;
offset_from_begin_pos = 0;

View File

@ -320,25 +320,7 @@ Block SortedBlocksBuffer::mergeBlocks(Blocks && blocks) const
if (blocks.size() == 1)
return blocks[0];
Block out = blocks[0].cloneEmpty();
{ /// Concatenate blocks
MutableColumns columns = out.mutateColumns();
for (size_t i = 0; i < columns.size(); ++i)
{
columns[i]->reserve(num_rows);
for (const auto & block : blocks)
{
const auto & tmp_column = *block.getByPosition(i).column;
columns[i]->insertRangeFrom(tmp_column, 0, block.rows());
}
}
out.setColumns(std::move(columns));
}
return out;
return concatenateBlocks(blocks);
}
}

View File

@ -689,7 +689,7 @@ bool ParserUnaryExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ASTPtr expr_ast;
if (!ParserExpressionElement().parse(pos, expr_ast, expected))
if (!elem_parser->parse(pos, expr_ast, expected))
return false;
ASTPtr type_ast;
@ -711,7 +711,7 @@ bool ParserArrayElementExpression::parseImpl(Pos & pos, ASTPtr & node, Expected
{
return ParserLeftAssociativeBinaryOperatorList{
operators,
std::make_unique<ParserCastExpression>(),
std::make_unique<ParserCastExpression>(std::make_unique<ParserExpressionElement>()),
std::make_unique<ParserExpressionWithOptionalAlias>(false)
}.parse(pos, node, expected);
}
@ -721,7 +721,7 @@ bool ParserTupleElementExpression::parseImpl(Pos & pos, ASTPtr & node, Expected
{
return ParserLeftAssociativeBinaryOperatorList{
operators,
std::make_unique<ParserArrayElementExpression>(),
std::make_unique<ParserCastExpression>(std::make_unique<ParserArrayElementExpression>()),
std::make_unique<ParserUnsignedInteger>()
}.parse(pos, node, expected);
}

View File

@ -203,6 +203,15 @@ protected:
/// Example: "[1, 1 + 1, 1 + 2]::Array(UInt8)"
class ParserCastExpression : public IParserBase
{
private:
ParserPtr elem_parser;
public:
ParserCastExpression(ParserPtr && elem_parser_)
: elem_parser(std::move(elem_parser_))
{
}
protected:
const char * getName() const override { return "CAST expression"; }
@ -238,7 +247,7 @@ class ParserUnaryExpression : public IParserBase
{
private:
static const char * operators[];
ParserPrefixUnaryOperatorExpression operator_parser {operators, std::make_unique<ParserTupleElementExpression>()};
ParserPrefixUnaryOperatorExpression operator_parser {operators, std::make_unique<ParserCastExpression>(std::make_unique<ParserTupleElementExpression>())};
protected:
const char * getName() const override { return "unary expression"; }

View File

@ -5,6 +5,7 @@
#include <cstdlib>
#include <Common/assert_cast.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromMemory.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeDateTime64.h>
@ -12,6 +13,7 @@
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeUUID.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnNullable.h>
@ -20,6 +22,8 @@
#include <Columns/ColumnMap.h>
#include <Columns/ColumnLowCardinality.h>
#include <Formats/MsgPackExtensionTypes.h>
namespace DB
{
@ -153,16 +157,29 @@ static void insertInteger(IColumn & column, DataTypePtr type, UInt64 value)
}
}
static void insertString(IColumn & column, DataTypePtr type, const char * value, size_t size)
static void insertString(IColumn & column, DataTypePtr type, const char * value, size_t size, bool bin)
{
auto insert_func = [&](IColumn & column_, DataTypePtr type_)
{
insertString(column_, type_, value, size);
insertString(column_, type_, value, size, bin);
};
if (checkAndInsertNullable(column, type, insert_func) || checkAndInsertLowCardinality(column, type, insert_func))
return;
if (isUUID(type))
{
ReadBufferFromMemory buf(value, size);
UUID uuid;
if (bin)
readBinary(uuid, buf);
else
readUUIDText(uuid, buf);
assert_cast<ColumnUUID &>(column).insertValue(uuid);
return;
}
if (!isStringOrFixedString(type))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack string into column with type {}.", type->getName());
@ -218,6 +235,15 @@ static void insertNull(IColumn & column, DataTypePtr type)
assert_cast<ColumnNullable &>(column).insertDefault();
}
static void insertUUID(IColumn & column, DataTypePtr /*type*/, const char * value, size_t size)
{
ReadBufferFromMemory buf(value, size);
UUID uuid;
readBinaryBigEndian(uuid.toUnderType().items[0], buf);
readBinaryBigEndian(uuid.toUnderType().items[1], buf);
assert_cast<ColumnUUID &>(column).insertValue(uuid);
}
bool MsgPackVisitor::visit_positive_integer(UInt64 value) // NOLINT
{
insertInteger(info_stack.top().column, info_stack.top().type, value);
@ -232,13 +258,13 @@ bool MsgPackVisitor::visit_negative_integer(Int64 value) // NOLINT
bool MsgPackVisitor::visit_str(const char * value, size_t size) // NOLINT
{
insertString(info_stack.top().column, info_stack.top().type, value, size);
insertString(info_stack.top().column, info_stack.top().type, value, size, false);
return true;
}
bool MsgPackVisitor::visit_bin(const char * value, size_t size) // NOLINT
{
insertString(info_stack.top().column, info_stack.top().type, value, size);
insertString(info_stack.top().column, info_stack.top().type, value, size, true);
return true;
}
@ -324,6 +350,18 @@ bool MsgPackVisitor::visit_nil()
return true;
}
bool MsgPackVisitor::visit_ext(const char * value, uint32_t size)
{
int8_t type = *value;
if (*value == int8_t(MsgPackExtensionTypes::UUID))
{
insertUUID(info_stack.top().column, info_stack.top().type, value + 1, size - 1);
return true;
}
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported MsgPack extension type: {%x}", type);
}
void MsgPackVisitor::parse_error(size_t, size_t) // NOLINT
{
throw Exception("Error occurred while parsing msgpack data.", ErrorCodes::INCORRECT_DATA);
@ -455,8 +493,13 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object)
}
case msgpack::type::object_type::NIL:
return nullptr;
default:
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack type is not supported");
case msgpack::type::object_type::EXT:
{
msgpack::object_ext object_ext = object.via.ext;
if (object_ext.type() == int8_t(MsgPackExtensionTypes::UUID))
return std::make_shared<DataTypeUUID>();
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {%x} is not supported", object_ext.type());
}
}
}

View File

@ -42,6 +42,7 @@ public:
bool end_map_key();
bool start_map_value();
bool end_map_value();
bool visit_ext(const char * value, uint32_t size);
/// This function will be called if error occurs in parsing
[[noreturn]] void parse_error(size_t parsed_offset, size_t error_offset);
@ -55,7 +56,7 @@ private:
std::stack<Info> info_stack;
};
class MsgPackRowInputFormat final : public IRowInputFormat
class MsgPackRowInputFormat : public IRowInputFormat
{
public:
MsgPackRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_);

View File

@ -5,6 +5,9 @@
#include <Formats/FormatFactory.h>
#include <Common/assert_cast.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeNullable.h>
@ -19,6 +22,8 @@
#include <Columns/ColumnMap.h>
#include <Columns/ColumnLowCardinality.h>
#include <Formats/MsgPackExtensionTypes.h>
namespace DB
{
@ -27,8 +32,8 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
}
MsgPackRowOutputFormat::MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_)
: IRowOutputFormat(header_, out_, params_), packer(out_) {}
MsgPackRowOutputFormat::MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_)
: IRowOutputFormat(header_, out_, params_), packer(out_), format_settings(format_settings_) {}
void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr data_type, size_t row_num)
{
@ -164,6 +169,42 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr
serializeField(*dict_column, dict_type, index);
return;
}
case TypeIndex::UUID:
{
const auto & uuid_column = assert_cast<const ColumnUUID &>(column);
switch (format_settings.msgpack.output_uuid_representation)
{
case FormatSettings::MsgPackUUIDRepresentation::BIN:
{
WriteBufferFromOwnString buf;
writeBinary(uuid_column.getElement(row_num), buf);
StringRef uuid_bin = buf.stringRef();
packer.pack_bin(uuid_bin.size);
packer.pack_bin_body(uuid_bin.data, uuid_bin.size);
return;
}
case FormatSettings::MsgPackUUIDRepresentation::STR:
{
WriteBufferFromOwnString buf;
writeText(uuid_column.getElement(row_num), buf);
StringRef uuid_text = buf.stringRef();
packer.pack_str(uuid_text.size);
packer.pack_bin_body(uuid_text.data, uuid_text.size);
return;
}
case FormatSettings::MsgPackUUIDRepresentation::EXT:
{
WriteBufferFromOwnString buf;
UUID value = uuid_column.getElement(row_num);
writeBinaryBigEndian(value.toUnderType().items[0], buf);
writeBinaryBigEndian(value.toUnderType().items[1], buf);
StringRef uuid_ext = buf.stringRef();
packer.pack_ext(sizeof(UUID), int8_t(MsgPackExtensionTypes::UUID));
packer.pack_ext_body(uuid_ext.data, uuid_ext.size);
return;
}
}
}
default:
break;
}
@ -186,9 +227,9 @@ void registerOutputFormatMsgPack(FormatFactory & factory)
WriteBuffer & buf,
const Block & sample,
const RowOutputFormatParams & params,
const FormatSettings &)
const FormatSettings & settings)
{
return std::make_shared<MsgPackRowOutputFormat>(buf, sample, params);
return std::make_shared<MsgPackRowOutputFormat>(buf, sample, params, settings);
});
factory.markOutputFormatSupportsParallelFormatting("MsgPack");
}

View File

@ -18,7 +18,7 @@ namespace DB
class MsgPackRowOutputFormat final : public IRowOutputFormat
{
public:
MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_);
MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_);
String getName() const override { return "MsgPackRowOutputFormat"; }
@ -28,6 +28,7 @@ private:
void serializeField(const IColumn & column, DataTypePtr data_type, size_t row_num);
msgpack::packer<DB::WriteBuffer> packer;
const FormatSettings format_settings;
};
}

View File

@ -292,7 +292,7 @@ void AggregatingSortedAlgorithm::AggregatingMergedData::initAggregateDescription
AggregatingSortedAlgorithm::AggregatingSortedAlgorithm(
const Block & header, size_t num_inputs,
SortDescription description_, size_t max_block_size)
: IMergingAlgorithmWithDelayedChunk(num_inputs, std::move(description_))
: IMergingAlgorithmWithDelayedChunk(num_inputs, description_)
, columns_definition(defineColumns(header, description_))
, merged_data(getMergedColumns(header, columns_definition), max_block_size, columns_definition)
{

View File

@ -9,7 +9,6 @@
#include <Common/SettingsChanges.h>
#include <Common/setThreadName.h>
#include <Common/Stopwatch.h>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <DataTypes/DataTypeFactory.h>
#include <QueryPipeline/ProfileInfo.h>
#include <Interpreters/Context.h>
@ -990,41 +989,10 @@ namespace
assert(!pipeline);
auto source = query_context->getInputFormat(
input_format, *read_buffer, header, query_context->getSettings().max_insert_block_size);
QueryPipelineBuilder builder;
builder.init(Pipe(source));
/// Add default values if necessary.
if (ast)
{
if (insert_query)
{
auto table_id = StorageID::createEmpty();
if (insert_query->table_id)
{
table_id = query_context->resolveStorageID(insert_query->table_id, Context::ResolveOrdinary);
}
else
{
StorageID local_table_id(insert_query->getDatabase(), insert_query->getTable());
table_id = query_context->resolveStorageID(local_table_id, Context::ResolveOrdinary);
}
if (query_context->getSettingsRef().input_format_defaults_for_omitted_fields && table_id)
{
StoragePtr storage = DatabaseCatalog::instance().getTable(table_id, query_context);
const auto & columns = storage->getInMemoryMetadataPtr()->getColumns();
if (!columns.empty())
{
builder.addSimpleTransform([&](const Block & cur_header)
{
return std::make_shared<AddingDefaultsTransform>(cur_header, columns, *source, query_context);
});
}
}
}
}
pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
pipeline_executor = std::make_unique<PullingPipelineExecutor>(*pipeline);
}

View File

@ -173,7 +173,7 @@ off_t ReadBufferFromHDFS::seek(off_t offset_, int whence)
return getPosition();
}
pos = working_buffer.end();
resetWorkingBuffer();
impl->seek(offset_, whence);
return impl->getPosition();
}

View File

@ -8,6 +8,9 @@
#include <Interpreters/TreeRewriter.h>
#include <Storages/extractKeyExpressionList.h>
#include <Common/quoteString.h>
#include <Interpreters/FunctionNameNormalizer.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>
namespace DB
@ -161,4 +164,17 @@ KeyDescription KeyDescription::buildEmptyKey()
return result;
}
KeyDescription KeyDescription::parse(const String & str, const ColumnsDescription & columns, ContextPtr context)
{
KeyDescription result;
if (str.empty())
return result;
ParserExpression parser;
ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
FunctionNameNormalizer().visit(ast.get());
return getKeyFromAST(ast, columns, context);
}
}

View File

@ -76,6 +76,9 @@ struct KeyDescription
/// Substitute modulo with moduloLegacy. Used in KeyCondition to allow proper comparison with keys.
static bool moduloToModuloLegacyRecursive(ASTPtr node_expr);
/// Parse description from string
static KeyDescription parse(const String & str, const ColumnsDescription & columns, ContextPtr context);
};
}

View File

@ -168,7 +168,7 @@ ReplicatedMergeTreeTableMetadata ReplicatedMergeTreeTableMetadata::parse(const S
}
void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const ReplicatedMergeTreeTableMetadata & from_zk) const
void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const
{
if (data_format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
{
@ -203,9 +203,12 @@ void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const Replicat
/// NOTE: You can make a less strict check of match expressions so that tables do not break from small changes
/// in formatAST code.
if (primary_key != from_zk.primary_key)
String parsed_zk_primary_key = formattedAST(KeyDescription::parse(from_zk.primary_key, columns, context).expression_list_ast);
if (primary_key != parsed_zk_primary_key)
throw Exception("Existing table metadata in ZooKeeper differs in primary key."
" Stored in ZooKeeper: " + from_zk.primary_key + ", local: " + primary_key,
" Stored in ZooKeeper: " + from_zk.primary_key +
", parsed from ZooKeeper: " + parsed_zk_primary_key +
", local: " + primary_key,
ErrorCodes::METADATA_MISMATCH);
if (data_format_version != from_zk.data_format_version)
@ -214,39 +217,53 @@ void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const Replicat
", local: " + DB::toString(data_format_version.toUnderType()),
ErrorCodes::METADATA_MISMATCH);
if (partition_key != from_zk.partition_key)
String parsed_zk_partition_key = formattedAST(KeyDescription::parse(from_zk.partition_key, columns, context).expression_list_ast);
if (partition_key != parsed_zk_partition_key)
throw Exception(
"Existing table metadata in ZooKeeper differs in partition key expression."
" Stored in ZooKeeper: " + from_zk.partition_key + ", local: " + partition_key,
" Stored in ZooKeeper: " + from_zk.partition_key +
", parsed from ZooKeeper: " + parsed_zk_partition_key +
", local: " + partition_key,
ErrorCodes::METADATA_MISMATCH);
}
void ReplicatedMergeTreeTableMetadata::checkEquals(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const
{
checkImmutableFieldsEquals(from_zk);
checkImmutableFieldsEquals(from_zk, columns, context);
if (sampling_expression != from_zk.sampling_expression)
throw Exception("Existing table metadata in ZooKeeper differs in sample expression."
" Stored in ZooKeeper: " + from_zk.sampling_expression + ", local: " + sampling_expression,
ErrorCodes::METADATA_MISMATCH);
if (sorting_key != from_zk.sorting_key)
String parsed_zk_sampling_expression = formattedAST(KeyDescription::parse(from_zk.sampling_expression, columns, context).definition_ast);
if (sampling_expression != parsed_zk_sampling_expression)
{
throw Exception(
"Existing table metadata in ZooKeeper differs in sorting key expression."
" Stored in ZooKeeper: " + from_zk.sorting_key + ", local: " + sorting_key,
"Existing table metadata in ZooKeeper differs in sample expression."
" Stored in ZooKeeper: " + from_zk.sampling_expression +
", parsed from ZooKeeper: " + parsed_zk_sampling_expression +
", local: " + sampling_expression,
ErrorCodes::METADATA_MISMATCH);
}
if (ttl_table != from_zk.ttl_table)
String parsed_zk_sorting_key = formattedAST(extractKeyExpressionList(KeyDescription::parse(from_zk.sorting_key, columns, context).definition_ast));
if (sorting_key != parsed_zk_sorting_key)
{
throw Exception(
"Existing table metadata in ZooKeeper differs in TTL."
" Stored in ZooKeeper: " + from_zk.ttl_table +
", local: " + ttl_table,
ErrorCodes::METADATA_MISMATCH);
"Existing table metadata in ZooKeeper differs in sorting key expression."
" Stored in ZooKeeper: " + from_zk.sorting_key +
", parsed from ZooKeeper: " + parsed_zk_sorting_key +
", local: " + sorting_key,
ErrorCodes::METADATA_MISMATCH);
}
auto parsed_primary_key = KeyDescription::parse(primary_key, columns, context);
String parsed_zk_ttl_table = formattedAST(TTLTableDescription::parse(from_zk.ttl_table, columns, context, parsed_primary_key).definition_ast);
if (ttl_table != parsed_zk_ttl_table)
{
throw Exception(
"Existing table metadata in ZooKeeper differs in TTL."
" Stored in ZooKeeper: " + from_zk.ttl_table +
", parsed from ZooKeeper: " + parsed_zk_ttl_table +
", local: " + ttl_table,
ErrorCodes::METADATA_MISMATCH);
}
String parsed_zk_skip_indices = IndicesDescription::parse(from_zk.skip_indices, columns, context).toString();
@ -290,10 +307,10 @@ void ReplicatedMergeTreeTableMetadata::checkEquals(const ReplicatedMergeTreeTabl
}
ReplicatedMergeTreeTableMetadata::Diff
ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk) const
ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const
{
checkImmutableFieldsEquals(from_zk);
checkImmutableFieldsEquals(from_zk, columns, context);
Diff diff;

View File

@ -70,11 +70,11 @@ struct ReplicatedMergeTreeTableMetadata
void checkEquals(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const;
Diff checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk) const;
Diff checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const;
private:
void checkImmutableFieldsEquals(const ReplicatedMergeTreeTableMetadata & from_zk) const;
void checkImmutableFieldsEquals(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const;
bool index_granularity_bytes_found_in_zk = false;
};

View File

@ -4491,7 +4491,7 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer
auto alter_lock_holder = lockForAlter(getSettings()->lock_acquire_timeout_for_background_operations);
LOG_INFO(log, "Metadata changed in ZooKeeper. Applying changes locally.");
auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this, getInMemoryMetadataPtr()).checkAndFindDiff(metadata_from_entry);
auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this, getInMemoryMetadataPtr()).checkAndFindDiff(metadata_from_entry, getInMemoryMetadataPtr()->getColumns(), getContext());
setTableStructure(std::move(columns_from_entry), metadata_diff);
metadata_version = entry.alter_version;

View File

@ -15,6 +15,9 @@
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Interpreters/FunctionNameNormalizer.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>
namespace DB
@ -370,4 +373,17 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST(
return result;
}
TTLTableDescription TTLTableDescription::parse(const String & str, const ColumnsDescription & columns, ContextPtr context, const KeyDescription & primary_key)
{
TTLTableDescription result;
if (str.empty())
return result;
ParserTTLExpressionList parser;
ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
FunctionNameNormalizer().visit(ast.get());
return getTTLForTableFromAST(ast, columns, context, primary_key);
}
}

View File

@ -118,6 +118,9 @@ struct TTLTableDescription
static TTLTableDescription getTTLForTableFromAST(
const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context, const KeyDescription & primary_key);
/// Parse description from string
static TTLTableDescription parse(const String & str, const ColumnsDescription & columns, ContextPtr context, const KeyDescription & primary_key);
};
}

View File

@ -0,0 +1,99 @@
#include <Formats/ReadSchemaUtils.h>
#include <IO/ReadBufferFromString.h>
#include <Interpreters/Context.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Parsers/ASTLiteral.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Formats/IInputFormat.h>
#include <QueryPipeline/Pipe.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Storages/StorageValues.h>
#include <TableFunctions/TableFunctionFormat.h>
#include <TableFunctions/TableFunctionFactory.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
void TableFunctionFormat::parseArguments(const ASTPtr & ast_function, ContextPtr context)
{
ASTs & args_func = ast_function->children;
if (args_func.size() != 1)
throw Exception("Table function '" + getName() + "' must have arguments", ErrorCodes::LOGICAL_ERROR);
ASTs & args = args_func.at(0)->children;
if (args.size() != 2)
throw Exception("Table function '" + getName() + "' requires 2 arguments: format and data", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
for (auto & arg : args)
arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
format = args[0]->as<ASTLiteral &>().value.safeGet<String>();
data = args[1]->as<ASTLiteral &>().value.safeGet<String>();
}
ColumnsDescription TableFunctionFormat::getActualTableStructure(ContextPtr context) const
{
auto read_buffer_creator = [&]()
{
return std::make_unique<ReadBufferFromString>(data);
};
return readSchemaFromFormat(format, std::nullopt, read_buffer_creator, context);
}
Block TableFunctionFormat::parseData(ColumnsDescription columns, ContextPtr context) const
{
Block block;
for (const auto & name_and_type : columns.getAllPhysical())
block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name});
auto read_buf = std::make_unique<ReadBufferFromString>(data);
auto input_format = context->getInputFormat(format, *read_buf, block, context->getSettingsRef().max_block_size);
QueryPipelineBuilder builder;
builder.init(Pipe(input_format));
auto pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
auto reader = std::make_unique<PullingPipelineExecutor>(*pipeline);
std::vector<Block> blocks;
while (reader->pull(block))
blocks.push_back(std::move(block));
if (blocks.size() == 1)
return blocks[0];
/// In case the data contains more than one block, we combine
/// them all into one big block (this is considered a rare case).
return concatenateBlocks(blocks);
}
StoragePtr TableFunctionFormat::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const
{
auto columns = getActualTableStructure(context);
Block res_block = parseData(columns, context);
auto res = StorageValues::create(StorageID(getDatabaseName(), table_name), columns, res_block);
res->startup();
return res;
}
void registerTableFunctionFormat(TableFunctionFactory & factory)
{
factory.registerFunction<TableFunctionFormat>(TableFunctionFactory::CaseInsensitive);
}
}
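A hedged usage sketch of the new `format` table function (registered case-insensitively above); the `JSONEachRow` format and inline data are illustrative:
``` sql
SELECT * FROM format('JSONEachRow', '{"a": 1, "b": "x"}\n{"a": 2, "b": "y"}');
```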

View File

@ -0,0 +1,33 @@
#pragma once
#include <TableFunctions/ITableFunction.h>
namespace DB
{
class Context;
/* format(format_name, data) - ...
*/
class TableFunctionFormat : public ITableFunction
{
public:
static constexpr auto name = "format";
std::string getName() const override { return name; }
bool hasStaticStructure() const override { return false; }
private:
StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override;
const char * getStorageTypeName() const override { return "Values"; }
ColumnsDescription getActualTableStructure(ContextPtr context) const override;
void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;
Block parseData(ColumnsDescription columns, ContextPtr context) const;
String format;
String data;
};
}

View File

@ -5,11 +5,9 @@
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Common/Exception.h>
#include <Common/typeid_cast.h>
#include <Storages/StorageInput.h>
#include <DataTypes/DataTypeFactory.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <boost/algorithm/string.hpp>
#include "registerTableFunctions.h"

View File

@ -4,12 +4,11 @@
#include <Core/Block.h>
#include <Storages/StorageValues.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/getLeastSupertype.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <TableFunctions/ITableFunction.h>
#include <TableFunctions/TableFunctionValues.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <TableFunctions/parseColumnsListForTableFunction.h>
@ -28,13 +27,14 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
}
static void parseAndInsertValues(MutableColumns & res_columns, const ASTs & args, const Block & sample_block, ContextPtr context)
static void parseAndInsertValues(MutableColumns & res_columns, const ASTs & args, const Block & sample_block, size_t start, ContextPtr context)
{
if (res_columns.size() == 1) /// Parsing arguments as Fields
{
for (size_t i = 1; i < args.size(); ++i)
for (size_t i = start; i < args.size(); ++i)
{
const auto & [value_field, value_type_ptr] = evaluateConstantExpression(args[i], context);
@ -44,7 +44,7 @@ static void parseAndInsertValues(MutableColumns & res_columns, const ASTs & args
}
else /// Parsing arguments as Tuples
{
for (size_t i = 1; i < args.size(); ++i)
for (size_t i = start; i < args.size(); ++i)
{
const auto & [value_field, value_type_ptr] = evaluateConstantExpression(args[i], context);
@ -68,34 +68,59 @@ static void parseAndInsertValues(MutableColumns & res_columns, const ASTs & args
}
}
void TableFunctionValues::parseArguments(const ASTPtr & ast_function, ContextPtr /*context*/)
DataTypes TableFunctionValues::getTypesFromArgument(const ASTPtr & arg, ContextPtr context)
{
const auto & [value_field, value_type_ptr] = evaluateConstantExpression(arg, context);
DataTypes types;
if (const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(value_type_ptr.get()))
return type_tuple->getElements();
return {value_type_ptr};
}
void TableFunctionValues::parseArguments(const ASTPtr & ast_function, ContextPtr context)
{
ASTs & args_func = ast_function->children;
if (args_func.size() != 1)
throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Table function '" + getName() + "' must have arguments", ErrorCodes::LOGICAL_ERROR);
ASTs & args = args_func.at(0)->children;
if (args.size() < 2)
throw Exception("Table function '" + getName() + "' requires 2 or more arguments: structure and values.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (args.empty())
throw Exception("Table function '" + getName() + "' requires at least 1 argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
/// Parsing first argument as table structure and creating a sample block
if (!args[0]->as<const ASTLiteral>())
const auto & literal = args[0]->as<const ASTLiteral>();
String value;
if (args.size() > 1 && literal && literal->value.tryGet(value) && tryParseColumnsListFromString(value, structure, context))
{
throw Exception(fmt::format(
"The first argument of table function '{}' must be a literal. "
"Got '{}' instead", getName(), args[0]->formatForErrorMessage()),
ErrorCodes::BAD_ARGUMENTS);
has_structure_in_arguments = true;
return;
}
structure = args[0]->as<ASTLiteral &>().value.safeGet<String>();
has_structure_in_arguments = false;
DataTypes data_types = getTypesFromArgument(args[0], context);
for (size_t i = 1; i < args.size(); ++i)
{
auto arg_types = getTypesFromArgument(args[i], context);
if (data_types.size() != arg_types.size())
throw Exception(
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
"Cannot determine common structure for {} function arguments: the amount of columns is differ for different arguments",
getName());
for (size_t j = 0; j != arg_types.size(); ++j)
data_types[j] = getLeastSupertype({data_types[j], arg_types[j]});
}
NamesAndTypesList names_and_types;
for (size_t i = 0; i != data_types.size(); ++i)
names_and_types.emplace_back("c" + std::to_string(i + 1), data_types[i]);
structure = ColumnsDescription(names_and_types);
}
ColumnsDescription TableFunctionValues::getActualTableStructure(ContextPtr context) const
ColumnsDescription TableFunctionValues::getActualTableStructure(ContextPtr /*context*/) const
{
return parseColumnsListFromString(structure, context);
return structure;
}
StoragePtr TableFunctionValues::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const
@@ -111,7 +136,7 @@ StoragePtr TableFunctionValues::executeImpl(const ASTPtr & ast_function, Context
ASTs & args = ast_function->children.at(0)->children;
/// Parsing other arguments as values and inserting them into columns
parseAndInsertValues(res_columns, args, sample_block, context);
parseAndInsertValues(res_columns, args, sample_block, has_structure_in_arguments ? 1 : 0, context);
Block res_block = sample_block.cloneWithColumns(std::move(res_columns));
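Taken together, the changes above let `values` run without an explicit structure argument: each argument's type is extracted (a tuple contributes one type per element), per-column types are merged with `getLeastSupertype`, and columns are named `c1`, `c2`, and so on. A minimal SQL sketch of the resulting behavior, assuming it matches this commit (comments are illustrative):

```sql
-- Structure inferred from the rows; a NULL widens its column to Nullable:
SELECT * FROM values((1, 'a'), (NULL, 'b'));  -- c1 is Nullable, c2 is String

-- Rows with a different number of columns cannot be merged and are rejected:
SELECT * FROM values((1, 2), (3, 4, 5));      -- CANNOT_EXTRACT_TABLE_STRUCTURE
```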

View File

@@ -20,7 +20,10 @@ private:
ColumnsDescription getActualTableStructure(ContextPtr context) const override;
void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;
String structure;
static DataTypes getTypesFromArgument(const ASTPtr & arg, ContextPtr context);
ColumnsDescription structure;
bool has_structure_in_arguments;
};

View File

@@ -28,4 +28,24 @@ ColumnsDescription parseColumnsListFromString(const std::string & structure, Con
return InterpreterCreateQuery::getColumnsDescription(*columns_list, context, false);
}
bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, ContextPtr context)
{
ParserColumnDeclarationList parser;
const Settings & settings = context->getSettingsRef();
String error;
const char * start = structure.data();
const char * end = structure.data() + structure.size();
ASTPtr columns_list_raw = tryParseQuery(parser, start, end, error, false, "columns declaration list", false, settings.max_query_size, settings.max_parser_depth);
if (!columns_list_raw)
return false;
auto * columns_list = dynamic_cast<ASTExpressionList *>(columns_list_raw.get());
if (!columns_list)
return false;
columns = InterpreterCreateQuery::getColumnsDescription(*columns_list, context, false);
return true;
}
}
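The non-throwing variant above is what decides between the two calling forms of `values`: if the first literal parses as a columns list, it becomes the structure (`has_structure_in_arguments = true`); otherwise every argument is treated as data. A hedged sketch of both paths (queries are illustrative, not taken from this commit's tests):

```sql
-- First argument parses as a columns list, so it is used as the structure:
SELECT * FROM values('a UInt64, s String', (1, 'x'), (2, 'y'));

-- First argument is not a columns list, so it is treated as the first row
-- and the structure is inferred instead:
SELECT * FROM values((1, 'x'), (2, 'y'));
```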

View File

@@ -12,4 +12,6 @@ class Context;
/// Parses a common argument for table functions such as table structure given in string
ColumnsDescription parseColumnsListFromString(const std::string & structure, ContextPtr context);
bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, ContextPtr context);
}

View File

@@ -49,6 +49,8 @@ void registerTableFunctions()
#endif
registerTableFunctionDictionary(factory);
registerTableFunctionFormat(factory);
}
}

View File

@@ -48,6 +48,8 @@ void registerTableFunctionSQLite(TableFunctionFactory & factory);
void registerTableFunctionDictionary(TableFunctionFactory & factory);
void registerTableFunctionFormat(TableFunctionFactory & factory);
void registerTableFunctions();
}
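The registration above wires up the new `format` table function, which parses inline data in a given input format and infers the column types; it is exercised at length by the test further down in this diff. A one-line sketch, assuming the behavior shown in that test:

```sql
SELECT * FROM format(JSONEachRow, '{"a": 1} {"a": 2}');  -- a inferred as Nullable(Float64)
```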

View File

@@ -3,8 +3,6 @@ import pytest
from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager
from helpers.test_tools import assert_eq_with_retry
import time
def fill_nodes(nodes, shard):
for node in nodes:
@@ -59,20 +57,25 @@ def test_inconsistent_parts_if_drop_while_replica_not_active(start_cluster):
# DROP_RANGE will be removed from the replication log and the first replica will be lost
for i in range(20):
node2.query("INSERT INTO test_table VALUES ('2019-08-16', {})".format(20 + i))
assert_eq_with_retry(node2, "SELECT value FROM system.zookeeper WHERE path='/clickhouse/tables/test1/replicated/replicas/node1' AND name='is_lost'", "1")
for i in range(30):
if node2.contains_in_log("Will mark replica node1 as lost"):
break
time.sleep(0.5)
node2.wait_for_log_line("Will mark replica node1 as lost")
# the first replica will be cloned from the second
pm.heal_all()
node2.wait_for_log_line("Sending part")
assert_eq_with_retry(node1, "SELECT count(*) FROM test_table", node2.query("SELECT count(*) FROM test_table"))
# ensure replica was cloned
assert node1.contains_in_log("Will mimic node2")
# queue must be empty (except some merges that are possibly executing right now)
assert node1.query("SELECT count() FROM system.replication_queue WHERE type != 'MERGE_PARTS'") == "0\n"
assert node2.query("SELECT count() FROM system.replication_queue WHERE type != 'MERGE_PARTS'") == "0\n"
# 2 options:
# - There wasn't a merge in node2. Then node1 should have cloned the 2 parts
# - There was a merge in progress. node1 might have cloned the new part but still has the original 2 parts
# in the replication queue until they are finally discarded with a message like:
# `Skipping action for part 201908_40_40_0 because part 201908_21_40_4 already exists.`
#
# In any case, after a short while the replication queue should be empty
assert_eq_with_retry(node1, "SELECT count() FROM system.replication_queue WHERE type != 'MERGE_PARTS'", "0")
assert_eq_with_retry(node2, "SELECT count() FROM system.replication_queue WHERE type != 'MERGE_PARTS'", "0")

View File

@@ -177,13 +177,13 @@ def test_insert_query_delimiter():
assert query("SELECT a FROM t ORDER BY a") == "1\n5\n234\n"
def test_insert_default_column():
query("CREATE TABLE t (a UInt8, b Int32 DEFAULT 100, c String DEFAULT 'c') ENGINE = Memory")
query("CREATE TABLE t (a UInt8, b Int32 DEFAULT 100 - a, c String DEFAULT 'c') ENGINE = Memory")
query("INSERT INTO t (c, a) VALUES ('x',1),('y',2)")
query("INSERT INTO t (a) FORMAT TabSeparated", input_data="3\n4\n")
assert query("SELECT * FROM t ORDER BY a") == "1\t100\tx\n" \
"2\t100\ty\n" \
"3\t100\tc\n" \
"4\t100\tc\n"
assert query("SELECT * FROM t ORDER BY a") == "1\t99\tx\n" \
"2\t98\ty\n" \
"3\t97\tc\n" \
"4\t96\tc\n"
def test_insert_splitted_row():
query("CREATE TABLE t (a UInt8) ENGINE = Memory")
@@ -257,7 +257,7 @@ def test_progress():
}
]"""
def test_session():
def test_session_settings():
session_a = "session A"
session_b = "session B"
query("SET custom_x=1", session_id=session_a)
@@ -267,9 +267,22 @@ def test_session():
assert query("SELECT getSetting('custom_x'), getSetting('custom_y')", session_id=session_a) == "1\t2\n"
assert query("SELECT getSetting('custom_x'), getSetting('custom_y')", session_id=session_b) == "3\t4\n"
def test_session_temp_tables():
session_a = "session A"
session_b = "session B"
query("CREATE TEMPORARY TABLE my_temp_table(a Int8)", session_id=session_a)
query("INSERT INTO my_temp_table VALUES (10)", session_id=session_a)
assert query("SELECT * FROM my_temp_table", session_id=session_a) == "10\n"
query("CREATE TEMPORARY TABLE my_temp_table(a Int8)", session_id=session_b)
query("INSERT INTO my_temp_table VALUES (20)", session_id=session_b)
assert query("SELECT * FROM my_temp_table", session_id=session_b) == "20\n"
assert query("SELECT * FROM my_temp_table", session_id=session_a) == "10\n"
def test_no_session():
e = query_and_get_error("SET custom_x=1")
assert "There is no session" in e.display_text
e = query_and_get_error("CREATE TEMPORARY TABLE my_temp_table(a Int8)")
assert "There is no session" in e.display_text
def test_input_function():
query("CREATE TABLE t (a UInt8) ENGINE = Memory")

View File

@@ -1,4 +1,4 @@
CREATE DATABASE memory_01069\nENGINE = Memory()
CREATE DATABASE memory_01069\nENGINE = Memory
1
2
3

View File

@@ -0,0 +1,19 @@
3
SELECT CAST([3, 4, 5][1], \'Int32\')
4
SELECT CAST(CAST(\'[3,4,5]\', \'Array(Int64)\')[2], \'Int8\')
0
1
2
SELECT CAST(CAST(\'[1,2,3]\', \'Array(UInt64)\')[CAST(CAST([number, number], \'Array(UInt8)\')[number], \'UInt64\')], \'UInt8\')
FROM numbers(3)
3
WITH [3, 4, 5] AS x
SELECT CAST(x[1], \'Int32\')
3
SELECT CAST((3, 4, 5).1, \'Int32\')
4
SELECT CAST(CAST((3, 4, 5), \'Tuple(UInt64, UInt64, UInt64)\').1, \'Int32\')
3
WITH (3, 4, 5) AS x
SELECT CAST(x.1, \'Int32\')

View File

@@ -0,0 +1,20 @@
SELECT [3,4,5][1]::Int32;
EXPLAIN SYNTAX SELECT [3,4,5][1]::Int32;
SELECT [3,4,5]::Array(Int64)[2]::Int8;
EXPLAIN SYNTAX SELECT [3,4,5]::Array(Int64)[2]::Int8;
SELECT [1,2,3]::Array(UInt64)[[number, number]::Array(UInt8)[number]::UInt64]::UInt8 from numbers(3);
EXPLAIN SYNTAX SELECT [1,2,3]::Array(UInt64)[[number, number]::Array(UInt8)[number]::UInt64]::UInt8 from numbers(3);
WITH [3,4,5] AS x SELECT x[1]::Int32;
EXPLAIN SYNTAX WITH [3,4,5] AS x SELECT x[1]::Int32;
SELECT tuple(3,4,5).1::Int32;
EXPLAIN SYNTAX SELECT tuple(3,4,5).1::Int32;
SELECT tuple(3,4,5)::Tuple(UInt64, UInt64, UInt64).2::Int32;
EXPLAIN SYNTAX SELECT tuple(3,4,5)::Tuple(UInt64, UInt64, UInt64).1::Int32;
WITH tuple(3,4,5) AS x SELECT x.1::Int32;
EXPLAIN SYNTAX WITH tuple(3,4,5) AS x SELECT x.1::Int32;

View File

@@ -1,5 +1,5 @@
engine : Memory
CREATE DATABASE default\nENGINE = Memory()\nCOMMENT \'Test DB with comment\'
CREATE DATABASE default\nENGINE = Memory\nCOMMENT \'Test DB with comment\'
comment= Test DB with comment
engine : Atomic

View File

@@ -0,0 +1,14 @@
String
abc
def
1
-1
10000
-10000
1000000
1 string [1,2,-1]
-10 def [10,20,10000]
1 \N [1,2,-1]
\N def [10,NULL,10000]
(1,'1') 10
(-1,'-1') 1000000

View File

@@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Tags: no-parallel, no-fasttest
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_CLIENT -q "select * from values('String', 'abc', 'def')"
$CLICKHOUSE_CLIENT -q "select * from values(1, -1, 10000, -10000, 1000000)"
$CLICKHOUSE_CLIENT -q "select * from values((1, 'string', [1, 2, -1]), (-10, 'def', [10, 20, 10000]))"
$CLICKHOUSE_CLIENT -q "select * from values((1, NULL, [1, 2, -1]), (NULL, 'def', [10, NULL, 10000]))"
$CLICKHOUSE_CLIENT -q "select * from values(((1, '1'), 10), ((-1, '-1'), 1000000))"

View File

@@ -0,0 +1,4 @@
5e7084e0-019f-461f-9e70-84e0019f561f
5e7084e0-019f-461f-9e70-84e0019f561f
5e7084e0-019f-461f-9e70-84e0019f561f
5e7084e0-019f-461f-9e70-84e0019f561f UUID

View File

@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Tags: no-parallel, no-fasttest
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_str.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='str'"
$CLICKHOUSE_CLIENT -q "select * from file('uuid_str.msgpack', 'MsgPack', 'uuid UUID')"
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='bin'"
$CLICKHOUSE_CLIENT -q "select * from file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID')"
$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='ext'"
$CLICKHOUSE_CLIENT -q "select * from file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID')"
$CLICKHOUSE_CLIENT -q "select c1, toTypeName(c1) from file('uuid_ext.msgpack') settings input_format_msgpack_number_of_columns=1"

View File

@@ -0,0 +1,52 @@
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
111 Hello
123 World
1 2 [1,2,3] [['abc'],[],['d','e']]
c1 Nullable(Float64)
c2 Nullable(Float64)
c3 Array(Nullable(Float64))
c4 Array(Array(Nullable(String)))
111 Hello
123 World
111 Hello
131 Hello
123 World
b Nullable(Float64)
a Nullable(String)

View File

@@ -0,0 +1,70 @@
-- Tags: no-fasttest
select * from format(JSONEachRow,
$$
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
$$);
set max_block_size=5;
select * from format(JSONEachRow,
$$
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
$$);
select * from format(CSV, '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"');
desc format(CSV, '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"');
drop table if exists test;
create table test as format(JSONEachRow,
$$
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 111}
{"a": "Hello", "b": 131}
{"a": "World", "b": 123}
$$);
select * from test;
desc table test;
drop table test;

View File

@@ -0,0 +1,7 @@
[1] [[1]]
[[1]]
[(1,[1])]
[[1]]
(('a',1),'b')
a 1
a b

View File

@@ -0,0 +1,33 @@
DROP TABLE IF EXISTS t_nested_with_dots;
CREATE TABLE t_nested_with_dots (n Nested(id UInt64, `values.id` Array(UInt64)))
ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t_nested_with_dots VALUES ([1], [[1]]);
SELECT * FROM t_nested_with_dots;
SELECT n.values.id FROM t_nested_with_dots;
DROP TABLE IF EXISTS t_nested_with_dots;
SET flatten_nested = 0;
CREATE TABLE t_nested_with_dots (n Nested(id UInt64, `values.id` Array(UInt64)))
ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t_nested_with_dots VALUES ([(1, [1])]);
SELECT * FROM t_nested_with_dots;
SELECT n.values.id FROM t_nested_with_dots;
DROP TABLE IF EXISTS t_nested_with_dots;
CREATE TABLE t_nested_with_dots (`t.t2` Tuple(`t3.t4.t5` Tuple(`s1.s2` String, `u1.u2` UInt64), `s3.s4.s5` String))
ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t_nested_with_dots VALUES ((('a', 1), 'b'));
SELECT * FROM t_nested_with_dots;
SELECT t.t2.t3.t4.t5.s1.s2, t.t2.t3.t4.t5.u1.u2 FROM t_nested_with_dots;
SELECT t.t2.t3.t4.t5.s1.s2, t.t2.s3.s4.s5 FROM t_nested_with_dots;
DROP TABLE IF EXISTS t_nested_with_dots;

View File

@@ -0,0 +1 @@
CREATE DATABASE INFORMATION_SCHEMA\nENGINE = Memory

View File

@@ -0,0 +1 @@
SHOW CREATE DATABASE INFORMATION_SCHEMA;

Binary file not shown (image updated: 8.9 KiB before, 40 KiB after).