From 5aa968c2217ea7cd9451bf402f20d3b285a50858 Mon Sep 17 00:00:00 2001 From: Big Elephant Date: Fri, 26 Jul 2019 09:36:11 +0800 Subject: [PATCH 01/44] Update ipv4.md --- docs/zh/data_types/domains/ipv4.md | 80 +++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/docs/zh/data_types/domains/ipv4.md b/docs/zh/data_types/domains/ipv4.md index eb4cc7d57b5..baecb7d86b1 120000 --- a/docs/zh/data_types/domains/ipv4.md +++ b/docs/zh/data_types/domains/ipv4.md @@ -1 +1,79 @@ -../../../en/data_types/domains/ipv4.md \ No newline at end of file +## IPv4 + +`IPv4` 是基于 `UInt32` 的domain类型,用来存储IPv4地址。它使用紧凑的存储方式,提供用户友好的输入输出格式, +自动检查列类型。 + +### Basic Usage + +``` sql +CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY url; + +DESCRIBE TABLE hits; +``` + +``` +┌─name─┬─type───┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┐ +│ url │ String │ │ │ │ │ +│ from │ IPv4 │ │ │ │ │ +└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┘ +``` + +或者你可以使用IPv4 domain作主键: + +``` sql +CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; +``` + +`IPv4` domain支持定制化的IPv4地址字符串格式: + +``` sql +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.yandex', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); + +SELECT * FROM hits; +``` + +``` +┌─url────────────────────────────────┬───────────from─┐ +│ https://clickhouse.yandex/docs/en/ │ 116.106.34.242 │ +│ https://wikipedia.org │ 116.253.40.133 │ +│ https://clickhouse.yandex │ 183.247.232.58 │ +└────────────────────────────────────┴────────────────┘ +``` + +数据值以紧凑的二进制格式存储: + +``` sql +SELECT toTypeName(from), hex(from) FROM hits LIMIT 1; +``` + +``` +┌─toTypeName(from)─┬─hex(from)─┐ +│ IPv4 │ B7F7E83A │ +└──────────────────┴───────────┘ +``` + +Domain不可隐式转换为除`UInt32`以外的类型。如果要将IPv4值转换为字符串,则必须使用`IPv4NumToString()`函数显示的进行此操作。 + +``` sql +SELECT toTypeName(s), IPv4NumToString(from) as s FROM hits LIMIT 1; +``` + +``` +┌─toTypeName(IPv4NumToString(from))─┬─s──────────────┐ +│ String │ 183.247.232.58 │ +└───────────────────────────────────┴────────────────┘ +``` + +或转换为 `UInt32` 类型: + +``` sql +SELECT toTypeName(i), CAST(from as UInt32) as i FROM hits LIMIT 1; +``` + +``` +┌─toTypeName(CAST(from, 'UInt32'))─┬──────────i─┐ +│ UInt32 │ 3086477370 │ +└──────────────────────────────────┴────────────┘ +``` + +[Original article](https://clickhouse.yandex/docs/en/data_types/domains/ipv4) From 24498668a05c421ea563101608834692c23509b6 Mon Sep 17 00:00:00 2001 From: Big Elephant Date: Mon, 29 Jul 2019 14:24:11 +0800 Subject: [PATCH 02/44] Update overview.md --- docs/zh/data_types/domains/overview.md | 27 +++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/docs/zh/data_types/domains/overview.md b/docs/zh/data_types/domains/overview.md index 13465d655ee..25fc5d3850a 120000 --- a/docs/zh/data_types/domains/overview.md +++ b/docs/zh/data_types/domains/overview.md @@ -1 +1,26 @@ -../../../en/data_types/domains/overview.md \ No newline at end of file +# Domains + +Domains是特殊用途的类型,它在现有的基础类型之上添加了一些额外的特性,能够让线上和磁盘上的表格式保持不变。目前,ClickHouse暂不支持自定义的domains. + +您可以在任何地方使用domains,相应的基础类型的使用方式如下: + +* 构建一列domain类型的数据 +* 从/向domain列读/写数据 +* 作为索引,如果基础类型能够被作为索引的话 +* 以domain列的值作为参数调用函数 +* 等等. 
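下面给出一个简化的示例(表名 `hits_log` 与其中的数据均为假设),以 `IPv4` domain 为例,演示上述列表中像使用基础类型一样使用 Domain 类型的方式:

``` sql
-- 建表时直接将 IPv4 domain 用作列类型
CREATE TABLE hits_log (time DateTime, addr IPv4) ENGINE = MergeTree() ORDER BY time;

-- 写入时可以使用人性化的字符串格式
INSERT INTO hits_log (time, addr) VALUES (now(), '116.253.40.133');

-- 查询时可以把 addr 当作其基础类型 UInt32 参与函数调用与比较
SELECT count() FROM hits_log WHERE addr = IPv4StringToNum('116.253.40.133');
```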
+ +### Domains的额外特性 + +* 在使用`SHOW CREATE TABLE` 或 `DESCRIBE TABLE`时,明确地显示列类型名称 +* 使用 `INSERT INTO domain_table(domain_column) VALUES(...)`实现人性化格式输入 +* 使用`SELECT domain_column FROM domain_table`实现人性化格式输出 +* 使用 `INSERT INTO domain_table FORMAT CSV ...`实现外部源数据的人性化格式载入 + +### 缺陷 + +* 无法通过 `ALTER TABLE`将基础类型的索引转换为domain类型的索引. +* 当从其他列或表插入数据时,无法将string类型的值隐式地转换为domain类型的值. +* 无法对存储为domain类型的值添加约束. + +[Original article](https://clickhouse.yandex/docs/en/data_types/domains/overview) From 98b5c741315c96f9dee2f5b63d8473c7b0a959f3 Mon Sep 17 00:00:00 2001 From: Big Elephant Date: Mon, 29 Jul 2019 14:33:46 +0800 Subject: [PATCH 03/44] Update ipv6.md --- docs/zh/data_types/domains/ipv6.md | 78 +++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/docs/zh/data_types/domains/ipv6.md b/docs/zh/data_types/domains/ipv6.md index cca37a22458..13c6809e4a9 120000 --- a/docs/zh/data_types/domains/ipv6.md +++ b/docs/zh/data_types/domains/ipv6.md @@ -1 +1,77 @@ -../../../en/data_types/domains/ipv6.md \ No newline at end of file +## IPv6 + +`IPv6` 是基于`FixedString(16)` 类型的domain类型,用来存储IPv6地址值。它使用紧凑的存储方式,提供用户友好的输入输出格式, 自动检查列类型。 + +### 基本用法 + +``` sql +CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY url; + +DESCRIBE TABLE hits; +``` + +``` +┌─name─┬─type───┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┐ +│ url │ String │ │ │ │ │ +│ from │ IPv6 │ │ │ │ │ +└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┘ +``` + +您也可以使用 `IPv6`domain做主键: + +``` sql +CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; +``` + +`IPv6` domain支持定制化的IPv6地址字符串格式: + +``` sql +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.yandex', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); + +SELECT * FROM hits; +``` + +``` +┌─url────────────────────────────────┬─from──────────────────────────┐ +│ https://clickhouse.yandex │ 2001:44c8:129:2632:33:0:252:2 │ +│ https://clickhouse.yandex/docs/en/ │ 2a02:e980:1e::1 │ +│ https://wikipedia.org │ 2a02:aa08:e000:3100::2 │ +└────────────────────────────────────┴───────────────────────────────┘ +``` + +它以紧凑的二进制格式存储数值: + +``` sql +SELECT toTypeName(from), hex(from) FROM hits LIMIT 1; +``` + +``` +┌─toTypeName(from)─┬─hex(from)────────────────────────┐ +│ IPv6 │ 200144C8012926320033000002520002 │ +└──────────────────┴──────────────────────────────────┘ +``` +Domain不可隐式转换为除`FixedString(16)`以外的类型。如果要将`IPv6`值转换为字符串,则必须使用`IPv6NumToString()`函数显示地进行此操作: + +``` sql +SELECT toTypeName(s), IPv6NumToString(from) as s FROM hits LIMIT 1; +``` + +``` +┌─toTypeName(IPv6NumToString(from))─┬─s─────────────────────────────┐ +│ String │ 2001:44c8:129:2632:33:0:252:2 │ +└───────────────────────────────────┴───────────────────────────────┘ +``` + +或转换为 `FixedString(16)`类型: + +``` sql +SELECT toTypeName(i), CAST(from as FixedString(16)) as i FROM hits LIMIT 1; +``` + +``` +┌─toTypeName(CAST(from, 'FixedString(16)'))─┬─i───────┐ +│ FixedString(16) │ ��� │ +└───────────────────────────────────────────┴─────────┘ +``` + +[Original article](https://clickhouse.yandex/docs/en/data_types/domains/ipv6) From bb16ffaadc6e4ed7778c2cef3272370d5a1e0d73 Mon Sep 17 00:00:00 2001 From: Big Elephant Date: Mon, 29 Jul 2019 14:35:08 +0800 Subject: [PATCH 04/44] Update ipv4.md --- docs/zh/data_types/domains/ipv4.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/zh/data_types/domains/ipv4.md 
b/docs/zh/data_types/domains/ipv4.md index baecb7d86b1..1dd2cb03794 120000 --- a/docs/zh/data_types/domains/ipv4.md +++ b/docs/zh/data_types/domains/ipv4.md @@ -3,7 +3,7 @@ `IPv4` 是基于 `UInt32` 的domain类型,用来存储IPv4地址。它使用紧凑的存储方式,提供用户友好的输入输出格式, 自动检查列类型。 -### Basic Usage +### 基本使用 ``` sql CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY url; @@ -18,7 +18,7 @@ DESCRIBE TABLE hits; └──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┘ ``` -或者你可以使用IPv4 domain作主键: +您也可以使用IPv4 domain作主键: ``` sql CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; From 4f146eaa7e237a86ea3b8fa503644c04b1d758c6 Mon Sep 17 00:00:00 2001 From: Zhichang Yu Date: Tue, 30 Jul 2019 18:54:50 +0800 Subject: [PATCH 05/44] added bitmapRange function --- .../AggregateFunctionGroupBitmapData.h | 38 ++++++ dbms/src/Functions/FunctionsBitmap.cpp | 1 + dbms/src/Functions/FunctionsBitmap.h | 113 ++++++++++++++++++ dbms/src/IO/tests/write_buffer_aio.cpp | 1 + .../00829_bitmap_function.reference | 8 ++ .../0_stateless/00829_bitmap_function.sql | 33 +++-- .../functions/bitmap_functions.md | 26 ++++ .../functions/bitmap_functions.md | 50 ++++++++ 8 files changed, 263 insertions(+), 7 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h index e770dfee7fa..7e8fccbd82a 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -454,6 +455,43 @@ public: return count; } + /** + * Return new set with specified range (not include the range_end) + */ + UInt64 rb_range(UInt32 range_start, UInt32 range_end, RoaringBitmapWithSmallSet& r1) const + { + UInt64 count = 0; + if(range_start >= range_end) + return count; + if (isSmall()) + { + std::vector ans; + for (const auto & x : small) + { + T val = x.getValue(); + if((UInt32)val >= range_start && (UInt32)val < range_end) { + r1.add(val); + count++; + } + } + } + else + { + roaring_uint32_iterator_t iterator; + roaring_init_iterator(rb, &iterator); + roaring_move_uint32_iterator_equalorlarger(&iterator, range_start); + while (iterator.has_value) + { + if((UInt32)iterator.current_value >= range_end) + break; + r1.add(iterator.current_value); + roaring_advance_uint32_iterator(&iterator); + count++; + } + } + return count; + } + private: /// To read and write the DB Buffer directly, migrate code from CRoaring void db_roaring_bitmap_add_many(DB::ReadBuffer & dbBuf, roaring_bitmap_t * r, size_t n_args) diff --git a/dbms/src/Functions/FunctionsBitmap.cpp b/dbms/src/Functions/FunctionsBitmap.cpp index d4367ae2c64..af7daddd773 100644 --- a/dbms/src/Functions/FunctionsBitmap.cpp +++ b/dbms/src/Functions/FunctionsBitmap.cpp @@ -9,6 +9,7 @@ void registerFunctionsBitmap(FunctionFactory & factory) { factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/dbms/src/Functions/FunctionsBitmap.h b/dbms/src/Functions/FunctionsBitmap.h index f64f04789cc..ec6daaab97b 100644 --- a/dbms/src/Functions/FunctionsBitmap.h +++ b/dbms/src/Functions/FunctionsBitmap.h @@ -30,6 +30,9 @@ namespace ErrorCodes * Convert bitmap to integer array: * bitmapToArray: bitmap -> integer[] * + * Return new set with specified range (not include the range_end): + * bitmapRange: bitmap,integer,integer -> bitmap + * * Two 
bitmap and calculation:
 *      bitmapAnd:	bitmap,bitmap -> bitmap
 *
@@ -240,6 +243,116 @@ private:
     }
 };
 
+class FunctionBitmapRange : public IFunction
+{
+public:
+    static constexpr auto name = "bitmapRange";
+
+    static FunctionPtr create(const Context &) { return std::make_shared<FunctionBitmapRange>(); }
+
+    String getName() const override { return name; }
+
+    bool isVariadic() const override { return false; }
+
+    size_t getNumberOfArguments() const override { return 3; }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        const DataTypeAggregateFunction * bitmap_type = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
+        if (!(bitmap_type && bitmap_type->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
+            throw Exception(
+                "First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+        auto arg_type1 = typeid_cast<const DataTypeNumber<UInt32> *>(arguments[1].get());
+        if (!(arg_type1))
+            throw Exception(
+                "Second argument for function " + getName() + " must be UInt32 but it has type " + arguments[1]->getName() + ".",
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+        auto arg_type2 = typeid_cast<const DataTypeNumber<UInt32> *>(arguments[2].get());
+        if (!(arg_type2))
+            throw Exception(
+                "Third argument for function " + getName() + " must be UInt32 but it has type " + arguments[2]->getName() + ".",
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+        return arguments[0];
+    }
+
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
+    {
+        const IDataType * from_type = block.getByPosition(arguments[0]).type.get();
+        const DataTypeAggregateFunction * aggr_type = typeid_cast<const DataTypeAggregateFunction *>(from_type);
+        WhichDataType which(aggr_type->getArgumentsDataTypes()[0]);
+        if (which.isUInt8())
+            executeIntType<UInt8>(block, arguments, result, input_rows_count);
+        else if (which.isUInt16())
+            executeIntType<UInt16>(block, arguments, result, input_rows_count);
+        else if (which.isUInt32())
+            executeIntType<UInt32>(block, arguments, result, input_rows_count);
+        else if (which.isUInt64())
+            executeIntType<UInt64>(block, arguments, result, input_rows_count);
+        else
+            throw Exception(
+                "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+    }
+
+private:
+    using ToType = UInt64;
+
+    template <typename T>
+    void executeIntType(
+        Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count)
+        const
+    {
+        const IColumn * columns[3];
+        bool is_column_const[3];
+        const ColumnAggregateFunction * colAggFunc;
+        const PaddedPODArray<AggregateDataPtr> * container0;
+        const PaddedPODArray<UInt32> * container1, * container2;
+
+        for (size_t i = 0; i < 3; ++i)
+        {
+            columns[i] = block.getByPosition(arguments[i]).column.get();
+            is_column_const[i] = isColumnConst(*columns[i]);
+        }
+        if (is_column_const[0]) {
+            colAggFunc = typeid_cast<const ColumnAggregateFunction *>(typeid_cast<const ColumnConst *>(columns[0])->getDataColumnPtr().get());
+        } else{
+            colAggFunc = typeid_cast<const ColumnAggregateFunction *>(columns[0]);
+        }
+        container0 = &colAggFunc->getData();
+        if (is_column_const[1])
+            container1 = &typeid_cast<const ColumnUInt32 *>(typeid_cast<const ColumnConst *>(columns[1])->getDataColumnPtr().get())->getData();
+        else
+            container1 = &typeid_cast<const ColumnUInt32 *>(columns[1])->getData();
+        if (is_column_const[2])
+            container2 = &typeid_cast<const ColumnUInt32 *>(typeid_cast<const ColumnConst *>(columns[2])->getDataColumnPtr().get())->getData();
+        else
+            container2 = &typeid_cast<const ColumnUInt32 *>(columns[2])->getData();
+
+        auto col_to = ColumnAggregateFunction::create(colAggFunc->getAggregateFunction());
+
col_to->reserve(input_rows_count); + + for (size_t i = 0; i < input_rows_count; ++i) + { + const AggregateDataPtr dataPtr0 = is_column_const[0] ? (*container0)[0] : (*container0)[i]; + const AggregateFunctionGroupBitmapData& bd0 + = *reinterpret_cast*>(dataPtr0); + const UInt32 range_start = is_column_const[1] ? (*container1)[0] : (*container1)[i]; + const UInt32 range_end = is_column_const[2] ? (*container2)[0] : (*container2)[i]; + + auto bd2 = new AggregateFunctionGroupBitmapData(); + bd0.rbs.rb_range(range_start, range_end, bd2->rbs); + + col_to->insertFrom(reinterpret_cast(bd2)); + } + block.getByPosition(result).column = std::move(col_to); + } +}; + template class FunctionBitmapSelfCardinalityImpl : public IFunction { diff --git a/dbms/src/IO/tests/write_buffer_aio.cpp b/dbms/src/IO/tests/write_buffer_aio.cpp index 5794e277848..30596045616 100644 --- a/dbms/src/IO/tests/write_buffer_aio.cpp +++ b/dbms/src/IO/tests/write_buffer_aio.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/dbms/tests/queries/0_stateless/00829_bitmap_function.reference b/dbms/tests/queries/0_stateless/00829_bitmap_function.reference index bd5d279919b..da1206bab12 100644 --- a/dbms/tests/queries/0_stateless/00829_bitmap_function.reference +++ b/dbms/tests/queries/0_stateless/00829_bitmap_function.reference @@ -59,3 +59,11 @@ 1 0 1 +[] +[] +[1] +[] +[5] +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33] +[30,31,32,33,100] +[100] diff --git a/dbms/tests/queries/0_stateless/00829_bitmap_function.sql b/dbms/tests/queries/0_stateless/00829_bitmap_function.sql index 19d00c68415..f03239a1378 100644 --- a/dbms/tests/queries/0_stateless/00829_bitmap_function.sql +++ b/dbms/tests/queries/0_stateless/00829_bitmap_function.sql @@ -177,18 +177,37 @@ select bitmapHasAll(bitmapBuild([ -- bitmapContains: ---- Empty -SELECT bitmapContains(bitmapBuild(emptyArrayUInt32()), CAST(0, 'UInt32')); -SELECT bitmapContains(bitmapBuild(emptyArrayUInt16()), CAST(5, 'UInt32')); +SELECT bitmapContains(bitmapBuild(emptyArrayUInt32()), toUInt32(0)); +SELECT bitmapContains(bitmapBuild(emptyArrayUInt16()), toUInt32(5)); ---- Small -select bitmapContains(bitmapBuild([1,5,7,9]),CAST(0, 'UInt32')); -select bitmapContains(bitmapBuild([1,5,7,9]),CAST(9, 'UInt32')); +select bitmapContains(bitmapBuild([1,5,7,9]),toUInt32(0)); +select bitmapContains(bitmapBuild([1,5,7,9]),toUInt32(9)); ---- Large select bitmapContains(bitmapBuild([ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33, - 100,200,500]),CAST(100, 'UInt32')); + 100,200,500]),toUInt32(100)); select bitmapContains(bitmapBuild([ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33, - 100,200,500]),CAST(101, 'UInt32')); + 100,200,500]),toUInt32(101)); select bitmapContains(bitmapBuild([ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33, - 100,200,500]),CAST(500, 'UInt32')); + 100,200,500]),toUInt32(500)); + +-- bitmapRange: +---- Empty +SELECT bitmapToArray(bitmapRange(bitmapBuild(emptyArrayUInt32()), toUInt32(0), toUInt32(10))); +SELECT bitmapToArray(bitmapRange(bitmapBuild(emptyArrayUInt16()), toUInt32(0), toUInt32(10))); +---- Small +select bitmapToArray(bitmapRange(bitmapBuild([1,5,7,9]), toUInt32(0), toUInt32(4))); +select bitmapToArray(bitmapRange(bitmapBuild([1,5,7,9]), toUInt32(10), toUInt32(10))); +select bitmapToArray(bitmapRange(bitmapBuild([1,5,7,9]), toUInt32(3), toUInt32(7))); +---- 
Large +select bitmapToArray(bitmapRange(bitmapBuild([ + 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33, + 100,200,500]), toUInt32(0), toUInt32(100))); +select bitmapToArray(bitmapRange(bitmapBuild([ + 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33, + 100,200,500]), toUInt32(30), toUInt32(200))); +select bitmapToArray(bitmapRange(bitmapBuild([ + 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33, + 100,200,500]), toUInt32(100), toUInt32(200))); diff --git a/docs/en/query_language/functions/bitmap_functions.md b/docs/en/query_language/functions/bitmap_functions.md index 27f371841af..60fc7bb15f6 100644 --- a/docs/en/query_language/functions/bitmap_functions.md +++ b/docs/en/query_language/functions/bitmap_functions.md @@ -56,6 +56,32 @@ SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res └─────────────┘ ``` +## bitmapRange {#bitmap_functions-bitmaprange} + +Return new set with specified range (not include the range_end). + +``` +bitmapRange(bitmap, range_start, range_end) +``` + +**Parameters** + +- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). +- `range_start` – range start point. Type: [UInt32](../../data_types/int_uint.md). +- `range_end` – range end point(excluded). Type: [UInt32](../../data_types/int_uint.md). + +**Example** + +``` sql +SELECT bitmapToArray(bitmapRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res +``` + +``` +┌─res───────────────┐ +│ [30,31,32,33,100] │ +└───────────────────┘ +``` + ## bitmapContains {#bitmap_functions-bitmapcontains} Checks whether the bitmap contains an element. diff --git a/docs/zh/query_language/functions/bitmap_functions.md b/docs/zh/query_language/functions/bitmap_functions.md index ff05aecf9b3..950c64208db 100644 --- a/docs/zh/query_language/functions/bitmap_functions.md +++ b/docs/zh/query_language/functions/bitmap_functions.md @@ -51,6 +51,56 @@ SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res └─────────────┘ ``` +## bitmapRange + +将位图指定范围(不包含range_end)转换为另一个位图。 + +``` +bitmapRange(bitmap, range_start, range_end) +``` + +**参数** + +- `bitmap` – 位图对象. +- `range_start` – 范围起始点(含). +- `range_end` – 范围结束点(不含). + +**示例** + +``` sql +SELECT bitmapToArray(bitmapRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res +``` + +``` +┌─res───────────────┐ +│ [30,31,32,33,100] │ +└───────────────────┘ +``` + +## bitmapContains + +检查位图是否包含指定元素。 + +``` +bitmapContains(haystack, needle) +``` + +**参数** + +- `haystack` – 位图对象. +- `needle` – 元素,类型UInt32. 
+ +**示例** + +``` sql +SELECT bitmapContains(bitmapBuild([1,5,7,9]), toUInt32(9)) AS res +``` +```text +┌─res─┐ +│ 1 │ +└─────┘ +``` + ## bitmapHasAny 与`hasAny(array,array)`类似,如果位图有任何公共元素则返回1,否则返回0。 From 64c42930e694d8fa29fe4508ae4554cc58b63e02 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 4 Aug 2019 06:36:57 +0300 Subject: [PATCH 06/44] Using Danila Kutenin variant to make fastops working --- contrib/fastops | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/fastops b/contrib/fastops index d2c85c5d654..88752a5e03c 160000 --- a/contrib/fastops +++ b/contrib/fastops @@ -1 +1 @@ -Subproject commit d2c85c5d6549cfd648a7f31ef7b14341881ff8ae +Subproject commit 88752a5e03cf34639a4a37a4b41d8b463fffd2b5 From 98e6dbdd8775fcc1f4ee014e0a4fef11ef1fd4c5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 4 Aug 2019 13:58:43 +0300 Subject: [PATCH 07/44] Fixed CMake --- contrib/fastops-cmake/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/contrib/fastops-cmake/CMakeLists.txt b/contrib/fastops-cmake/CMakeLists.txt index 2d85d111526..0269d5603c2 100644 --- a/contrib/fastops-cmake/CMakeLists.txt +++ b/contrib/fastops-cmake/CMakeLists.txt @@ -3,9 +3,8 @@ set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/fastops) set(SRCS "") if(HAVE_AVX) - set (SRCS ${SRCS} ${LIBRARY_DIR}/fastops/avx/ops_avx.cpp ${LIBRARY_DIR}/fastops/core/FastIntrinsics.cpp) + set (SRCS ${SRCS} ${LIBRARY_DIR}/fastops/avx/ops_avx.cpp) set_source_files_properties(${LIBRARY_DIR}/fastops/avx/ops_avx.cpp PROPERTIES COMPILE_FLAGS "-mavx -DNO_AVX2") - set_source_files_properties(${LIBRARY_DIR}/fastops/core/FastIntrinsics.cpp PROPERTIES COMPILE_FLAGS "-mavx -DNO_AVX2") endif() if(HAVE_AVX2) From 09edfaf49c6fd3acb168afb8cb2bae6c64dd0753 Mon Sep 17 00:00:00 2001 From: Zhichang Yu Date: Sun, 4 Aug 2019 22:48:48 +0800 Subject: [PATCH 08/44] let Style Check happy --- .../AggregateFunctions/AggregateFunctionGroupBitmapData.h | 7 ++++--- dbms/src/Functions/FunctionsBitmap.h | 7 +++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h index 7e8fccbd82a..422d1f7a98f 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h @@ -461,7 +461,7 @@ public: UInt64 rb_range(UInt32 range_start, UInt32 range_end, RoaringBitmapWithSmallSet& r1) const { UInt64 count = 0; - if(range_start >= range_end) + if (range_start >= range_end) return count; if (isSmall()) { @@ -469,7 +469,8 @@ public: for (const auto & x : small) { T val = x.getValue(); - if((UInt32)val >= range_start && (UInt32)val < range_end) { + if ((UInt32)val >= range_start && (UInt32)val < range_end) + { r1.add(val); count++; } @@ -482,7 +483,7 @@ public: roaring_move_uint32_iterator_equalorlarger(&iterator, range_start); while (iterator.has_value) { - if((UInt32)iterator.current_value >= range_end) + if ((UInt32)iterator.current_value >= range_end) break; r1.add(iterator.current_value); roaring_advance_uint32_iterator(&iterator); diff --git a/dbms/src/Functions/FunctionsBitmap.h b/dbms/src/Functions/FunctionsBitmap.h index ec6daaab97b..3aaf8e0b497 100644 --- a/dbms/src/Functions/FunctionsBitmap.h +++ b/dbms/src/Functions/FunctionsBitmap.h @@ -318,9 +318,12 @@ private: columns[i] = block.getByPosition(arguments[i]).column.get(); is_column_const[i] = isColumnConst(*columns[i]); } - if (is_column_const[0]) { + 
if (is_column_const[0]) + { colAggFunc = typeid_cast(typeid_cast(columns[0])->getDataColumnPtr().get()); - } else{ + } + else + { colAggFunc = typeid_cast(columns[0]); } container0 = &colAggFunc->getData(); From 1e6972b38ee8efecefac45e0702c9d881dcbcf71 Mon Sep 17 00:00:00 2001 From: VDimir Date: Mon, 5 Aug 2019 02:24:16 +0300 Subject: [PATCH 09/44] Added optional message argument in throwIf (#5772) --- dbms/src/Functions/throwIf.cpp | 51 ++++++++++++++----- .../0_stateless/00602_throw_if.reference | 1 + .../queries/0_stateless/00602_throw_if.sh | 8 +-- 3 files changed, 44 insertions(+), 16 deletions(-) diff --git a/dbms/src/Functions/throwIf.cpp b/dbms/src/Functions/throwIf.cpp index 15584aa26a7..16d7d553e87 100644 --- a/dbms/src/Functions/throwIf.cpp +++ b/dbms/src/Functions/throwIf.cpp @@ -1,9 +1,11 @@ #include #include #include +#include #include #include #include +#include namespace DB @@ -13,6 +15,7 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int FUNCTION_THROW_IF_VALUE_IS_NON_ZERO; } @@ -32,6 +35,7 @@ public: return name; } + bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 1; @@ -39,9 +43,21 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (!isNativeNumber(arguments.front())) + const size_t number_of_arguments = arguments.size(); + + if (number_of_arguments < 1 || number_of_arguments > 2) + throw Exception{"Number of arguments for function " + getName() + " doesn't match: passed " + + toString(number_of_arguments) + ", should be 1 or 2", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + + if (!isNativeNumber(arguments[0])) throw Exception{"Argument for function " + getName() + " must be number", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + if (number_of_arguments > 1 && !isString(arguments[1])) + throw Exception{"Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + return std::make_shared(); } @@ -49,29 +65,38 @@ public: void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { + std::optional custom_message; + if (arguments.size() == 2) { + auto * msg_column = checkAndGetColumnConst(block.getByPosition(arguments[1]).column.get()); + if (!msg_column) + throw Exception{"Second argument for function " + getName() + " must be constant String", ErrorCodes::ILLEGAL_COLUMN}; + custom_message = msg_column->getValue(); + } + const auto in = block.getByPosition(arguments.front()).column.get(); - if ( !execute(block, in, result) - && !execute(block, in, result) - && !execute(block, in, result) - && !execute(block, in, result) - && !execute(block, in, result) - && !execute(block, in, result) - && !execute(block, in, result) - && !execute(block, in, result) - && !execute(block, in, result) - && !execute(block, in, result)) + if ( !execute(block, in, result, custom_message) + && !execute(block, in, result, custom_message) + && !execute(block, in, result, custom_message) + && !execute(block, in, result, custom_message) + && !execute(block, in, result, custom_message) + && !execute(block, in, result, custom_message) + && !execute(block, in, result, custom_message) + && !execute(block, in, result, custom_message) + && !execute(block, in, result, custom_message) + && !execute(block, in, result, custom_message)) throw Exception{"Illegal column " + 
in->getName() + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; } template - bool execute(Block & block, const IColumn * in_untyped, const size_t result) + bool execute(Block & block, const IColumn * in_untyped, const size_t result, const std::optional & message) { if (const auto in = checkAndGetColumn>(in_untyped)) { const auto & in_data = in->getData(); if (!memoryIsZero(in_data.data(), in_data.size() * sizeof(in_data[0]))) - throw Exception("Value passed to 'throwIf' function is non zero", ErrorCodes::FUNCTION_THROW_IF_VALUE_IS_NON_ZERO); + throw Exception{message.value_or("Value passed to '" + getName() + "' function is non zero"), + ErrorCodes::FUNCTION_THROW_IF_VALUE_IS_NON_ZERO}; /// We return non constant to avoid constant folding. block.getByPosition(result).column = ColumnUInt8::create(in_data.size(), 0); diff --git a/dbms/tests/queries/0_stateless/00602_throw_if.reference b/dbms/tests/queries/0_stateless/00602_throw_if.reference index d0752a77fc7..ad5aaee89a8 100644 --- a/dbms/tests/queries/0_stateless/00602_throw_if.reference +++ b/dbms/tests/queries/0_stateless/00602_throw_if.reference @@ -1,2 +1,3 @@ 1 +1 1000000 diff --git a/dbms/tests/queries/0_stateless/00602_throw_if.sh b/dbms/tests/queries/0_stateless/00602_throw_if.sh index 8dae5033978..3c790d900d3 100755 --- a/dbms/tests/queries/0_stateless/00602_throw_if.sh +++ b/dbms/tests/queries/0_stateless/00602_throw_if.sh @@ -3,7 +3,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -exception_pattern="Value passed to 'throwIf' function is non zero" +default_exception_message="Value passed to 'throwIf' function is non zero" +custom_exception_message="Number equals 1000000" -${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT throwIf(number = 1000000) FROM system.numbers" 2>&1 | grep -cF "$exception_pattern" -${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT sum(x = 0) FROM (SELECT throwIf(number = 1000000) AS x FROM numbers(1000000))" 2>&1 +${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT throwIf(number = 1000000) FROM system.numbers" 2>&1 | grep -cF "$default_exception_message" +${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT throwIf(number = 1000000, '$custom_exception_message') FROM system.numbers" 2>&1 | grep -cF "$custom_exception_message" +${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT sum(x = 0) FROM (SELECT throwIf(number = 1000000) AS x FROM numbers(1000000))" 2>&1 \ No newline at end of file From e1f3ef2115cdfdbbe49b4812dcfb7d4c0879d691 Mon Sep 17 00:00:00 2001 From: Zhichang Yu Date: Mon, 5 Aug 2019 10:27:12 +0800 Subject: [PATCH 10/44] renamed bitmapRange to bitmapSubsetInRange --- dbms/src/Functions/FunctionsBitmap.cpp | 2 +- dbms/src/Functions/FunctionsBitmap.h | 10 +++++----- .../0_stateless/00829_bitmap_function.sql | 18 +++++++++--------- .../functions/bitmap_functions.md | 8 ++++---- .../functions/bitmap_functions.md | 6 +++--- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/dbms/src/Functions/FunctionsBitmap.cpp b/dbms/src/Functions/FunctionsBitmap.cpp index af7daddd773..b2bb1e4f25a 100644 --- a/dbms/src/Functions/FunctionsBitmap.cpp +++ b/dbms/src/Functions/FunctionsBitmap.cpp @@ -9,7 +9,7 @@ void registerFunctionsBitmap(FunctionFactory & factory) { factory.registerFunction(); factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git 
a/dbms/src/Functions/FunctionsBitmap.h b/dbms/src/Functions/FunctionsBitmap.h index 3aaf8e0b497..22964605b8b 100644 --- a/dbms/src/Functions/FunctionsBitmap.h +++ b/dbms/src/Functions/FunctionsBitmap.h @@ -30,8 +30,8 @@ namespace ErrorCodes * Convert bitmap to integer array: * bitmapToArray: bitmap -> integer[] * - * Return new set with specified range (not include the range_end): - * bitmapRange: bitmap,integer,integer -> bitmap + * Return subset in specified range (not include the range_end): + * bitmapSubsetInRange: bitmap,integer,integer -> bitmap * * Two bitmap and calculation: * bitmapAnd: bitmap,bitmap -> bitmap @@ -243,12 +243,12 @@ private: } }; -class FunctionBitmapRange : public IFunction +class FunctionBitmapSubsetInRange : public IFunction { public: - static constexpr auto name = "bitmapRange"; + static constexpr auto name = "bitmapSubsetInRange"; - static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } diff --git a/dbms/tests/queries/0_stateless/00829_bitmap_function.sql b/dbms/tests/queries/0_stateless/00829_bitmap_function.sql index f03239a1378..247a9ba3960 100644 --- a/dbms/tests/queries/0_stateless/00829_bitmap_function.sql +++ b/dbms/tests/queries/0_stateless/00829_bitmap_function.sql @@ -193,21 +193,21 @@ select bitmapContains(bitmapBuild([ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33, 100,200,500]),toUInt32(500)); --- bitmapRange: +-- bitmapSubsetInRange: ---- Empty -SELECT bitmapToArray(bitmapRange(bitmapBuild(emptyArrayUInt32()), toUInt32(0), toUInt32(10))); -SELECT bitmapToArray(bitmapRange(bitmapBuild(emptyArrayUInt16()), toUInt32(0), toUInt32(10))); +SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild(emptyArrayUInt32()), toUInt32(0), toUInt32(10))); +SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild(emptyArrayUInt16()), toUInt32(0), toUInt32(10))); ---- Small -select bitmapToArray(bitmapRange(bitmapBuild([1,5,7,9]), toUInt32(0), toUInt32(4))); -select bitmapToArray(bitmapRange(bitmapBuild([1,5,7,9]), toUInt32(10), toUInt32(10))); -select bitmapToArray(bitmapRange(bitmapBuild([1,5,7,9]), toUInt32(3), toUInt32(7))); +select bitmapToArray(bitmapSubsetInRange(bitmapBuild([1,5,7,9]), toUInt32(0), toUInt32(4))); +select bitmapToArray(bitmapSubsetInRange(bitmapBuild([1,5,7,9]), toUInt32(10), toUInt32(10))); +select bitmapToArray(bitmapSubsetInRange(bitmapBuild([1,5,7,9]), toUInt32(3), toUInt32(7))); ---- Large -select bitmapToArray(bitmapRange(bitmapBuild([ +select bitmapToArray(bitmapSubsetInRange(bitmapBuild([ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33, 100,200,500]), toUInt32(0), toUInt32(100))); -select bitmapToArray(bitmapRange(bitmapBuild([ +select bitmapToArray(bitmapSubsetInRange(bitmapBuild([ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33, 100,200,500]), toUInt32(30), toUInt32(200))); -select bitmapToArray(bitmapRange(bitmapBuild([ +select bitmapToArray(bitmapSubsetInRange(bitmapBuild([ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33, 100,200,500]), toUInt32(100), toUInt32(200))); diff --git a/docs/en/query_language/functions/bitmap_functions.md b/docs/en/query_language/functions/bitmap_functions.md index 60fc7bb15f6..b0d21500035 100644 --- a/docs/en/query_language/functions/bitmap_functions.md +++ 
b/docs/en/query_language/functions/bitmap_functions.md @@ -56,12 +56,12 @@ SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res └─────────────┘ ``` -## bitmapRange {#bitmap_functions-bitmaprange} +## bitmapSubsetInRange {#bitmap_functions-bitmapsubsetinrange} -Return new set with specified range (not include the range_end). +Return subset in specified range (not include the range_end). ``` -bitmapRange(bitmap, range_start, range_end) +bitmapSubsetInRange(bitmap, range_start, range_end) ``` **Parameters** @@ -73,7 +73,7 @@ bitmapRange(bitmap, range_start, range_end) **Example** ``` sql -SELECT bitmapToArray(bitmapRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res +SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res ``` ``` diff --git a/docs/zh/query_language/functions/bitmap_functions.md b/docs/zh/query_language/functions/bitmap_functions.md index 950c64208db..97be4f38853 100644 --- a/docs/zh/query_language/functions/bitmap_functions.md +++ b/docs/zh/query_language/functions/bitmap_functions.md @@ -51,12 +51,12 @@ SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res └─────────────┘ ``` -## bitmapRange +## bitmapSubsetInRange 将位图指定范围(不包含range_end)转换为另一个位图。 ``` -bitmapRange(bitmap, range_start, range_end) +bitmapSubsetInRange(bitmap, range_start, range_end) ``` **参数** @@ -68,7 +68,7 @@ bitmapRange(bitmap, range_start, range_end) **示例** ``` sql -SELECT bitmapToArray(bitmapRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res +SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res ``` ``` From 259364a4f87a90bcc98ca51c31c7bf63201d53a8 Mon Sep 17 00:00:00 2001 From: VDimir Date: Mon, 5 Aug 2019 09:38:52 +0300 Subject: [PATCH 11/44] Minor fixes in throwIf implementation --- dbms/src/Functions/throwIf.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/throwIf.cpp b/dbms/src/Functions/throwIf.cpp index 16d7d553e87..dc4ac4950e8 100644 --- a/dbms/src/Functions/throwIf.cpp +++ b/dbms/src/Functions/throwIf.cpp @@ -38,7 +38,7 @@ public: bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { - return 1; + return 0; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override @@ -62,11 +62,13 @@ public: } bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { std::optional custom_message; - if (arguments.size() == 2) { + if (arguments.size() == 2) + { auto * msg_column = checkAndGetColumnConst(block.getByPosition(arguments[1]).column.get()); if (!msg_column) throw Exception{"Second argument for function " + getName() + " must be constant String", ErrorCodes::ILLEGAL_COLUMN}; From 0e09b7cc33c2f1464519ff3010de58d11c8937c4 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Mon, 5 Aug 2019 18:50:42 +0800 Subject: [PATCH 12/44] continue translate domain data type --- 
docs/zh/data_types/domains/ipv4.md | 15 ++++++------ docs/zh/data_types/domains/ipv6.md | 15 ++++++------ docs/zh/data_types/domains/overview.md | 32 +++++++++++++------------- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/docs/zh/data_types/domains/ipv4.md b/docs/zh/data_types/domains/ipv4.md index 1dd2cb03794..4adf13409fe 120000 --- a/docs/zh/data_types/domains/ipv4.md +++ b/docs/zh/data_types/domains/ipv4.md @@ -1,7 +1,6 @@ ## IPv4 -`IPv4` 是基于 `UInt32` 的domain类型,用来存储IPv4地址。它使用紧凑的存储方式,提供用户友好的输入输出格式, -自动检查列类型。 +`IPv4`是与`UInt32`类型保持二进制兼容的Domain类型,其用于存储IPv4地址的值。它提供了更为紧凑的二进制存储的同时支持识别可读性更加友好的输入输出格式。 ### 基本使用 @@ -18,13 +17,13 @@ DESCRIBE TABLE hits; └──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┘ ``` -您也可以使用IPv4 domain作主键: +同时您也可以使用`IPv4`类型的列作为主键: ``` sql CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; ``` -`IPv4` domain支持定制化的IPv4地址字符串格式: +在写入与查询时,`IPv4`类型能够识别可读性更加友好的输入输出格式: ``` sql INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.yandex', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); @@ -40,7 +39,7 @@ SELECT * FROM hits; └────────────────────────────────────┴────────────────┘ ``` -数据值以紧凑的二进制格式存储: +同时它提供更为紧凑的二进制存储格式: ``` sql SELECT toTypeName(from), hex(from) FROM hits LIMIT 1; @@ -52,7 +51,7 @@ SELECT toTypeName(from), hex(from) FROM hits LIMIT 1; └──────────────────┴───────────┘ ``` -Domain不可隐式转换为除`UInt32`以外的类型。如果要将IPv4值转换为字符串,则必须使用`IPv4NumToString()`函数显示的进行此操作。 +不可隐式转换为除`UInt32`以外的其他类型类型。如果要将`IPv4`类型的值转换成字符串,你可以使用`IPv4NumToString()`显示的进行转换: ``` sql SELECT toTypeName(s), IPv4NumToString(from) as s FROM hits LIMIT 1; @@ -64,7 +63,7 @@ SELECT toTypeName(s), IPv4NumToString(from) as s FROM hits LIMIT 1; └───────────────────────────────────┴────────────────┘ ``` -或转换为 `UInt32` 类型: +或可以使用`CAST`将它转换为`UInt32`类型: ``` sql SELECT toTypeName(i), CAST(from as UInt32) as i FROM hits LIMIT 1; @@ -76,4 +75,4 @@ SELECT toTypeName(i), CAST(from as UInt32) as i FROM hits LIMIT 1; └──────────────────────────────────┴────────────┘ ``` -[Original article](https://clickhouse.yandex/docs/en/data_types/domains/ipv4) +[来源文章](https://clickhouse.yandex/docs/en/data_types/domains/ipv4) diff --git a/docs/zh/data_types/domains/ipv6.md b/docs/zh/data_types/domains/ipv6.md index 13c6809e4a9..1209350990f 120000 --- a/docs/zh/data_types/domains/ipv6.md +++ b/docs/zh/data_types/domains/ipv6.md @@ -1,6 +1,6 @@ ## IPv6 -`IPv6` 是基于`FixedString(16)` 类型的domain类型,用来存储IPv6地址值。它使用紧凑的存储方式,提供用户友好的输入输出格式, 自动检查列类型。 +`IPv6`是与`FixedString(16)`类型保持二进制兼容的Domain类型,其用于存储IPv6地址的值。它提供了更为紧凑的二进制存储的同时支持识别可读性更加友好的输入输出格式。 ### 基本用法 @@ -17,13 +17,13 @@ DESCRIBE TABLE hits; └──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┘ ``` -您也可以使用 `IPv6`domain做主键: +同时您也可以使用`IPv6`类型的列作为主键: ``` sql CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; ``` -`IPv6` domain支持定制化的IPv6地址字符串格式: +在写入与查询时,`IPv6`类型能够识别可读性更加友好的输入输出格式: ``` sql INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.yandex', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); @@ -39,7 +39,7 @@ SELECT * FROM hits; └────────────────────────────────────┴───────────────────────────────┘ ``` -它以紧凑的二进制格式存储数值: +同时它提供更为紧凑的二进制存储格式: ``` sql SELECT toTypeName(from), hex(from) FROM hits LIMIT 1; @@ -50,7 +50,8 @@ SELECT toTypeName(from), hex(from) FROM hits LIMIT 1; │ IPv6 │ 
200144C8012926320033000002520002 │ └──────────────────┴──────────────────────────────────┘ ``` -Domain不可隐式转换为除`FixedString(16)`以外的类型。如果要将`IPv6`值转换为字符串,则必须使用`IPv6NumToString()`函数显示地进行此操作: + +不可隐式转换为除`FixedString(16)`以外的其他类型类型。如果要将`IPv6`类型的值转换成字符串,你可以使用`IPv6NumToString()`显示的进行转换: ``` sql SELECT toTypeName(s), IPv6NumToString(from) as s FROM hits LIMIT 1; @@ -62,7 +63,7 @@ SELECT toTypeName(s), IPv6NumToString(from) as s FROM hits LIMIT 1; └───────────────────────────────────┴───────────────────────────────┘ ``` -或转换为 `FixedString(16)`类型: +或使用`CAST`将其转换为`FixedString(16)`: ``` sql SELECT toTypeName(i), CAST(from as FixedString(16)) as i FROM hits LIMIT 1; @@ -74,4 +75,4 @@ SELECT toTypeName(i), CAST(from as FixedString(16)) as i FROM hits LIMIT 1; └───────────────────────────────────────────┴─────────┘ ``` -[Original article](https://clickhouse.yandex/docs/en/data_types/domains/ipv6) +[来源文章](https://clickhouse.yandex/docs/en/data_types/domains/ipv6) diff --git a/docs/zh/data_types/domains/overview.md b/docs/zh/data_types/domains/overview.md index 25fc5d3850a..b4db116e75b 120000 --- a/docs/zh/data_types/domains/overview.md +++ b/docs/zh/data_types/domains/overview.md @@ -1,26 +1,26 @@ # Domains -Domains是特殊用途的类型,它在现有的基础类型之上添加了一些额外的特性,能够让线上和磁盘上的表格式保持不变。目前,ClickHouse暂不支持自定义的domains. +Domain类型是特定实现的类型,它总是与某个现存的基础类型保持二进制兼容的同时添加一些额外的特性,以能够在维持磁盘数据不变的情况下使用这些额外的特性。目前ClickHouse暂不支持自定义domain类型。 -您可以在任何地方使用domains,相应的基础类型的使用方式如下: +如果你可以在一个地方使用与Domain类型二进制兼容的基础类型,那么在相同的地方您也可以使用Domain类型,例如: -* 构建一列domain类型的数据 -* 从/向domain列读/写数据 -* 作为索引,如果基础类型能够被作为索引的话 -* 以domain列的值作为参数调用函数 -* 等等. +* 使用Domain类型作为表中列的类型 +* 对Domain类型的列进行读/写数据 +* 如果与Domain二进制兼容的基础类型可以作为索引,那么Domain类型也可以作为索引 +* 将Domain类型作为参数传递给函数使用 +* 其他 ### Domains的额外特性 -* 在使用`SHOW CREATE TABLE` 或 `DESCRIBE TABLE`时,明确地显示列类型名称 -* 使用 `INSERT INTO domain_table(domain_column) VALUES(...)`实现人性化格式输入 -* 使用`SELECT domain_column FROM domain_table`实现人性化格式输出 -* 使用 `INSERT INTO domain_table FORMAT CSV ...`实现外部源数据的人性化格式载入 +* 在执行SHOW CREATE TABLE 或 DESCRIBE TABLE时,其对应的列总是展示为Domain类型的名称 +* 在INSERT INTO domain_table(domain_column) VALUES(...)中输入数据总是以更人性化的格式进行输入 +* 在SELECT domain_column FROM domain_table中数据总是以更人性化的格式输出 +* 在INSERT INTO domain_table FORMAT CSV ...中,实现外部源数据以更人性化的格式载入 -### 缺陷 +### Domains类型的限制 -* 无法通过 `ALTER TABLE`将基础类型的索引转换为domain类型的索引. -* 当从其他列或表插入数据时,无法将string类型的值隐式地转换为domain类型的值. -* 无法对存储为domain类型的值添加约束. +* 无法通过`ALTER TABLE`将基础类型的索引转换为Domain类型的索引。 +* 当从其他列或表插入数据时,无法将string类型的值隐式地转换为Domain类型的值。 +* 无法对存储为Domain类型的值添加约束。 -[Original article](https://clickhouse.yandex/docs/en/data_types/domains/overview) +[来源文章](https://clickhouse.yandex/docs/en/data_types/domains/overview) From e58e019f50c27e018b7a3fe67b7dcdf2a1ee0169 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20Bar=C3=B3n?= Date: Mon, 5 Aug 2019 17:11:53 +0200 Subject: [PATCH 13/44] Fixed links in monitoring.md I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en Category: Document Short description (up to few sentences): Fixed link in the logger word Added link in the word guide --- docs/en/operations/monitoring.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md index f90f647ccea..15a233079f7 100644 --- a/docs/en/operations/monitoring.md +++ b/docs/en/operations/monitoring.md @@ -21,7 +21,7 @@ It is highly recommended to set up monitoring for: ClickHouse server has embedded instruments for self-state monitoring. -To track server events use server logs. 
See the [logger](#server_settings-logger) section of the configuration file. +To track server events use server logs. See the [logger](server_settings/settings.md#server_settings-logger) section of the configuration file. ClickHouse collects: @@ -30,7 +30,7 @@ ClickHouse collects: You can find metrics in the [system.metrics](#system_tables-metrics), [system.events](#system_tables-events), and [system.asynchronous_metrics](#system_tables-asynchronous_metrics) tables. -You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](server_settings/settings.md#server_settings-graphite) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Graphite by following their official guide https://graphite.readthedocs.io/en/latest/install.html. +You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](server_settings/settings.md#server_settings-graphite) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Graphite by following their official [guide](https://graphite.readthedocs.io/en/latest/install.html). Additionally, you can monitor server availability through the HTTP API. Send the `HTTP GET` request to `/`. If the server is available, it responds with `200 OK`. From d5fd3c0a8a8091eb85dfc4c63c6819b4a5864d8f Mon Sep 17 00:00:00 2001 From: Vxider Date: Tue, 6 Aug 2019 00:25:26 +0800 Subject: [PATCH 14/44] build fix --- dbms/programs/server/Server.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index d97fe4aff2c..aff54b5e992 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -611,7 +611,7 @@ int Server::main(const std::vector & /*args*/) return socket_address; }; - auto socket_bind_listen = [&](auto & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure = 0) + auto socket_bind_listen = [&](auto & socket, const std::string & host, [[maybe_unused]] UInt16 port, [[maybe_unused]] bool secure = 0) { auto address = make_socket_address(host, port); #if !defined(POCO_CLICKHOUSE_PATCH) || POCO_VERSION < 0x01090100 @@ -681,7 +681,7 @@ int Server::main(const std::vector & /*args*/) }); /// HTTPS - create_server("https_port", [&](UInt16 port) + create_server("https_port", [&]([[maybe_unused]]UInt16 port) { #if USE_POCO_NETSSL Poco::Net::SecureServerSocket socket; @@ -718,7 +718,7 @@ int Server::main(const std::vector & /*args*/) }); /// TCP with SSL - create_server("tcp_port_secure", [&](UInt16 port) + create_server ("tcp_port_secure", [&]([[maybe_unused]]UInt16 port) { #if USE_POCO_NETSSL Poco::Net::SecureServerSocket socket; @@ -753,7 +753,7 @@ int Server::main(const std::vector & /*args*/) LOG_INFO(log, "Listening for replica communication (interserver) http://" + address.toString()); }); - create_server("interserver_https_port", [&](UInt16 port) + create_server("interserver_https_port", [&]([[maybe_unused]]UInt16 port) { #if USE_POCO_NETSSL Poco::Net::SecureServerSocket socket; @@ -773,7 +773,7 @@ int Server::main(const std::vector & /*args*/) #endif }); - create_server("mysql_port", [&](UInt16 port) + create_server("mysql_port", [&]([[maybe_unused]]UInt16 port) { #if USE_POCO_NETSSL Poco::Net::ServerSocket socket; From 03092d570492abf28fe28caec82b8fea60d6f59f Mon Sep 17 00:00:00 2001 From: Vxider Date: Tue, 6 Aug 2019 00:53:11 
+0800 Subject: [PATCH 15/44] build fix --- dbms/programs/server/Server.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index aff54b5e992..3f96562b656 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -681,7 +681,7 @@ int Server::main(const std::vector & /*args*/) }); /// HTTPS - create_server("https_port", [&]([[maybe_unused]]UInt16 port) + create_server("https_port", [&]([[maybe_unused]] UInt16 port) { #if USE_POCO_NETSSL Poco::Net::SecureServerSocket socket; @@ -718,7 +718,7 @@ int Server::main(const std::vector & /*args*/) }); /// TCP with SSL - create_server ("tcp_port_secure", [&]([[maybe_unused]]UInt16 port) + create_server ("tcp_port_secure", [&]([[maybe_unused]] UInt16 port) { #if USE_POCO_NETSSL Poco::Net::SecureServerSocket socket; @@ -753,7 +753,7 @@ int Server::main(const std::vector & /*args*/) LOG_INFO(log, "Listening for replica communication (interserver) http://" + address.toString()); }); - create_server("interserver_https_port", [&]([[maybe_unused]]UInt16 port) + create_server("interserver_https_port", [&]([[maybe_unused]] UInt16 port) { #if USE_POCO_NETSSL Poco::Net::SecureServerSocket socket; @@ -773,7 +773,7 @@ int Server::main(const std::vector & /*args*/) #endif }); - create_server("mysql_port", [&]([[maybe_unused]]UInt16 port) + create_server("mysql_port", [&]([[maybe_unused]] UInt16 port) { #if USE_POCO_NETSSL Poco::Net::ServerSocket socket; From 75d10f4160077a33f2275640557d9cdb8ac29f8d Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 5 Aug 2019 22:41:20 +0300 Subject: [PATCH 16/44] print extra info in exception message --- dbms/src/Common/Exception.cpp | 75 ++++++++++++++++++- dbms/src/Common/Exception.h | 3 +- .../src/Storages/MergeTree/DiskSpaceMonitor.h | 62 ++++++++++++++- 3 files changed, 133 insertions(+), 7 deletions(-) diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp index 958f254fe92..15371e950e5 100644 --- a/dbms/src/Common/Exception.cpp +++ b/dbms/src/Common/Exception.cpp @@ -9,6 +9,9 @@ #include #include #include +#include +#include +#include namespace DB { @@ -68,7 +71,66 @@ void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_ } } -std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded_stacktrace) +void getEnospcInfoMessage(std::filesystem::path path, std::string & msg) +{ + /// It's possible to get ENOSPC for non existent file (e.g. if there are no free inodes and creat() fails) + /// So try to get info for existent parent directory. 
+    while (!std::filesystem::exists(path) && path.has_relative_path())
+        path = path.parent_path();
+
+    /// Most likely path is invalid
+    if (!path.has_relative_path())
+        return;
+
+    auto fs = DiskSpaceMonitor::getStatvfs(path);
+    msg += "\nTotal space: " + formatReadableSizeWithBinarySuffix(fs.f_blocks * fs.f_bsize)
+        + "\nAvailable space: " + formatReadableSizeWithBinarySuffix(fs.f_bavail * fs.f_bsize)
+        + "\nTotal inodes: " + formatReadableQuantity(fs.f_files)
+        + "\nAvailable inodes: " + formatReadableQuantity(fs.f_favail);
+
+    auto mount_point = DiskSpaceMonitor::getMountPoint(path).string();
+    msg += "\nMount point: " + mount_point;
+    msg += "\nFilesystem: " + DiskSpaceMonitor::getFilesystemName(mount_point);
+}
+
+std::string getAdditionalExceptionInfo(const std::exception & e)
+{
+    String msg;
+    try
+    {
+        if (auto file_exception = dynamic_cast<const Poco::FileException *>(&e))
+        {
+            if (file_exception->code() == ENOSPC)
+                getEnospcInfoMessage(file_exception->message(), msg);
+        }
+        else if (auto errno_exception = dynamic_cast<const ErrnoException *>(&e))
+        {
+            if (errno_exception->getErrno() == ENOSPC)
+            {
+                /// Try to extract path from text exception message. Most likely the exception was thrown by
+                /// DB::throwFromErrno("Some message" + filename, ...);
+                /// We suppose "Some message " does not contain '/' and filename is an absolute path starting with '/'.
+                /// throwFromErrno appends ", errno: ..." to the first argument.
+                /// It's an ugly hack which may not work correctly. However, getEnospcInfoMessage(...) checks if the path exists.
+                size_t likely_path_begin = errno_exception->message().find('/');
+                size_t likely_path_end = errno_exception->message().find(", errno: ", likely_path_begin);
+                if (likely_path_end != std::string::npos)
+                {
+                    std::string supposed_to_be_path = errno_exception->message().substr(likely_path_begin,
+                        likely_path_end - likely_path_begin);
+                    getEnospcInfoMessage(supposed_to_be_path, msg);
+                }
+            }
+        }
+    }
+    catch (...) {
+        msg += "\nCannot print additional info: " + getCurrentExceptionMessage(false, false, false);
+    }
+
+    return msg;
+}
+
+std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded_stacktrace /*= false*/, bool with_additional_info /*= true*/)
 {
     std::stringstream stream;
 
@@ -78,7 +140,9 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
     }
     catch (const Exception & e)
     {
-        stream << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace) << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
+        stream << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace)
+            << (with_additional_info ? getAdditionalExceptionInfo(e) : "")
+            << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
     }
     catch (const Poco::Exception & e)
     {
@@ -86,7 +150,8 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
         {
            stream << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code()
                << ", e.displayText() = " << e.displayText()
-                << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
+                << (with_additional_info ? getAdditionalExceptionInfo(e) : "")
+                << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
         }
         catch (...) {}
     }
@@ -100,7 +165,9 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
         if (status)
            name += " (demangling status: " + toString(status) + ")";
 
-        stream << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what() << ", version = " << VERSION_STRING << VERSION_OFFICIAL;
+        stream << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what()
+            << (with_additional_info ? getAdditionalExceptionInfo(e) : "")
+            << ", version = " << VERSION_STRING << VERSION_OFFICIAL;
         }
         catch (...) {}
     }
diff --git a/dbms/src/Common/Exception.h b/dbms/src/Common/Exception.h
index 6b0656f4828..2f4e85e32bd 100644
--- a/dbms/src/Common/Exception.h
+++ b/dbms/src/Common/Exception.h
@@ -87,7 +87,8 @@ void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_
  * check_embedded_stacktrace - if DB::Exception has embedded stacktrace then
  * only this stack trace will be printed.
  */
-std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded_stacktrace = false);
+std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded_stacktrace = false,
+    bool with_additional_info = true);
 
 /// Returns error code from ErrorCodes
 int getCurrentExceptionCode();
diff --git a/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h b/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h
index d518fea5490..1640374d390 100644
--- a/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h
+++ b/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h
@@ -2,7 +2,13 @@
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
 #include 
+#include 
 #include 
 #include 
 #include 
 
@@ -23,6 +29,7 @@ namespace ErrorCodes
 {
     extern const int CANNOT_STATVFS;
     extern const int NOT_ENOUGH_SPACE;
+    extern const int SYSTEM_ERROR;
 }
 
@@ -96,12 +103,17 @@ public:
 
     using ReservationPtr = std::unique_ptr<Reservation>;
 
-    static UInt64 getUnreservedFreeSpace(const std::string & path)
+    inline static struct statvfs getStatvfs(const std::string & path)
     {
         struct statvfs fs;
-
         if (statvfs(path.c_str(), &fs) != 0)
             throwFromErrno("Could not calculate available disk space (statvfs)", ErrorCodes::CANNOT_STATVFS);
+        return fs;
+    }
+
+    static UInt64 getUnreservedFreeSpace(const std::string & path)
+    {
+        struct statvfs fs = getStatvfs(path);
 
         UInt64 res = fs.f_bfree * fs.f_bsize;
 
@@ -140,6 +152,52 @@ public:
         return std::make_unique<Reservation>(size);
     }
 
+    /// Returns mount point of filesystem where absolute_path (must exist) is located
+    static std::filesystem::path getMountPoint(std::filesystem::path absolute_path)
+    {
+        if (absolute_path.is_relative())
+            throw Exception("Path is relative. 
It's a bug.", ErrorCodes::LOGICAL_ERROR); + + absolute_path = std::filesystem::canonical(absolute_path); + + const auto get_device_id = [](const std::filesystem::path & p) + { + struct stat st; + if (stat(p.c_str(), &st)) + throwFromErrno("Cannot stat " + p.string(), errno); + return st.st_dev; + }; + + /// If /some/path/to/dir/ and /some/path/to/ have different device id, + /// then device which contains /some/path/to/dir/filename is mounted to /some/path/to/dir/ + auto device_id = get_device_id(absolute_path); + while (absolute_path.has_relative_path()) + { + auto parent = absolute_path.parent_path(); + auto parent_device_id = get_device_id(parent); + if (device_id != parent_device_id) + return absolute_path; + absolute_path = parent; + device_id = parent_device_id; + } + + return absolute_path; + } + + /// Returns name of filesystem mounted to mount_point + static std::string getFilesystemName(const std::string & mount_point) + { + auto mounted_filesystems = setmntent("/etc/mtab", "r"); + mntent fs_info; + constexpr size_t buf_size = 4096; /// The same as buffer used for getmntent in glibc. It can happen that it's not enough + char buf[buf_size]; + while (getmntent_r(mounted_filesystems, &fs_info, buf, buf_size) && fs_info.mnt_dir != mount_point); + endmntent(mounted_filesystems); + if (fs_info.mnt_dir != mount_point) + throw DB::Exception("Cannot find name of filesystem by mount point " + mount_point, ErrorCodes::SYSTEM_ERROR); + return fs_info.mnt_fsname; + } + private: static UInt64 reserved_bytes; static UInt64 reservation_count; From ffb053aa5f16022acb327695634c3dfeaa7ca144 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 6 Aug 2019 14:15:48 +0300 Subject: [PATCH 17/44] add test for crash described in #5859 --- .../00980_full_join_crash_fancyqlx.reference | 5 +++++ .../00980_full_join_crash_fancyqlx.sql | 15 +++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00980_full_join_crash_fancyqlx.reference create mode 100644 dbms/tests/queries/0_stateless/00980_full_join_crash_fancyqlx.sql diff --git a/dbms/tests/queries/0_stateless/00980_full_join_crash_fancyqlx.reference b/dbms/tests/queries/0_stateless/00980_full_join_crash_fancyqlx.reference new file mode 100644 index 00000000000..5399cf08165 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00980_full_join_crash_fancyqlx.reference @@ -0,0 +1,5 @@ +1 2019-01-01 a +1 2019-01-01 \N +1 2019-01-01 \N +2 2019-01-01 b +3 2019-01-01 c diff --git a/dbms/tests/queries/0_stateless/00980_full_join_crash_fancyqlx.sql b/dbms/tests/queries/0_stateless/00980_full_join_crash_fancyqlx.sql new file mode 100644 index 00000000000..2514b6ded1b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00980_full_join_crash_fancyqlx.sql @@ -0,0 +1,15 @@ +drop table if exists test_join; + +create table test_join (date Date, id Int32, name Nullable(String)) engine = MergeTree partition by date order by id; + +insert into test_join values ('2019-01-01', 1, 'a'); +insert into test_join values ('2019-01-01', 2, 'b'); +insert into test_join values ('2019-01-01', 3, 'c'); +insert into test_join values ('2019-01-01', 1, null); + +SELECT id, date, name FROM (SELECT id, date, name FROM test_join GROUP BY id, name, date) +FULL OUTER JOIN (SELECT id, date, name FROM test_join GROUP BY id, name, date) +USING (id, name, date) +ORDER BY id, name; + +drop table test_join; From c96b10be9f10f4415114f4a3165669a9d19435fb Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 6 Aug 2019 15:51:10 +0300 Subject: [PATCH 18/44] style 
fixes
---
 dbms/src/Common/Exception.cpp                  | 18 +++++++++---------
 dbms/src/Common/Exception.h                    |  2 +-
 dbms/src/Storages/MergeTree/DiskSpaceMonitor.h |  9 +++++----
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp
index 15371e950e5..320b079774e 100644
--- a/dbms/src/Common/Exception.cpp
+++ b/dbms/src/Common/Exception.cpp
@@ -71,7 +71,7 @@ void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_
     }
 }
 
-void getEnospcInfoMessage(std::filesystem::path path, std::string & msg)
+void getNoSpaceLeftInfoMessage(std::filesystem::path path, std::string & msg)
 {
     /// It's possible to get ENOSPC for non existent file (e.g. if there are no free inodes and creat() fails)
     /// So try to get info for existent parent directory.
@@ -82,7 +82,7 @@ void getNoSpaceLeftInfoMessage(std::filesystem::path path, std::string & msg)
     if (!path.has_relative_path())
         return;
 
-    auto fs = DiskSpaceMonitor::getStatvfs(path);
+    auto fs = DiskSpaceMonitor::getStatVFS(path);
     msg += "\nTotal space: " + formatReadableSizeWithBinarySuffix(fs.f_blocks * fs.f_bsize)
          + "\nAvailable space: " + formatReadableSizeWithBinarySuffix(fs.f_bavail * fs.f_bsize)
          + "\nTotal inodes: " + formatReadableQuantity(fs.f_files)
@@ -93,7 +93,7 @@ void getNoSpaceLeftInfoMessage(std::filesystem::path path, std::string & msg)
     msg += "\nFilesystem: " + DiskSpaceMonitor::getFilesystemName(mount_point);
 }
 
-std::string getAdditionalExceptionInfo(const std::exception & e)
+std::string getExtraExceptionInfo(const std::exception & e)
 {
     String msg;
     try
     {
         if (auto file_exception = dynamic_cast<const Poco::FileException *>(&e))
         {
             if (file_exception->code() == ENOSPC)
-                getEnospcInfoMessage(file_exception->message(), msg);
+                getNoSpaceLeftInfoMessage(file_exception->message(), msg);
         }
         else if (auto errno_exception = dynamic_cast<const ErrnoException *>(&e))
         {
@@ -118,7 +118,7 @@ std::string getExtraExceptionInfo(const std::exception & e)
                 {
                     std::string supposed_to_be_path = errno_exception->message().substr(likely_path_begin,
                                                                                         likely_path_end - likely_path_begin);
-                    getEnospcInfoMessage(supposed_to_be_path, msg);
+                    getNoSpaceLeftInfoMessage(supposed_to_be_path, msg);
                 }
             }
 
@@ -130,7 +130,7 @@
-std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded_stacktrace /*= false*/, bool with_additional_info /*= true*/)
+std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded_stacktrace /*= false*/, bool with_extra_info /*= true*/)
 {
     std::stringstream stream;
 
@@ -141,7 +141,7 @@
     catch (const Exception & e)
     {
         stream << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace)
-               << (with_additional_info ? getAdditionalExceptionInfo(e) : "")
+               << (with_extra_info ? getExtraExceptionInfo(e) : "")
                << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
     }
     catch (const Poco::Exception & e)
@@ -150,7 +150,7 @@
         {
             stream << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code()
                 << ", e.displayText() = " << e.displayText()
-                << (with_additional_info ? getAdditionalExceptionInfo(e) : "")
+                << (with_extra_info ? getExtraExceptionInfo(e) : "")
                 << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
         }
         catch (...)
{}
 }
@@ -166,7 +166,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
                 name += " (demangling status: " + toString(status) + ")";
 
             stream << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what()
-                   << (with_additional_info ? getAdditionalExceptionInfo(e) : "")
+                   << (with_extra_info ? getExtraExceptionInfo(e) : "")
                    << ", version = " << VERSION_STRING << VERSION_OFFICIAL;
         }
         catch (...) {}
diff --git a/dbms/src/Common/Exception.h b/dbms/src/Common/Exception.h
index 2f4e85e32bd..bf29490fe72 100644
--- a/dbms/src/Common/Exception.h
+++ b/dbms/src/Common/Exception.h
@@ -88,7 +88,7 @@ void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_
  * only this stack trace will be printed.
  */
 std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded_stacktrace = false,
-                                       bool with_additional_info = true);
+                                       bool with_extra_info = true);
 
 /// Returns error code from ErrorCodes
 int getCurrentExceptionCode();
diff --git a/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h b/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h
index 1640374d390..d3057843f4c 100644
--- a/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h
+++ b/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h
@@ -103,7 +103,7 @@ public:
 
     using ReservationPtr = std::unique_ptr<Reservation>;
 
-    inline static struct statvfs getStatvfs(const std::string & path)
+    inline static struct statvfs getStatVFS(const std::string & path)
     {
         struct statvfs fs;
         if (statvfs(path.c_str(), &fs) != 0)
@@ -113,7 +113,7 @@ public:
 
     static UInt64 getUnreservedFreeSpace(const std::string & path)
     {
-        struct statvfs fs = getStatvfs(path);
+        struct statvfs fs = getStatVFS(path);
 
         UInt64 res = fs.f_bfree * fs.f_bsize;
 
@@ -152,7 +152,7 @@ public:
         return std::make_unique<Reservation>(size);
     }
 
-    /// Returns mount point of filesystem where absolute_path (must exists) is located
+    /// Returns mount point of filesystem where absolute_path (must exist) is located
     static std::filesystem::path getMountPoint(std::filesystem::path absolute_path)
     {
         if (absolute_path.is_relative())
@@ -191,7 +191,8 @@ public:
         mntent fs_info;
         constexpr size_t buf_size = 4096;   /// The same as buffer used for getmntent in glibc. 
It can happen that it's not enough
         char buf[buf_size];
-        while (getmntent_r(mounted_filesystems, &fs_info, buf, buf_size) && fs_info.mnt_dir != mount_point);
+        while (getmntent_r(mounted_filesystems, &fs_info, buf, buf_size) && fs_info.mnt_dir != mount_point)
+            ;
         endmntent(mounted_filesystems);
         if (fs_info.mnt_dir != mount_point)
             throw DB::Exception("Cannot find name of filesystem by mount point " + mount_point, ErrorCodes::SYSTEM_ERROR);
         return fs_info.mnt_fsname;

From 19e11d6300741a6d222394c0118e126bb3101bc2 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 6 Aug 2019 16:19:08 +0300
Subject: [PATCH 19/44] Updated changelog in docs

---
 docs/fa/changelog.md | 2 +-
 docs/ru/changelog.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/fa/changelog.md b/docs/fa/changelog.md
index b84693cec46..699cc9e7b7c 120000
--- a/docs/fa/changelog.md
+++ b/docs/fa/changelog.md
@@ -1 +1 @@
-../../CHANGELOG_RU.md
\ No newline at end of file
+../../CHANGELOG.md
\ No newline at end of file
diff --git a/docs/ru/changelog.md b/docs/ru/changelog.md
index b84693cec46..699cc9e7b7c 120000
--- a/docs/ru/changelog.md
+++ b/docs/ru/changelog.md
@@ -1 +1 @@
-../../CHANGELOG_RU.md
\ No newline at end of file
+../../CHANGELOG.md
\ No newline at end of file

From 4d780d99f4d62de22a60591b8204a8e750006279 Mon Sep 17 00:00:00 2001
From: Vxider
Date: Tue, 6 Aug 2019 22:03:41 +0800
Subject: [PATCH 20/44] build fix

---
 dbms/programs/server/Server.cpp | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp
index 3f96562b656..c2f7ca5f9b6 100644
--- a/dbms/programs/server/Server.cpp
+++ b/dbms/programs/server/Server.cpp
@@ -611,7 +611,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
         return socket_address;
     };
 
-    auto socket_bind_listen = [&](auto & socket, const std::string & host, [[maybe_unused]] UInt16 port, [[maybe_unused]] bool secure = 0)
+    auto socket_bind_listen = [&](auto & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure = 0)
    {
        auto address = make_socket_address(host, port);
#if !defined(POCO_CLICKHOUSE_PATCH) || POCO_VERSION < 0x01090100
@@ -681,7 +681,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
        });
 
        /// HTTPS
-        create_server("https_port", [&]([[maybe_unused]] UInt16 port)
+        create_server("https_port", [&](UInt16 port)
        {
#if USE_POCO_NETSSL
            Poco::Net::SecureServerSocket socket;
@@ -696,6 +696,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
 
            LOG_INFO(log, "Listening https://" + address.toString());
#else
+            UNUSED(port);
            throw Exception{"HTTPS protocol is disabled because Poco library was built without NetSSL support.",
                ErrorCodes::SUPPORT_IS_DISABLED};
#endif
@@ -718,7 +719,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
        });
 
        /// TCP with SSL
-        create_server ("tcp_port_secure", [&]([[maybe_unused]] UInt16 port)
+        create_server ("tcp_port_secure", [&](UInt16 port)
        {
#if USE_POCO_NETSSL
            Poco::Net::SecureServerSocket socket;
@@ -732,6 +733,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
                new Poco::Net::TCPServerParams));
            LOG_INFO(log, "Listening for connections with secure native protocol (tcp_secure): " + address.toString());
#else
+            UNUSED(port);
            throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",
                ErrorCodes::SUPPORT_IS_DISABLED};
#endif
@@ -753,7 +755,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
            LOG_INFO(log, "Listening for replica communication (interserver) http://" + address.toString());
        });
 
-        
create_server("interserver_https_port", [&]([[maybe_unused]] UInt16 port)
+        create_server("interserver_https_port", [&](UInt16 port)
        {
#if USE_POCO_NETSSL
            Poco::Net::SecureServerSocket socket;
@@ -768,12 +770,13 @@ int Server::main(const std::vector<std::string> & /*args*/)
            LOG_INFO(log, "Listening for secure replica communication (interserver) https://" + address.toString());
#else
+            UNUSED(port);
            throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",
                ErrorCodes::SUPPORT_IS_DISABLED};
#endif
        });
 
-        create_server("mysql_port", [&]([[maybe_unused]] UInt16 port)
+        create_server("mysql_port", [&](UInt16 port)
        {
#if USE_POCO_NETSSL
            Poco::Net::ServerSocket socket;
@@ -788,6 +791,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
 
            LOG_INFO(log, "Listening for MySQL compatibility protocol: " + address.toString());
#else
+            UNUSED(port);
            throw Exception{"SSL support for MySQL protocol is disabled because Poco library was built without NetSSL support.",
                ErrorCodes::SUPPORT_IS_DISABLED};
#endif

From 42287c0b7252c968618fa7df6ed5521fc637b8d7 Mon Sep 17 00:00:00 2001
From: Vxider
Date: Tue, 6 Aug 2019 22:04:51 +0800
Subject: [PATCH 21/44] format code

---
 dbms/programs/server/Server.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp
index c2f7ca5f9b6..c2fbce603d4 100644
--- a/dbms/programs/server/Server.cpp
+++ b/dbms/programs/server/Server.cpp
@@ -719,7 +719,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
        });
 
        /// TCP with SSL
-        create_server ("tcp_port_secure", [&](UInt16 port)
+        create_server("tcp_port_secure", [&](UInt16 port)
        {
#if USE_POCO_NETSSL
            Poco::Net::SecureServerSocket socket;

From 511bf370298a1c65659aaf2733243c8e1fe63f3d Mon Sep 17 00:00:00 2001
From: alexey-milovidov
Date: Tue, 6 Aug 2019 17:26:51 +0300
Subject: [PATCH 22/44] Update 00602_throw_if.sh

---
 dbms/tests/queries/0_stateless/00602_throw_if.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/tests/queries/0_stateless/00602_throw_if.sh b/dbms/tests/queries/0_stateless/00602_throw_if.sh
index 3c790d900d3..69039891bd2 100755
--- a/dbms/tests/queries/0_stateless/00602_throw_if.sh
+++ b/dbms/tests/queries/0_stateless/00602_throw_if.sh
@@ -8,4 +8,4 @@ custom_exception_message="Number equals 1000000"
 
 ${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT throwIf(number = 1000000) FROM system.numbers" 2>&1 | grep -cF "$default_exception_message"
 ${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT throwIf(number = 1000000, '$custom_exception_message') FROM system.numbers" 2>&1 | grep -cF "$custom_exception_message"
-${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT sum(x = 0) FROM (SELECT throwIf(number = 1000000) AS x FROM numbers(1000000))" 2>&1
\ No newline at end of file
+${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT sum(x = 0) FROM (SELECT throwIf(number = 1000000) AS x FROM numbers(1000000))" 2>&1

From 8bf1af35367916edb5d30fec8a887a4efc40ccf0 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 6 Aug 2019 17:36:16 +0300
Subject: [PATCH 23/44] Removed trash symlinks #6338

---
 docs/zh/data_types/domains/ipv4.md     | 78 --------------------------
 docs/zh/data_types/domains/ipv6.md     | 78 --------------------------
 docs/zh/data_types/domains/overview.md | 26 ---------
 3 files changed, 182 deletions(-)
 delete mode 120000 docs/zh/data_types/domains/ipv4.md
 delete mode 120000 docs/zh/data_types/domains/ipv6.md
 delete mode 
120000 docs/zh/data_types/domains/overview.md diff --git a/docs/zh/data_types/domains/ipv4.md b/docs/zh/data_types/domains/ipv4.md deleted file mode 120000 index 4adf13409fe..00000000000 --- a/docs/zh/data_types/domains/ipv4.md +++ /dev/null @@ -1,78 +0,0 @@ -## IPv4 - -`IPv4`是与`UInt32`类型保持二进制兼容的Domain类型,其用于存储IPv4地址的值。它提供了更为紧凑的二进制存储的同时支持识别可读性更加友好的输入输出格式。 - -### 基本使用 - -``` sql -CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY url; - -DESCRIBE TABLE hits; -``` - -``` -┌─name─┬─type───┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┐ -│ url │ String │ │ │ │ │ -│ from │ IPv4 │ │ │ │ │ -└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┘ -``` - -同时您也可以使用`IPv4`类型的列作为主键: - -``` sql -CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; -``` - -在写入与查询时,`IPv4`类型能够识别可读性更加友好的输入输出格式: - -``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.yandex', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); - -SELECT * FROM hits; -``` - -``` -┌─url────────────────────────────────┬───────────from─┐ -│ https://clickhouse.yandex/docs/en/ │ 116.106.34.242 │ -│ https://wikipedia.org │ 116.253.40.133 │ -│ https://clickhouse.yandex │ 183.247.232.58 │ -└────────────────────────────────────┴────────────────┘ -``` - -同时它提供更为紧凑的二进制存储格式: - -``` sql -SELECT toTypeName(from), hex(from) FROM hits LIMIT 1; -``` - -``` -┌─toTypeName(from)─┬─hex(from)─┐ -│ IPv4 │ B7F7E83A │ -└──────────────────┴───────────┘ -``` - -不可隐式转换为除`UInt32`以外的其他类型类型。如果要将`IPv4`类型的值转换成字符串,你可以使用`IPv4NumToString()`显示的进行转换: - -``` sql -SELECT toTypeName(s), IPv4NumToString(from) as s FROM hits LIMIT 1; -``` - -``` -┌─toTypeName(IPv4NumToString(from))─┬─s──────────────┐ -│ String │ 183.247.232.58 │ -└───────────────────────────────────┴────────────────┘ -``` - -或可以使用`CAST`将它转换为`UInt32`类型: - -``` sql -SELECT toTypeName(i), CAST(from as UInt32) as i FROM hits LIMIT 1; -``` - -``` -┌─toTypeName(CAST(from, 'UInt32'))─┬──────────i─┐ -│ UInt32 │ 3086477370 │ -└──────────────────────────────────┴────────────┘ -``` - -[来源文章](https://clickhouse.yandex/docs/en/data_types/domains/ipv4) diff --git a/docs/zh/data_types/domains/ipv6.md b/docs/zh/data_types/domains/ipv6.md deleted file mode 120000 index 1209350990f..00000000000 --- a/docs/zh/data_types/domains/ipv6.md +++ /dev/null @@ -1,78 +0,0 @@ -## IPv6 - -`IPv6`是与`FixedString(16)`类型保持二进制兼容的Domain类型,其用于存储IPv6地址的值。它提供了更为紧凑的二进制存储的同时支持识别可读性更加友好的输入输出格式。 - -### 基本用法 - -``` sql -CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY url; - -DESCRIBE TABLE hits; -``` - -``` -┌─name─┬─type───┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┐ -│ url │ String │ │ │ │ │ -│ from │ IPv6 │ │ │ │ │ -└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┘ -``` - -同时您也可以使用`IPv6`类型的列作为主键: - -``` sql -CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; -``` - -在写入与查询时,`IPv6`类型能够识别可读性更加友好的输入输出格式: - -``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.yandex', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); - -SELECT * FROM hits; -``` - -``` -┌─url────────────────────────────────┬─from──────────────────────────┐ -│ https://clickhouse.yandex │ 2001:44c8:129:2632:33:0:252:2 │ -│ https://clickhouse.yandex/docs/en/ │ 2a02:e980:1e::1 │ -│ https://wikipedia.org │ 
2a02:aa08:e000:3100::2 │ -└────────────────────────────────────┴───────────────────────────────┘ -``` - -同时它提供更为紧凑的二进制存储格式: - -``` sql -SELECT toTypeName(from), hex(from) FROM hits LIMIT 1; -``` - -``` -┌─toTypeName(from)─┬─hex(from)────────────────────────┐ -│ IPv6 │ 200144C8012926320033000002520002 │ -└──────────────────┴──────────────────────────────────┘ -``` - -不可隐式转换为除`FixedString(16)`以外的其他类型类型。如果要将`IPv6`类型的值转换成字符串,你可以使用`IPv6NumToString()`显示的进行转换: - -``` sql -SELECT toTypeName(s), IPv6NumToString(from) as s FROM hits LIMIT 1; -``` - -``` -┌─toTypeName(IPv6NumToString(from))─┬─s─────────────────────────────┐ -│ String │ 2001:44c8:129:2632:33:0:252:2 │ -└───────────────────────────────────┴───────────────────────────────┘ -``` - -或使用`CAST`将其转换为`FixedString(16)`: - -``` sql -SELECT toTypeName(i), CAST(from as FixedString(16)) as i FROM hits LIMIT 1; -``` - -``` -┌─toTypeName(CAST(from, 'FixedString(16)'))─┬─i───────┐ -│ FixedString(16) │ ��� │ -└───────────────────────────────────────────┴─────────┘ -``` - -[来源文章](https://clickhouse.yandex/docs/en/data_types/domains/ipv6) diff --git a/docs/zh/data_types/domains/overview.md b/docs/zh/data_types/domains/overview.md deleted file mode 120000 index b4db116e75b..00000000000 --- a/docs/zh/data_types/domains/overview.md +++ /dev/null @@ -1,26 +0,0 @@ -# Domains - -Domain类型是特定实现的类型,它总是与某个现存的基础类型保持二进制兼容的同时添加一些额外的特性,以能够在维持磁盘数据不变的情况下使用这些额外的特性。目前ClickHouse暂不支持自定义domain类型。 - -如果你可以在一个地方使用与Domain类型二进制兼容的基础类型,那么在相同的地方您也可以使用Domain类型,例如: - -* 使用Domain类型作为表中列的类型 -* 对Domain类型的列进行读/写数据 -* 如果与Domain二进制兼容的基础类型可以作为索引,那么Domain类型也可以作为索引 -* 将Domain类型作为参数传递给函数使用 -* 其他 - -### Domains的额外特性 - -* 在执行SHOW CREATE TABLE 或 DESCRIBE TABLE时,其对应的列总是展示为Domain类型的名称 -* 在INSERT INTO domain_table(domain_column) VALUES(...)中输入数据总是以更人性化的格式进行输入 -* 在SELECT domain_column FROM domain_table中数据总是以更人性化的格式输出 -* 在INSERT INTO domain_table FORMAT CSV ...中,实现外部源数据以更人性化的格式载入 - -### Domains类型的限制 - -* 无法通过`ALTER TABLE`将基础类型的索引转换为Domain类型的索引。 -* 当从其他列或表插入数据时,无法将string类型的值隐式地转换为Domain类型的值。 -* 无法对存储为Domain类型的值添加约束。 - -[来源文章](https://clickhouse.yandex/docs/en/data_types/domains/overview) From c3039196da80f4e08604c448c347a1abb6894bd0 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 6 Aug 2019 17:46:17 +0300 Subject: [PATCH 24/44] fixes --- dbms/src/Common/Exception.cpp | 2 +- dbms/src/Storages/MergeTree/DiskSpaceMonitor.h | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp index 320b079774e..8bbeba6fd8b 100644 --- a/dbms/src/Common/Exception.cpp +++ b/dbms/src/Common/Exception.cpp @@ -124,7 +124,7 @@ std::string getExtraExceptionInfo(const std::exception & e) } } catch (...) 
{
-        msg += "\nCannot print additional info: " + getCurrentExceptionMessage(false, false, false);
+        msg += "\nCannot print extra info: " + getCurrentExceptionMessage(false, false, false);
     }
 
     return msg;
diff --git a/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h b/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h
index d3057843f4c..9ee35b7b61b 100644
--- a/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h
+++ b/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h
@@ -5,8 +5,10 @@
 #include
 #include
 #include
+#if defined(__linux__)
 #include
 #include
+#endif
 #include
 #include
 #include
@@ -185,9 +187,15 @@ public:
     }
 
     /// Returns name of filesystem mounted to mount_point
-    static std::string getFilesystemName(const std::string & mount_point)
+#if !defined(__linux__)
+[[noreturn]]
+#endif
+    static std::string getFilesystemName([[maybe_unused]] const std::string & mount_point)
     {
+#if defined(__linux__)
         auto mounted_filesystems = setmntent("/etc/mtab", "r");
+        if (!mounted_filesystems)
+            throw DB::Exception("Cannot open /etc/mtab to get name of filesystem", ErrorCodes::SYSTEM_ERROR);
         mntent fs_info;
         constexpr size_t buf_size = 4096;   /// The same as buffer used for getmntent in glibc. It can happen that it's not enough
         char buf[buf_size];
         while (getmntent_r(mounted_filesystems, &fs_info, buf, buf_size) && fs_info.mnt_dir != mount_point)
             ;
         endmntent(mounted_filesystems);
         if (fs_info.mnt_dir != mount_point)
             throw DB::Exception("Cannot find name of filesystem by mount point " + mount_point, ErrorCodes::SYSTEM_ERROR);
         return fs_info.mnt_fsname;
+#else
+        throw DB::Exception("Supported on linux only", ErrorCodes::NOT_IMPLEMENTED);
+#endif
     }
 
 private:

From fcb04828301d13f578d01ee8c813c3c07929e521 Mon Sep 17 00:00:00 2001
From: Vasily Nemkov
Date: Mon, 15 Jul 2019 15:51:08 +0300
Subject: [PATCH 25/44] Implement geohashesInBox function. #6127

---
 dbms/src/Functions/GeoUtils.cpp             | 142 +++++++++++++--
 dbms/src/Functions/GeoUtils.h               |  25 ++-
 dbms/src/Functions/geohashesInBox.cpp       | 169 ++++++++++++++++++
 dbms/src/Functions/registerFunctionsGeo.cpp |   2 +
 .../00972_geohashesInBox.reference          |  40 +++++
 .../0_stateless/00972_geohashesInBox.sql    |  63 +++++++
 docs/en/query_language/functions/geo.md     |  32 ++++
 7 files changed, 460 insertions(+), 13 deletions(-)
 create mode 100644 dbms/src/Functions/geohashesInBox.cpp
 create mode 100644 dbms/tests/queries/0_stateless/00972_geohashesInBox.reference
 create mode 100644 dbms/tests/queries/0_stateless/00972_geohashesInBox.sql

diff --git a/dbms/src/Functions/GeoUtils.cpp b/dbms/src/Functions/GeoUtils.cpp
index 2c45b6d2cfc..5134343dae0 100644
--- a/dbms/src/Functions/GeoUtils.cpp
+++ b/dbms/src/Functions/GeoUtils.cpp
@@ -36,6 +36,10 @@ const UInt8 geohash_base32_decode_lookup_table[256] = {
 const size_t BITS_PER_SYMBOL = 5;
 const size_t MAX_PRECISION = 12;
 const size_t MAX_BITS = MAX_PRECISION * BITS_PER_SYMBOL * 1.5;
+const Float64 LON_MIN = -180;
+const Float64 LON_MAX = 180;
+const Float64 LAT_MIN = -90;
+const Float64 LAT_MAX = 90;
 
 using Encoded = std::array<UInt8, MAX_BITS>;
 
@@ -64,7 +68,7 @@ inline Encoded encodeCoordinate(Float64 coord, Float64 min, Float64 max, UInt8 b
 
     for (size_t i = 0; i < bits; ++i)
     {
-        Float64 mid = (max + min) / 2;
+        const Float64 mid = (max + min) / 2;
         if (coord >= mid)
         {
             result[i] = 1;
@@ -148,7 +152,7 @@ inline void base32Encode(const Encoded & binary, UInt8 precision, char * out)
 {
     extern const char geohash_base32_encode_lookup_table[32];
 
-    for (UInt8 i = 0; i < precision * BITS_PER_SYMBOL; i += 5)
+    for (UInt8 i = 0; i < precision * BITS_PER_SYMBOL; i += BITS_PER_SYMBOL)
     {
         UInt8 v = binary[i];
         v <<= 1;
@@ -187,24 +191,38 @@ inline Encoded base32Decode(const char * encoded_string, 
size_t encoded_length)
     return result;
 }
 
+inline Float64 getMaxSpan(CoordType type)
+{
+    if (type == LONGITUDE)
+    {
+        return LON_MAX - LON_MIN;
+    }
+
+    return LAT_MAX - LAT_MIN;
 }
 
-namespace DB
+inline Float64 getSpan(UInt8 precision, CoordType type)
 {
+    const auto bits = singleCoordBitsPrecision(precision, type);
+    // since every bit of precision divides span by 2, divide max span by 2^bits.
+    return ldexp(getMaxSpan(type), -1 * bits);
 }
 
-namespace GeoUtils
-{
-
-size_t geohashEncode(Float64 longitude, Float64 latitude, UInt8 precision, char *& out)
+inline UInt8 geohashPrecision(UInt8 precision)
 {
     if (precision == 0 || precision > MAX_PRECISION)
     {
         precision = MAX_PRECISION;
     }
 
+    return precision;
+}
+
+inline size_t geohashEncodeImpl(Float64 longitude, Float64 latitude, UInt8 precision, char * out)
+{
     const Encoded combined = merge(
-            encodeCoordinate(longitude, -180, 180, singleCoordBitsPrecision(precision, LONGITUDE)),
-            encodeCoordinate(latitude, -90, 90, singleCoordBitsPrecision(precision, LATITUDE)),
+            encodeCoordinate(longitude, LON_MIN, LON_MAX, singleCoordBitsPrecision(precision, LONGITUDE)),
+            encodeCoordinate(latitude, LAT_MIN, LAT_MAX, singleCoordBitsPrecision(precision, LATITUDE)),
             precision);
 
     base32Encode(combined, precision, out);
@@ -212,9 +230,28 @@ size_t geohashEncode(Float64 longitude, Float64 latitude, UInt8 precision, char
     return precision;
 }
 
+}
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+extern const int ARGUMENT_OUT_OF_BOUND;
+}
+
+namespace GeoUtils
+{
+
+size_t geohashEncode(Float64 longitude, Float64 latitude, UInt8 precision, char * out)
+{
+    precision = geohashPrecision(precision);
+    return geohashEncodeImpl(longitude, latitude, precision, out);
+}
+
 void geohashDecode(const char * encoded_string, size_t encoded_len, Float64 * longitude, Float64 * latitude)
 {
-    const UInt8 precision = std::min(encoded_len, MAX_PRECISION);
+    const UInt8 precision = std::min(encoded_len, static_cast<size_t>(MAX_PRECISION));
     if (precision == 0)
     {
         return;
@@ -223,8 +260,89 @@ void geohashDecode(const char * encoded_string, size_t encoded_len, Float64 * lo
     Encoded lat_encoded, lon_encoded;
     std::tie(lon_encoded, lat_encoded) = split(base32Decode(encoded_string, precision), precision);
 
-    *longitude = decodeCoordinate(lon_encoded, -180, 180, singleCoordBitsPrecision(precision, LONGITUDE));
-    *latitude = decodeCoordinate(lat_encoded, -90, 90, singleCoordBitsPrecision(precision, LATITUDE));
+    *longitude = decodeCoordinate(lon_encoded, LON_MIN, LON_MAX, singleCoordBitsPrecision(precision, LONGITUDE));
+    *latitude = decodeCoordinate(lat_encoded, LAT_MIN, LAT_MAX, singleCoordBitsPrecision(precision, LATITUDE));
+}
+
+GeohashesInBoxPreparedArgs geohashesInBoxPrepare(const Float64 longitude_min,
+                                                 const Float64 latitude_min,
+                                                 const Float64 longitude_max,
+                                                 const Float64 latitude_max,
+                                                 UInt8 precision)
+{
+    precision = geohashPrecision(precision);
+
+    if (longitude_max < longitude_min || latitude_max < latitude_min)
+    {
+        return {};
+    }
+
+    const auto lon_step = getSpan(precision, LONGITUDE);
+    const auto lat_step = getSpan(precision, LATITUDE);
+
+    // align max to the right (or up) border of geohash grid cell to ensure that cell is in result.
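+    // For example (using the bit split implied by MAX_BITS above): at precision 1 a cell is
+    // 45 x 45 degrees, since longitude takes 3 of the 5 bits (360 / 2^3 = 45) and latitude
+    // takes 2 (180 / 2^2 = 45). A box from (1, 1) to (2, 2) is thus widened to (0, 0)..(45, 45),
+    // keeping the single cell that contains it.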
+    Float64 lon_min = floor(longitude_min / lon_step) * lon_step;
+    Float64 lat_min = floor(latitude_min / lat_step) * lat_step;
+    Float64 lon_max = ceil(longitude_max / lon_step) * lon_step;
+    Float64 lat_max = ceil(latitude_max / lat_step) * lat_step;
+
+    const auto lon_span = lon_max - lon_min;
+    const auto lat_span = lat_max - lat_min;
+    // in case of a very small (or zero) span, produce at least 1 item.
+    const auto items_count = std::max(size_t{1}, static_cast<size_t>(ceil(lon_span/lon_step * lat_span/lat_step)));
+
+    return GeohashesInBoxPreparedArgs{
+        items_count,
+        precision,
+        lon_min,
+        lat_min,
+        lon_max,
+        lat_max,
+        lon_step,
+        lat_step
+    };
+}
+
+UInt64 geohashesInBox(const GeohashesInBoxPreparedArgs & args, char * out)
+{
+    if (args.items_count == 0
+            || args.precision == 0
+            || args.precision > MAX_PRECISION
+            || args.latitude_min > args.latitude_max
+            || args.longitude_min > args.longitude_max
+            || args.longitude_step <= 0
+            || args.latitude_step <= 0)
+    {
+        return 0;
+    }
+
+    UInt64 items = 0;
+    for (auto lon = args.longitude_min; lon < args.longitude_max; lon += args.longitude_step)
+    {
+        for (auto lat = args.latitude_min; lat < args.latitude_max; lat += args.latitude_step)
+        {
+            assert(items <= args.items_count);
+
+            size_t l = geohashEncodeImpl(lon, lat, args.precision, out);
+            out += l;
+            *out = '\0';
+            ++out;
+
+            ++items;
+        }
+    }
+
+    if (items == 0 && args.items_count != 0)
+    {
+        size_t l = geohashEncodeImpl(args.longitude_min, args.latitude_min, args.precision, out);
+        out += l;
+        *out = '\0';
+        ++out;
+
+        ++items;
+    }
+
+    return items;
 }
 
 }
diff --git a/dbms/src/Functions/GeoUtils.h b/dbms/src/Functions/GeoUtils.h
index b3a283ee2e6..9c5ebf98b16 100644
--- a/dbms/src/Functions/GeoUtils.h
+++ b/dbms/src/Functions/GeoUtils.h
@@ -706,10 +706,33 @@ std::string serialize(Polygon && polygon)
     return result;
 }
 
-size_t geohashEncode(Float64 longitude, Float64 latitude, UInt8 precision, char *& out);
+size_t geohashEncode(Float64 longitude, Float64 latitude, UInt8 precision, char * out);
 
 void geohashDecode(const char * encoded_string, size_t encoded_len, Float64 * longitude, Float64 * latitude);
 
+std::vector> geohashCoverBox(Float64 longitude_min, Float64 latitude_min, Float64 longitude_max, Float64 latitude_max, UInt8 precision, UInt32 max_items = 0);
+
+struct GeohashesInBoxPreparedArgs
+{
+    UInt64 items_count = 0;
+    UInt8 precision = 0;
+
+    Float64 longitude_min = 0.0;
+    Float64 latitude_min = 0.0;
+    Float64 longitude_max = 0.0;
+    Float64 latitude_max = 0.0;
+
+    Float64 longitude_step = 0.0;
+    Float64 latitude_step = 0.0;
+};
+
+GeohashesInBoxPreparedArgs geohashesInBoxPrepare(const Float64 longitude_min,
+                                                 const Float64 latitude_min,
+                                                 Float64 longitude_max,
+                                                 Float64 latitude_max,
+                                                 UInt8 precision);
+
+UInt64 geohashesInBox(const GeohashesInBoxPreparedArgs & estimation, char * out);
 
 } /// GeoUtils
diff --git a/dbms/src/Functions/geohashesInBox.cpp b/dbms/src/Functions/geohashesInBox.cpp
new file mode 100644
index 00000000000..09e36e01e77
--- /dev/null
+++ b/dbms/src/Functions/geohashesInBox.cpp
@@ -0,0 +1,169 @@
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+extern const int LOGICAL_ERROR;
+extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+extern const int TOO_LARGE_ARRAY_SIZE;
+}
+
+class FunctionGeohashesInBox : public IFunction
+{
+public:
+    static constexpr auto name = "geohashesInBox";
+    static FunctionPtr create(const Context &) { return std::make_shared<FunctionGeohashesInBox>(); }
+
+    String 
getName() const override { return name; }
+
+    size_t getNumberOfArguments() const override { return 5; }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        validateArgumentType(*this, arguments, 0, isFloat, "float");
+        validateArgumentType(*this, arguments, 1, isFloat, "float");
+        validateArgumentType(*this, arguments, 2, isFloat, "float");
+        validateArgumentType(*this, arguments, 3, isFloat, "float");
+        validateArgumentType(*this, arguments, 4, isUInt8, "integer");
+
+        if (!(arguments[0]->equals(*arguments[1]) &&
+              arguments[0]->equals(*arguments[2]) &&
+              arguments[0]->equals(*arguments[3])))
+        {
+            throw Exception("Illegal type of argument of " + getName() +
+                            ", all coordinate arguments must have the same type, instead they are: " +
+                            arguments[0]->getName() + ", " +
+                            arguments[1]->getName() + ", " +
+                            arguments[2]->getName() + ", " +
+                            arguments[3]->getName() + ".",
+                            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        }
+
+        return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
+    }
+
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    template <typename LonAndLatType, typename PrecisionType>
+    void execute(const IColumn * lon_min_column,
+                 const IColumn * lat_min_column,
+                 const IColumn * lon_max_column,
+                 const IColumn * lat_max_column,
+                 const IColumn * precision_column,
+                 ColumnPtr & result)
+    {
+        static constexpr size_t max_array_size = 10'000'000;
+
+        const auto * lon_min = checkAndGetColumn<ColumnVector<LonAndLatType>>(lon_min_column);
+        const auto * lat_min = checkAndGetColumn<ColumnVector<LonAndLatType>>(lat_min_column);
+        const auto * lon_max = checkAndGetColumn<ColumnVector<LonAndLatType>>(lon_max_column);
+        const auto * lat_max = checkAndGetColumn<ColumnVector<LonAndLatType>>(lat_max_column);
+        auto * precision = checkAndGetColumn<ColumnVector<PrecisionType>>(precision_column);
+        if (precision == nullptr)
+        {
+            precision = checkAndGetColumnConstData<ColumnVector<PrecisionType>>(precision_column);
+        }
+
+        if (!lon_min || !lat_min || !lon_max || !lat_max || !precision)
+        {
+            throw Exception("Unsupported argument types for function " + getName() + " : " +
+                            lon_min_column->getName() + ", " +
+                            lat_min_column->getName() + ", " +
+                            lon_max_column->getName() + ", " +
+                            lat_max_column->getName() + ".",
+                            ErrorCodes::LOGICAL_ERROR);
+        }
+
+        const size_t total_rows = lat_min->size();
+
+        auto col_res = ColumnArray::create(ColumnString::create());
+        ColumnString & res_strings = typeid_cast<ColumnString &>(col_res->getData());
+        ColumnArray::Offsets & res_offsets = col_res->getOffsets();
+        ColumnString::Chars & res_strings_chars = res_strings.getChars();
+        ColumnString::Offsets & res_strings_offsets = res_strings.getOffsets();
+
+        for (size_t row = 0; row < total_rows; ++row)
+        {
+            const Float64 lon_min_value = lon_min->getElement(row);
+            const Float64 lat_min_value = lat_min->getElement(row);
+            const Float64 lon_max_value = lon_max->getElement(row);
+            const Float64 lat_max_value = lat_max->getElement(row);
+
+            const auto prepared_args = GeoUtils::geohashesInBoxPrepare(
+                        lon_min_value, lat_min_value, lon_max_value, lat_max_value,
+                        precision->getElement(row % precision->size()));
+            if (prepared_args.items_count > max_array_size)
+            {
+                throw Exception(getName() + " would produce " + std::to_string(prepared_args.items_count) +
+                                " array elements, which is bigger than the allowed maximum of " + std::to_string(max_array_size),
+                                ErrorCodes::TOO_LARGE_ARRAY_SIZE);
+            }
+
+            res_strings_offsets.reserve(res_strings_offsets.size() + prepared_args.items_count);
+            res_strings_chars.resize(res_strings_chars.size() + prepared_args.items_count * (prepared_args.precision + 1));
+            const auto starting_offset = res_strings_offsets.empty() ? 
0 : res_strings_offsets.back();
+            char * out = reinterpret_cast<char *>(res_strings_chars.data() + starting_offset);
+
+            // Actually write geohashes into preallocated buffer.
+            GeoUtils::geohashesInBox(prepared_args, out);
+
+            for (UInt64 i = 1; i <= prepared_args.items_count; ++i)
+            {
+                res_strings_offsets.push_back(starting_offset + (prepared_args.precision + 1) * i);
+            }
+            res_offsets.push_back((res_offsets.empty() ? 0 : res_offsets.back()) + prepared_args.items_count);
+        }
+        if (!res_strings_offsets.empty() && res_strings_offsets.back() != res_strings_chars.size())
+        {
+            throw Exception("String column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR);
+        }
+
+        if (!res_offsets.empty() && res_offsets.back() != res_strings.size())
+        {
+            throw Exception("Array column size mismatch (internal logical error) " +
+                            std::to_string(res_offsets.back()) + " != " + std::to_string(res_strings.size()),
+                            ErrorCodes::LOGICAL_ERROR);
+        }
+
+        result = std::move(col_res);
+    }
+
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
+    {
+        const IColumn * lon_min = block.getByPosition(arguments[0]).column.get();
+        const IColumn * lat_min = block.getByPosition(arguments[1]).column.get();
+        const IColumn * lon_max = block.getByPosition(arguments[2]).column.get();
+        const IColumn * lat_max = block.getByPosition(arguments[3]).column.get();
+        const IColumn * prec = block.getByPosition(arguments[4]).column.get();
+        ColumnPtr & res = block.getByPosition(result).column;
+
+        if (checkColumn<ColumnVector<Float32>>(lon_min))
+        {
+            execute<Float32, UInt8>(lon_min, lat_min, lon_max, lat_max, prec, res);
+        }
+        else
+        {
+            execute<Float64, UInt8>(lon_min, lat_min, lon_max, lat_max, prec, res);
+        }
+    }
+};
+
+void registerFunctionGeohashesInBox(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionGeohashesInBox>();
+}
+
+}
diff --git a/dbms/src/Functions/registerFunctionsGeo.cpp b/dbms/src/Functions/registerFunctionsGeo.cpp
index 0f436811874..adc025aafe3 100644
--- a/dbms/src/Functions/registerFunctionsGeo.cpp
+++ b/dbms/src/Functions/registerFunctionsGeo.cpp
@@ -10,6 +10,7 @@ void registerFunctionPointInEllipses(FunctionFactory & factory);
 void registerFunctionPointInPolygon(FunctionFactory & factory);
 void registerFunctionGeohashEncode(FunctionFactory & factory);
 void registerFunctionGeohashDecode(FunctionFactory & factory);
+void registerFunctionGeohashesInBox(FunctionFactory & factory);
 
 #if USE_H3
 void registerFunctionGeoToH3(FunctionFactory &);
@@ -22,6 +23,7 @@ void registerFunctionsGeo(FunctionFactory & factory)
     registerFunctionPointInPolygon(factory);
     registerFunctionGeohashEncode(factory);
     registerFunctionGeohashDecode(factory);
+    registerFunctionGeohashesInBox(factory);
 
 #if USE_H3
     registerFunctionGeoToH3(factory);
diff --git a/dbms/tests/queries/0_stateless/00972_geohashesInBox.reference b/dbms/tests/queries/0_stateless/00972_geohashesInBox.reference
new file mode 100644
index 00000000000..e6844fa8394
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00972_geohashesInBox.reference
@@ -0,0 +1,40 @@
+center
+['7zz','ebp','kpb','s00']
+['7zzzm','7zzzq','7zzzr','7zzzt','7zzzv','7zzzw','7zzzx','7zzzy','7zzzz','ebpbj','ebpbm','ebpbn','ebpbp','ebpbq','ebpbr','ebpbt','ebpbw','ebpbx','kpbp2','kpbp3','kpbp6','kpbp8','kpbp9','kpbpb','kpbpc','kpbpd','kpbpf','s0000','s0001','s0002','s0003','s0004','s0006','s0008','s0009','s000d']
+['7zzzz','ebpbp','kpbpb','s0000']
+north pole
+['bpb']
+['gzz']
+['upb']
+['zzz']
+south pole
+['000']
+['5bp']
+['h00']
+['pbp']
+wrap point around equator
+['rzz']
+['xbp']
+['2pb']
+['800'] +arbitrary values in all 4 quarters +['w1muy4','w1muy5','w1muy6','w1muy7','w1muyh','w1muyk'] +['thym0','thym1','thym2','thym3','thym4','thym6','thym8','thym9','thymd'] +['6gkzx5','6gkzx7','6gkzxh','6gkzxj','6gkzxk','6gkzxm'] +['h927mu','h927mv','h927my','h927qh','h927qj','h927qn'] +small range always produces array of length 1 +zooming +['s7'] +['s7w'] +['s7w1','s7w3','s7w4','s7w5','s7w6','s7w7','s7w9','s7wc','s7wd','s7we','s7wf','s7wg','s7wh','s7wj','s7wk','s7wm','s7wn','s7wp','s7wq','s7wr','s7ws','s7wt','s7wu','s7wv','s7ww','s7wx','s7wy','s7wz'] +['s7w1z','s7w3b','s7w3c','s7w3f','s7w3g','s7w3u','s7w4p','s7w4r','s7w4x','s7w4z','s7w5p','s7w60','s7w61','s7w62','s7w63','s7w64','s7w65','s7w66','s7w67','s7w68','s7w69','s7w6b','s7w6c','s7w6d','s7w6e','s7w6f','s7w6g','s7w6h','s7w6k','s7w6s','s7w6u','s7w70','s7w71','s7w74','s7w75','s7w7h'] +['s7w1z0','s7w1z1','s7w1z2','s7w1z3','s7w1z4','s7w1z5','s7w1z6','s7w1z7','s7w1z8','s7w1z9','s7w1zb','s7w1zc','s7w1zd','s7w1ze','s7w1zf','s7w1zg','s7w1zh','s7w1zj','s7w1zk','s7w1zm','s7w1zn','s7w1zp','s7w1zq','s7w1zr','s7w1zs','s7w1zt','s7w1zu','s7w1zv','s7w1zw','s7w1zx','s7w1zy','s7w1zz','s7w3b0','s7w3b1','s7w3b2','s7w3b3','s7w3b4','s7w3b5','s7w3b6','s7w3b7','s7w3b8','s7w3b9','s7w3bd','s7w3be','s7w3bh','s7w3bj','s7w3bk','s7w3bm','s7w3bn','s7w3bp','s7w3bq','s7w3br','s7w3bs','s7w3bt','s7w3bw','s7w3bx','s7w4p0','s7w4p1','s7w4p2','s7w4p3','s7w4p4','s7w4p5','s7w4p6','s7w4p7','s7w4p8','s7w4p9','s7w4pb','s7w4pc','s7w4pd','s7w4pe','s7w4pf','s7w4pg','s7w4ph','s7w4pk','s7w4ps','s7w4pu','s7w600','s7w601','s7w602','s7w603','s7w604','s7w605','s7w606','s7w607','s7w608','s7w609','s7w60d','s7w60e','s7w60h','s7w60k','s7w60s'] +['s7w1z0g','s7w1z0u','s7w1z0v','s7w1z0y','s7w1z0z','s7w1z15','s7w1z17','s7w1z1e','s7w1z1g','s7w1z1h','s7w1z1j','s7w1z1k','s7w1z1m','s7w1z1n','s7w1z1p','s7w1z1q','s7w1z1r','s7w1z1s','s7w1z1t','s7w1z1u','s7w1z1v','s7w1z1w','s7w1z1x','s7w1z1y','s7w1z1z','s7w1z2b','s7w1z2c','s7w1z2f','s7w1z30','s7w1z31','s7w1z32','s7w1z33','s7w1z34','s7w1z36','s7w1z38','s7w1z39','s7w1z3b','s7w1z3c','s7w1z3d','s7w1z3f','s7w1z45','s7w1z47','s7w1z4e','s7w1z4h','s7w1z4j','s7w1z4k','s7w1z4m','s7w1z4n','s7w1z4p','s7w1z4q','s7w1z4r','s7w1z4s','s7w1z4t','s7w1z4w','s7w1z4x','s7w1z60','s7w1z61','s7w1z62','s7w1z63','s7w1z64','s7w1z66','s7w1z68','s7w1z69','s7w1z6d'] +['s7w1z0gs','s7w1z0gt','s7w1z0gu','s7w1z0gv','s7w1z0gw','s7w1z0gx','s7w1z0gy','s7w1z0gz','s7w1z0uh','s7w1z0uj','s7w1z0uk','s7w1z0um','s7w1z0un','s7w1z0up','s7w1z0uq','s7w1z0ur','s7w1z158','s7w1z159','s7w1z15b','s7w1z15c','s7w1z15d','s7w1z15f','s7w1z1h0','s7w1z1h1','s7w1z1h2','s7w1z1h3','s7w1z1h4','s7w1z1h6'] +['s7w1z0gs3','s7w1z0gs6','s7w1z0gs7','s7w1z0gs9','s7w1z0gsc','s7w1z0gsd','s7w1z0gse','s7w1z0gsf','s7w1z0gsg','s7w1z0gsk','s7w1z0gss','s7w1z0gsu','s7w1z0gt1','s7w1z0gt4','s7w1z0gt5','s7w1z0gth'] +['s7w1z0gs3y','s7w1z0gs3z','s7w1z0gs6n','s7w1z0gs6p','s7w1z0gs9b','s7w1z0gsd0'] +['s7w1z0gs3y0','s7w1z0gs3y1','s7w1z0gs3y2','s7w1z0gs3y3'] +['s7w1z0gs3y0z','s7w1z0gs3y1p','s7w1z0gs3y1r','s7w1z0gs3y1x','s7w1z0gs3y2b','s7w1z0gs3y2c','s7w1z0gs3y2f','s7w1z0gs3y2g','s7w1z0gs3y2u','s7w1z0gs3y2v','s7w1z0gs3y30','s7w1z0gs3y31','s7w1z0gs3y32','s7w1z0gs3y33','s7w1z0gs3y34','s7w1z0gs3y35','s7w1z0gs3y36','s7w1z0gs3y37','s7w1z0gs3y38','s7w1z0gs3y39','s7w1z0gs3y3d','s7w1z0gs3y3e','s7w1z0gs3y3h','s7w1z0gs3y3j','s7w1z0gs3y3k','s7w1z0gs3y3m','s7w1z0gs3y3s','s7w1z0gs3y3t'] 
+['s7w1z0gs3y0z','s7w1z0gs3y1p','s7w1z0gs3y1r','s7w1z0gs3y1x','s7w1z0gs3y2b','s7w1z0gs3y2c','s7w1z0gs3y2f','s7w1z0gs3y2g','s7w1z0gs3y2u','s7w1z0gs3y2v','s7w1z0gs3y30','s7w1z0gs3y31','s7w1z0gs3y32','s7w1z0gs3y33','s7w1z0gs3y34','s7w1z0gs3y35','s7w1z0gs3y36','s7w1z0gs3y37','s7w1z0gs3y38','s7w1z0gs3y39','s7w1z0gs3y3d','s7w1z0gs3y3e','s7w1z0gs3y3h','s7w1z0gs3y3j','s7w1z0gs3y3k','s7w1z0gs3y3m','s7w1z0gs3y3s','s7w1z0gs3y3t'] +['s7w1z0gs3y0z','s7w1z0gs3y1p','s7w1z0gs3y1r','s7w1z0gs3y1x','s7w1z0gs3y2b','s7w1z0gs3y2c','s7w1z0gs3y2f','s7w1z0gs3y2g','s7w1z0gs3y2u','s7w1z0gs3y2v','s7w1z0gs3y30','s7w1z0gs3y31','s7w1z0gs3y32','s7w1z0gs3y33','s7w1z0gs3y34','s7w1z0gs3y35','s7w1z0gs3y36','s7w1z0gs3y37','s7w1z0gs3y38','s7w1z0gs3y39','s7w1z0gs3y3d','s7w1z0gs3y3e','s7w1z0gs3y3h','s7w1z0gs3y3j','s7w1z0gs3y3k','s7w1z0gs3y3m','s7w1z0gs3y3s','s7w1z0gs3y3t'] +errors diff --git a/dbms/tests/queries/0_stateless/00972_geohashesInBox.sql b/dbms/tests/queries/0_stateless/00972_geohashesInBox.sql new file mode 100644 index 00000000000..f382bf234ac --- /dev/null +++ b/dbms/tests/queries/0_stateless/00972_geohashesInBox.sql @@ -0,0 +1,63 @@ +-- test data acquired with: https://github.com/sunng87/node-geohash +-- geohash.bboxes(minlat, minlon, maxlat, maxlon, precision) +-- as +-- geohashesInBox(minlon, minlat, maxlon, maxlat, precision) +-- except for the cases when JS-version produces result outside of given region, +-- typically at wrap points: poles, 0-latitude and 0-longitude. + +select 'center'; +SELECT arraySort(geohashesInBox(-1.0, -1.0, 1.0, 1.0, 3)); +SELECT arraySort(geohashesInBox(-0.1, -0.1, 0.1, 0.1, 5)); +SELECT arraySort(geohashesInBox(-0.01, -0.01, 0.01, 0.01, 5)); + +select 'north pole'; +SELECT arraySort(geohashesInBox(-180.0, 89.0, -179.0, 90.0, 3)); +SELECT arraySort(geohashesInBox(-1.0, 89.0, 0.0, 90.0, 3)); +SELECT arraySort(geohashesInBox(0.0, 89.0, 1.0, 90.0, 3)); +SELECT arraySort(geohashesInBox(179.0, 89.0, 180.0, 90.0, 3)); + +select 'south pole'; +SELECT arraySort(geohashesInBox(-180.0, -90.0, -179.0, -89.0, 3)); +SELECT arraySort(geohashesInBox(-1.0, -90.0, 0.0, -89.0, 3)); +SELECT arraySort(geohashesInBox(0.0, -90.0, 1.0, -89.0, 3)); +SELECT arraySort(geohashesInBox(179.0, -90.0, 180.0, -89.0, 3)); + +select 'wrap point around equator'; +SELECT arraySort(geohashesInBox(179.0, -1.0, 180.0, 0.0, 3)); +SELECT arraySort(geohashesInBox(179.0, 0.0, 180.0, 1.0, 3)); +SELECT arraySort(geohashesInBox(-180.0, -1.0, -179.0, 0.0, 3)); +SELECT arraySort(geohashesInBox(-180.0, 0.0, -179.0, 1.0, 3)); + +select 'arbitrary values in all 4 quarters'; +SELECT arraySort(geohashesInBox(98.36, 7.88, 98.37, 7.89, 6)); +SELECT arraySort(geohashesInBox(53.8, 27.6, 53.9, 27.7, 5)); +SELECT arraySort(geohashesInBox(-49.26, -25.38, -49.25, -25.37, 6)); +SELECT arraySort(geohashesInBox(23.11, -82.37, 23.12, -82.36, 6)); + +select 'small range always produces array of length 1'; +SELECT lon/5 - 180 as lon1, lat/5 - 90 as lat1, lon1 as lon2, lat1 as lat2, geohashesInBox(lon1, lat1, lon2, lat2, 1) as g FROM (SELECT arrayJoin(range(360*5)) as lon, arrayJoin(range(180*5)) as lat) WHERE length(g) != 1; +SELECT lon/5 - 40 as lon1, lat/5 - 20 as lat1, lon1 as lon2, lat1 as lat2, geohashesInBox(lon1, lat1, lon2, lat2, 12) as g FROM (SELECT arrayJoin(range(80*5)) as lon, arrayJoin(range(10*5)) as lat) WHERE length(g) != 1; +SELECT lon/5 - 40 as lon1, lat/5 - 20 as lat1, lon1 + 0.0000000001 as lon2, lat1 + 0.0000000001 as lat2, geohashesInBox(lon1, lat1, lon2, lat2, 1) as g FROM (SELECT arrayJoin(range(80*5)) as lon, 
arrayJoin(range(10*5)) as lat) WHERE length(g) != 1;
+
+select 'zooming';
+SELECT arraySort(geohashesInBox(20.0, 20.0, 21.0, 21.0, 2));
+SELECT arraySort(geohashesInBox(20.0, 20.0, 21.0, 21.0, 3));
+SELECT arraySort(geohashesInBox(20.0, 20.0, 21.0, 21.0, 4));
+SELECT arraySort(geohashesInBox(20.0, 20.0, 20.25, 20.25, 5));
+SELECT arraySort(geohashesInBox(20.0, 20.0, 20.0625, 20.0625, 6));
+SELECT arraySort(geohashesInBox(20.0, 20.0, 20.01, 20.01, 7));
+SELECT arraySort(geohashesInBox(20.0, 20.0, 20.001, 20.001, 8));
+SELECT arraySort(geohashesInBox(20.0, 20.0, 20.0001, 20.0001, 9));
+SELECT arraySort(geohashesInBox(20.0, 20.0, 20.00001, 20.00001, 10));
+SELECT arraySort(geohashesInBox(20.0, 20.0, 20.000001, 20.000001, 11));
+SELECT arraySort(geohashesInBox(20.0, 20.0, 20.000001, 20.000001, 12));
+
+ -- precision greater than 12 is truncated to 12, so these two calls would produce the same result as above
+SELECT arraySort(geohashesInBox(20.0, 20.0, 20.000001, 20.000001, 13));
+SELECT arraySort(geohashesInBox(20.0, 20.0, 20.000001, 20.000001, 14));
+
+select 'errors';
+SELECT geohashesInBox(); -- { serverError 42 } -- not enough arguments
+SELECT geohashesInBox(1, 2, 3, 4, 5); -- { serverError 43 } -- wrong types of arguments
+SELECT geohashesInBox(toFloat32(1.0), 2.0, 3.0, 4.0, 5); -- { serverError 43 } -- all lats and longs should be of the same type
+SELECT geohashesInBox(24.48, 40.56, 24.785, 40.81, 12); -- { serverError 128 } -- too many elements in array
diff --git a/docs/en/query_language/functions/geo.md b/docs/en/query_language/functions/geo.md
index 2c84a4516ba..d05345da29e 100644
--- a/docs/en/query_language/functions/geo.md
+++ b/docs/en/query_language/functions/geo.md
@@ -183,4 +183,36 @@ SELECT geoToH3(37.79506683, 55.71290588, 15) as h3Index
 └────────────────────┘
 ```
 
+## geohashesInBox
+
+Returns an array of geohash-encoded strings of the given precision that fall inside and intersect the boundaries of the given box, basically a 2D grid flattened into an array.
+
+**Input values**
+
+- longitude_min - min longitude, floating-point value in the range `[-180°, 180°]`
+- latitude_min - min latitude, floating-point value in the range `[-90°, 90°]`
+- longitude_max - max longitude, floating-point value in the range `[-180°, 180°]`
+- latitude_max - max latitude, floating-point value in the range `[-90°, 90°]`
+- precision - geohash precision, `UInt8` in the range `[1, 12]`
+
+Please note that all coordinate parameters should be of the same type: either `Float32` or `Float64`.
+
+**Returned values**
+
+- an array of precision-long strings of geohash boxes covering the provided area; you should not rely on the order of items.
+- [] - an empty array if the *min* values of *latitude* and *longitude* aren't less than the corresponding *max* values.
+
+Please note that the function will throw an exception if the resulting array is over 10'000'000 items long.
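+
+For example, following the rule above, a box whose *min* coordinates are not less than its *max* coordinates should return an empty array (an illustrative query):
+
+```
+SELECT geohashesInBox(24.785, 40.81, 24.48, 40.56, 4) AS empty_box
+```
+```
+┌─empty_box─┐
+│ []        │
+└───────────┘
+```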
+ +**Example** + +``` +SELECT geohashesInBox(24.48, 40.56, 24.785, 40.81, 4) AS thasos +``` +``` +┌─thasos──────────────────────────────────────┐ +│ ['sx1q','sx1r','sx32','sx1w','sx1x','sx38'] │ +└─────────────────────────────────────────────┘ +``` + [Original article](https://clickhouse.yandex/docs/en/query_language/functions/geo/) From 9cd9c694496d5ae793718c9577ec8bb3a5fedc77 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Tue, 6 Aug 2019 17:57:17 +0300 Subject: [PATCH 26/44] geohashesInbox(lon_min, lat_min, lon_max, lat_max, precision) function (#6127) --- dbms/src/DataTypes/IDataType.h | 1 - dbms/src/Functions/GeoUtils.cpp | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index e5020fe19de..359874c5660 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -602,7 +602,6 @@ inline bool isStringOrFixedString(const T & data_type) return WhichDataType(data_type).isStringOrFixedString(); } - inline bool isNotDecimalButComparableToDecimal(const DataTypePtr & data_type) { WhichDataType which(data_type); diff --git a/dbms/src/Functions/GeoUtils.cpp b/dbms/src/Functions/GeoUtils.cpp index 5134343dae0..d08216ad5c6 100644 --- a/dbms/src/Functions/GeoUtils.cpp +++ b/dbms/src/Functions/GeoUtils.cpp @@ -1,6 +1,8 @@ #include #include +#include + namespace { From 224ed0ca670ccc00921caa7f513b84bb0c28f9d9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 6 Aug 2019 17:59:19 +0300 Subject: [PATCH 27/44] Revert wrong merge commit. This reverts commit 9cd9c694496d5ae793718c9577ec8bb3a5fedc77. --- dbms/src/DataTypes/IDataType.h | 1 + dbms/src/Functions/GeoUtils.cpp | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 359874c5660..e5020fe19de 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -602,6 +602,7 @@ inline bool isStringOrFixedString(const T & data_type) return WhichDataType(data_type).isStringOrFixedString(); } + inline bool isNotDecimalButComparableToDecimal(const DataTypePtr & data_type) { WhichDataType which(data_type); diff --git a/dbms/src/Functions/GeoUtils.cpp b/dbms/src/Functions/GeoUtils.cpp index d08216ad5c6..5134343dae0 100644 --- a/dbms/src/Functions/GeoUtils.cpp +++ b/dbms/src/Functions/GeoUtils.cpp @@ -1,8 +1,6 @@ #include #include -#include - namespace { From aefc6648190495b15fefc9568ad8bec6b0585076 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 6 Aug 2019 21:54:06 +0300 Subject: [PATCH 28/44] refactor throwFromErrno --- dbms/src/Common/CounterInFile.h | 6 ++-- dbms/src/Common/Exception.cpp | 28 ++++++------------- dbms/src/Common/Exception.h | 7 +++-- dbms/src/Common/StatusFile.cpp | 8 +++--- dbms/src/Common/createHardLink.cpp | 9 +++--- dbms/src/IO/MMapReadBufferFromFile.cpp | 2 +- dbms/src/IO/ReadBufferAIO.cpp | 2 +- dbms/src/IO/ReadBufferFromFile.cpp | 4 +-- dbms/src/IO/ReadBufferFromFileDescriptor.cpp | 4 +-- dbms/src/IO/WriteBufferAIO.cpp | 8 +++--- dbms/src/IO/WriteBufferFromFile.cpp | 4 +-- dbms/src/IO/WriteBufferFromFileDescriptor.cpp | 8 +++--- dbms/src/IO/WriteBufferFromTemporaryFile.cpp | 2 +- .../DistributedBlockOutputStream.cpp | 2 +- .../src/Storages/MergeTree/DiskSpaceMonitor.h | 4 +-- .../Storages/MergeTree/MergeTreeDataPart.cpp | 6 ++-- dbms/src/Storages/StorageStripeLog.cpp | 2 +- dbms/src/Storages/StorageTinyLog.cpp | 2 +- .../tests/remove_symlink_directory.cpp | 2 +- 19 files changed, 51 insertions(+), 59 
deletions(-)

diff --git a/dbms/src/Common/CounterInFile.h b/dbms/src/Common/CounterInFile.h
index cbf7105a728..6b982ad0a46 100644
--- a/dbms/src/Common/CounterInFile.h
+++ b/dbms/src/Common/CounterInFile.h
@@ -67,13 +67,13 @@ public:
 
         int fd = ::open(path.c_str(), O_RDWR | O_CREAT, 0666);
         if (-1 == fd)
-            DB::throwFromErrno("Cannot open file " + path, DB::ErrorCodes::CANNOT_OPEN_FILE);
+            DB::throwFromErrno("Cannot open file " + path, path, DB::ErrorCodes::CANNOT_OPEN_FILE);
 
         try
         {
             int flock_ret = flock(fd, LOCK_EX);
             if (-1 == flock_ret)
-                DB::throwFromErrno("Cannot lock file " + path, DB::ErrorCodes::CANNOT_OPEN_FILE);
+                DB::throwFromErrno("Cannot lock file " + path, path, DB::ErrorCodes::CANNOT_OPEN_FILE);
 
             if (!file_doesnt_exists)
             {
@@ -141,7 +141,7 @@ public:
 
         int fd = ::open(path.c_str(), O_RDWR | O_CREAT, 0666);
         if (-1 == fd)
-            DB::throwFromErrno("Cannot open file " + path, DB::ErrorCodes::CANNOT_OPEN_FILE);
+            DB::throwFromErrno("Cannot open file " + path, path, DB::ErrorCodes::CANNOT_OPEN_FILE);
 
         try
         {
diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp
index 8bbeba6fd8b..39610af7540 100644
--- a/dbms/src/Common/Exception.cpp
+++ b/dbms/src/Common/Exception.cpp
@@ -55,6 +55,11 @@ void throwFromErrno(const std::string & s, int code, int e)
     throw ErrnoException(s + ", " + errnoToString(code, e), code, e);
 }
 
+void throwFromErrno(const std::string & s, const std::string & path, int code, int the_errno)
+{
+    throw ErrnoException(s + ", " + errnoToString(code, the_errno), code, the_errno, path);
+}
+
 void tryLogCurrentException(const char * log_name, const std::string & start_of_message)
 {
     tryLogCurrentException(&Logger::get(log_name), start_of_message);
@@ -73,15 +78,12 @@ void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_
 
 void getNoSpaceLeftInfoMessage(std::filesystem::path path, std::string & msg)
 {
+    path = std::filesystem::absolute(path);
     /// It's possible to get ENOSPC for non existent file (e.g. if there are no free inodes and creat() fails)
     /// So try to get info for existent parent directory.
     while (!std::filesystem::exists(path) && path.has_relative_path())
         path = path.parent_path();
 
-    /// Most likely path is invalid
-    if (!path.has_relative_path())
-        return;
-
     auto fs = DiskSpaceMonitor::getStatVFS(path);
     msg += "\nTotal space: " + formatReadableSizeWithBinarySuffix(fs.f_blocks * fs.f_bsize)
          + "\nAvailable space: " + formatReadableSizeWithBinarySuffix(fs.f_bavail * fs.f_bsize)
@@ -105,22 +107,8 @@ std::string getExtraExceptionInfo(const std::exception & e)
         }
         else if (auto errno_exception = dynamic_cast<const ErrnoException *>(&e))
         {
-            if (errno_exception->getErrno() == ENOSPC)
-            {
-                /// Try to extract path from text exception message. Most likely the exception was thrown by
-                /// DB::throwFromErrno("Some message" + filename, ...);
-                /// We suppose "Some message " does not contain '/' and filename is an absolute path starts with '/'.
-                /// throwFromErrno appends ", errno: ..." to the first argument.
-                /// It's ugly hack which may not work correctly. However, getEnospcInfoMessage(...) checks if path exists.
-                size_t likely_path_begin = errno_exception->message().find('/');
-                size_t likely_path_end = errno_exception->message().find(", errno: ", likely_path_begin);
-                if (likely_path_end != std::string::npos)
-                {
-                    std::string supposed_to_be_path = errno_exception->message().substr(likely_path_begin,
-                                                                                        likely_path_end - likely_path_begin);
-                    getNoSpaceLeftInfoMessage(supposed_to_be_path, msg);
-                }
-            }
+            if (errno_exception->getErrno() == ENOSPC && errno_exception->getPath())
+                getNoSpaceLeftInfoMessage(errno_exception->getPath().value(), msg);
         }
     }
     catch (...)
    {
diff --git a/dbms/src/Common/Exception.h b/dbms/src/Common/Exception.h
index bf29490fe72..6416a6a6053 100644
--- a/dbms/src/Common/Exception.h
+++ b/dbms/src/Common/Exception.h
@@ -52,16 +52,18 @@ private:
 class ErrnoException : public Exception
 {
 public:
-    ErrnoException(const std::string & msg, int code, int saved_errno_)
-        : Exception(msg, code), saved_errno(saved_errno_) {}
+    ErrnoException(const std::string & msg, int code, int saved_errno_, const std::optional<std::string> & path_ = {})
+        : Exception(msg, code), saved_errno(saved_errno_), path(path_) {}
 
     ErrnoException * clone() const override { return new ErrnoException(*this); }
     void rethrow() const override { throw *this; }
 
     int getErrno() const { return saved_errno; }
+    const std::optional<std::string> getPath() const { return path; }
 
 private:
     int saved_errno;
+    std::optional<std::string> path;
 
     const char * name() const throw() override { return "DB::ErrnoException"; }
     const char * className() const throw() override { return "DB::ErrnoException"; }
@@ -73,6 +75,7 @@ using Exceptions = std::vector<Exception>;
 
 std::string errnoToString(int code, int the_errno = errno);
 [[noreturn]] void throwFromErrno(const std::string & s, int code, int the_errno = errno);
+[[noreturn]] void throwFromErrno(const std::string & s, const std::string & path, int code, int the_errno = errno);
 
 /** Try to write an exception to the log (and forget about it).
diff --git a/dbms/src/Common/StatusFile.cpp b/dbms/src/Common/StatusFile.cpp
index afe42262b55..8cb5439afe3 100644
--- a/dbms/src/Common/StatusFile.cpp
+++ b/dbms/src/Common/StatusFile.cpp
@@ -51,7 +51,7 @@ StatusFile::StatusFile(const std::string & path_)
 
     fd = ::open(path.c_str(), O_WRONLY | O_CREAT, 0666);
     if (-1 == fd)
-        throwFromErrno("Cannot open file " + path, ErrorCodes::CANNOT_OPEN_FILE);
+        throwFromErrno("Cannot open file " + path, path, ErrorCodes::CANNOT_OPEN_FILE);
 
     try
     {
@@ -61,14 +61,14 @@ StatusFile::StatusFile(const std::string & path_)
             if (errno == EWOULDBLOCK)
                 throw Exception("Cannot lock file " + path + ". Another server instance in same directory is already running.", ErrorCodes::CANNOT_OPEN_FILE);
             else
-                throwFromErrno("Cannot lock file " + path, ErrorCodes::CANNOT_OPEN_FILE);
+                throwFromErrno("Cannot lock file " + path, path, ErrorCodes::CANNOT_OPEN_FILE);
        }
 
        if (0 != ftruncate(fd, 0))
-            throwFromErrno("Cannot ftruncate " + path, ErrorCodes::CANNOT_TRUNCATE_FILE);
+            throwFromErrno("Cannot ftruncate " + path, path, ErrorCodes::CANNOT_TRUNCATE_FILE);
 
        if (0 != lseek(fd, 0, SEEK_SET))
-            throwFromErrno("Cannot lseek " + path, ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
+            throwFromErrno("Cannot lseek " + path, path, ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
 
        /// Write information about current server instance to the file.
{ diff --git a/dbms/src/Common/createHardLink.cpp b/dbms/src/Common/createHardLink.cpp index 824b7e99086..2e6175de64e 100644 --- a/dbms/src/Common/createHardLink.cpp +++ b/dbms/src/Common/createHardLink.cpp @@ -26,16 +26,17 @@ void createHardLink(const String & source_path, const String & destination_path) struct stat destination_descr; if (0 != lstat(source_path.c_str(), &source_descr)) - throwFromErrno("Cannot stat " + source_path, ErrorCodes::CANNOT_STAT); + throwFromErrno("Cannot stat " + source_path, source_path, ErrorCodes::CANNOT_STAT); if (0 != lstat(destination_path.c_str(), &destination_descr)) - throwFromErrno("Cannot stat " + destination_path, ErrorCodes::CANNOT_STAT); + throwFromErrno("Cannot stat " + destination_path, destination_path, ErrorCodes::CANNOT_STAT); if (source_descr.st_ino != destination_descr.st_ino) - throwFromErrno("Destination file " + destination_path + " is already exist and have different inode.", ErrorCodes::CANNOT_LINK, link_errno); + throwFromErrno("Destination file " + destination_path + " is already exist and have different inode.", + destination_path, ErrorCodes::CANNOT_LINK, link_errno); } else - throwFromErrno("Cannot link " + source_path + " to " + destination_path, ErrorCodes::CANNOT_LINK); + throwFromErrno("Cannot link " + source_path + " to " + destination_path, destination_path, ErrorCodes::CANNOT_LINK); } } diff --git a/dbms/src/IO/MMapReadBufferFromFile.cpp b/dbms/src/IO/MMapReadBufferFromFile.cpp index 74c07c40782..c4b3fa6222b 100644 --- a/dbms/src/IO/MMapReadBufferFromFile.cpp +++ b/dbms/src/IO/MMapReadBufferFromFile.cpp @@ -29,7 +29,7 @@ void MMapReadBufferFromFile::open(const std::string & file_name) fd = ::open(file_name.c_str(), O_RDONLY); if (-1 == fd) - throwFromErrno("Cannot open file " + file_name, errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + throwFromErrno("Cannot open file " + file_name, file_name, errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); } diff --git a/dbms/src/IO/ReadBufferAIO.cpp b/dbms/src/IO/ReadBufferAIO.cpp index f47e04bff75..f76e000a12c 100644 --- a/dbms/src/IO/ReadBufferAIO.cpp +++ b/dbms/src/IO/ReadBufferAIO.cpp @@ -54,7 +54,7 @@ ReadBufferAIO::ReadBufferAIO(const std::string & filename_, size_t buffer_size_, if (fd == -1) { auto error_code = (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE; - throwFromErrno("Cannot open file " + filename, error_code); + throwFromErrno("Cannot open file " + filename, filename, error_code); } } diff --git a/dbms/src/IO/ReadBufferFromFile.cpp b/dbms/src/IO/ReadBufferFromFile.cpp index b94fce8e033..c37d8b24595 100644 --- a/dbms/src/IO/ReadBufferFromFile.cpp +++ b/dbms/src/IO/ReadBufferFromFile.cpp @@ -41,12 +41,12 @@ ReadBufferFromFile::ReadBufferFromFile( fd = ::open(file_name.c_str(), flags == -1 ? O_RDONLY : flags); if (-1 == fd) - throwFromErrno("Cannot open file " + file_name, errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + throwFromErrno("Cannot open file " + file_name, file_name, errno == ENOENT ? 
ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); #ifdef __APPLE__ if (o_direct) { if (fcntl(fd, F_NOCACHE, 1) == -1) - throwFromErrno("Cannot set F_NOCACHE on file " + file_name, ErrorCodes::CANNOT_OPEN_FILE); + throwFromErrno("Cannot set F_NOCACHE on file " + file_name, file_name, ErrorCodes::CANNOT_OPEN_FILE); } #endif } diff --git a/dbms/src/IO/ReadBufferFromFileDescriptor.cpp b/dbms/src/IO/ReadBufferFromFileDescriptor.cpp index 70cc84567f3..000abc3a7fc 100644 --- a/dbms/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/dbms/src/IO/ReadBufferFromFileDescriptor.cpp @@ -61,7 +61,7 @@ bool ReadBufferFromFileDescriptor::nextImpl() if (-1 == res && errno != EINTR) { ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadFailed); - throwFromErrno("Cannot read from file " + getFileName(), ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + throwFromErrno("Cannot read from file " + getFileName(), getFileName(), ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); } if (res > 0) @@ -124,7 +124,7 @@ off_t ReadBufferFromFileDescriptor::doSeek(off_t offset, int whence) pos = working_buffer.end(); off_t res = ::lseek(fd, new_pos, SEEK_SET); if (-1 == res) - throwFromErrno("Cannot seek through file " + getFileName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throwFromErrno("Cannot seek through file " + getFileName(), getFileName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE); pos_in_file = new_pos; watch.stop(); diff --git a/dbms/src/IO/WriteBufferAIO.cpp b/dbms/src/IO/WriteBufferAIO.cpp index 2fe7da27809..2bbdad95b04 100644 --- a/dbms/src/IO/WriteBufferAIO.cpp +++ b/dbms/src/IO/WriteBufferAIO.cpp @@ -62,7 +62,7 @@ WriteBufferAIO::WriteBufferAIO(const std::string & filename_, size_t buffer_size if (fd == -1) { auto error_code = (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE; - throwFromErrno("Cannot open file " + filename, error_code); + throwFromErrno("Cannot open file " + filename, filename, error_code); } } @@ -96,7 +96,7 @@ void WriteBufferAIO::sync() /// Ask OS to flush data to disk. int res = ::fsync(fd); if (res == -1) - throwFromErrno("Cannot fsync " + getFileName(), ErrorCodes::CANNOT_FSYNC); + throwFromErrno("Cannot fsync " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSYNC); } void WriteBufferAIO::nextImpl() @@ -173,7 +173,7 @@ void WriteBufferAIO::doTruncate(off_t length) int res = ::ftruncate(fd, length); if (res == -1) - throwFromErrno("Cannot truncate file " + filename, ErrorCodes::CANNOT_TRUNCATE_FILE); + throwFromErrno("Cannot truncate file " + filename, filename, ErrorCodes::CANNOT_TRUNCATE_FILE); } void WriteBufferAIO::flush() @@ -427,7 +427,7 @@ void WriteBufferAIO::finalize() /// Truncate the file to remove unnecessary zeros from it. int res = ::ftruncate(fd, max_pos_in_file); if (res == -1) - throwFromErrno("Cannot truncate file " + filename, ErrorCodes::CANNOT_TRUNCATE_FILE); + throwFromErrno("Cannot truncate file " + filename, filename, ErrorCodes::CANNOT_TRUNCATE_FILE); } } diff --git a/dbms/src/IO/WriteBufferFromFile.cpp b/dbms/src/IO/WriteBufferFromFile.cpp index 3082f674fff..a928faa1eaf 100644 --- a/dbms/src/IO/WriteBufferFromFile.cpp +++ b/dbms/src/IO/WriteBufferFromFile.cpp @@ -44,13 +44,13 @@ WriteBufferFromFile::WriteBufferFromFile( fd = ::open(file_name.c_str(), flags == -1 ? O_WRONLY | O_TRUNC | O_CREAT : flags, mode); if (-1 == fd) - throwFromErrno("Cannot open file " + file_name, errno == ENOENT ? 
ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + throwFromErrno("Cannot open file " + file_name, file_name, errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); #ifdef __APPLE__ if (o_direct) { if (fcntl(fd, F_NOCACHE, 1) == -1) - throwFromErrno("Cannot set F_NOCACHE on file " + file_name, ErrorCodes::CANNOT_OPEN_FILE); + throwFromErrno("Cannot set F_NOCACHE on file " + file_name, file_name, ErrorCodes::CANNOT_OPEN_FILE); } #endif } diff --git a/dbms/src/IO/WriteBufferFromFileDescriptor.cpp b/dbms/src/IO/WriteBufferFromFileDescriptor.cpp index 0ca39b47ada..2efc0c9949d 100644 --- a/dbms/src/IO/WriteBufferFromFileDescriptor.cpp +++ b/dbms/src/IO/WriteBufferFromFileDescriptor.cpp @@ -56,7 +56,7 @@ void WriteBufferFromFileDescriptor::nextImpl() if ((-1 == res || 0 == res) && errno != EINTR) { ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed); - throwFromErrno("Cannot write to file " + getFileName(), ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); + throwFromErrno("Cannot write to file " + getFileName(), getFileName(), ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); } if (res > 0) @@ -111,7 +111,7 @@ void WriteBufferFromFileDescriptor::sync() /// Request OS to sync data with storage medium. int res = fsync(fd); if (-1 == res) - throwFromErrno("Cannot fsync " + getFileName(), ErrorCodes::CANNOT_FSYNC); + throwFromErrno("Cannot fsync " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSYNC); } @@ -119,7 +119,7 @@ off_t WriteBufferFromFileDescriptor::doSeek(off_t offset, int whence) { off_t res = lseek(fd, offset, whence); if (-1 == res) - throwFromErrno("Cannot seek through file " + getFileName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throwFromErrno("Cannot seek through file " + getFileName(), getFileName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE); return res; } @@ -128,7 +128,7 @@ void WriteBufferFromFileDescriptor::doTruncate(off_t length) { int res = ftruncate(fd, length); if (-1 == res) - throwFromErrno("Cannot truncate file " + getFileName(), ErrorCodes::CANNOT_TRUNCATE_FILE); + throwFromErrno("Cannot truncate file " + getFileName(), getFileName(), ErrorCodes::CANNOT_TRUNCATE_FILE); } } diff --git a/dbms/src/IO/WriteBufferFromTemporaryFile.cpp b/dbms/src/IO/WriteBufferFromTemporaryFile.cpp index e1250c58097..0c0e41ece3d 100644 --- a/dbms/src/IO/WriteBufferFromTemporaryFile.cpp +++ b/dbms/src/IO/WriteBufferFromTemporaryFile.cpp @@ -39,7 +39,7 @@ public: off_t res = lseek(fd, 0, SEEK_SET); if (-1 == res) - throwFromErrno("Cannot reread temporary file " + file_name, ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throwFromErrno("Cannot reread temporary file " + file_name, file_name, ErrorCodes::CANNOT_SEEK_THROUGH_FILE); return std::make_shared(fd, file_name, std::move(origin->tmp_file)); } diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp index da374b1b65d..89b6bdcfd19 100644 --- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -565,7 +565,7 @@ void DistributedBlockOutputStream::writeToShard(const Block & block, const std:: } if (link(first_file_tmp_path.data(), block_file_path.data())) - throwFromErrno("Could not link " + block_file_path + " to " + first_file_tmp_path, ErrorCodes::CANNOT_LINK); + throwFromErrno("Could not link " + block_file_path + " to " + first_file_tmp_path, block_file_path, ErrorCodes::CANNOT_LINK); } /** remove the temporary 
file, enabling the OS to reclaim inode after all threads diff --git a/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h b/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h index 9ee35b7b61b..0de440fc639 100644 --- a/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h +++ b/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h @@ -109,7 +109,7 @@ public: { struct statvfs fs; if (statvfs(path.c_str(), &fs) != 0) - throwFromErrno("Could not calculate available disk space (statvfs)", ErrorCodes::CANNOT_STATVFS); + throwFromErrno("Could not calculate available disk space (statvfs)", path, ErrorCodes::CANNOT_STATVFS); return fs; } @@ -166,7 +166,7 @@ public: { struct stat st; if (stat(p.c_str(), &st)) - throwFromErrno("Cannot stat " + p.string(), errno); + throwFromErrno("Cannot stat " + p.string(), p.string(), ErrorCodes::SYSTEM_ERROR); return st.st_dev; }; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index cdd72a222bd..c83eaae0264 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -413,7 +413,7 @@ void MergeTreeDataPart::remove() const { String path_to_remove = to + "/" + file; if (0 != unlink(path_to_remove.c_str())) - throwFromErrno("Cannot unlink file " + path_to_remove, ErrorCodes::CANNOT_UNLINK); + throwFromErrno("Cannot unlink file " + path_to_remove, path_to_remove, ErrorCodes::CANNOT_UNLINK); } #if !__clang__ #pragma GCC diagnostic pop @@ -423,11 +423,11 @@ void MergeTreeDataPart::remove() const { String path_to_remove = to + "/" + file; if (0 != unlink(path_to_remove.c_str())) - throwFromErrno("Cannot unlink file " + path_to_remove, ErrorCodes::CANNOT_UNLINK); + throwFromErrno("Cannot unlink file " + path_to_remove, path_to_remove, ErrorCodes::CANNOT_UNLINK); } if (0 != rmdir(to.c_str())) - throwFromErrno("Cannot rmdir file " + to, ErrorCodes::CANNOT_UNLINK); + throwFromErrno("Cannot rmdir file " + to, to, ErrorCodes::CANNOT_UNLINK); } catch (...) 
{ diff --git a/dbms/src/Storages/StorageStripeLog.cpp b/dbms/src/Storages/StorageStripeLog.cpp index 3052962606d..1b75a4b549f 100644 --- a/dbms/src/Storages/StorageStripeLog.cpp +++ b/dbms/src/Storages/StorageStripeLog.cpp @@ -214,7 +214,7 @@ StorageStripeLog::StorageStripeLog( { /// create files if they do not exist if (0 != mkdir(full_path.c_str(), S_IRWXU | S_IRWXG | S_IRWXO) && errno != EEXIST) - throwFromErrno("Cannot create directory " + full_path, ErrorCodes::CANNOT_CREATE_DIRECTORY); + throwFromErrno("Cannot create directory " + full_path, full_path, ErrorCodes::CANNOT_CREATE_DIRECTORY); } } diff --git a/dbms/src/Storages/StorageTinyLog.cpp b/dbms/src/Storages/StorageTinyLog.cpp index 214964c32b4..4b88e83c1ba 100644 --- a/dbms/src/Storages/StorageTinyLog.cpp +++ b/dbms/src/Storages/StorageTinyLog.cpp @@ -343,7 +343,7 @@ StorageTinyLog::StorageTinyLog( { /// create files if they do not exist if (0 != mkdir(full_path.c_str(), S_IRWXU | S_IRWXG | S_IRWXO) && errno != EEXIST) - throwFromErrno("Cannot create directory " + full_path, ErrorCodes::CANNOT_CREATE_DIRECTORY); + throwFromErrno("Cannot create directory " + full_path, full_path, ErrorCodes::CANNOT_CREATE_DIRECTORY); } for (const auto & col : getColumns().getAllPhysical()) diff --git a/dbms/src/Storages/tests/remove_symlink_directory.cpp b/dbms/src/Storages/tests/remove_symlink_directory.cpp index 8098ee5dc32..b193c181c30 100644 --- a/dbms/src/Storages/tests/remove_symlink_directory.cpp +++ b/dbms/src/Storages/tests/remove_symlink_directory.cpp @@ -22,7 +22,7 @@ try Poco::File("./test_dir/file").createFile(); if (0 != symlink("./test_dir", "./test_link")) - DB::throwFromErrno("Cannot create symlink", DB::ErrorCodes::SYSTEM_ERROR); + DB::throwFromErrno("Cannot create symlink", "./test_link", DB::ErrorCodes::SYSTEM_ERROR); Poco::File link("./test_link"); link.renameTo("./test_link2"); From 0ee3e21fbc6cb3077a0456d54a2c47424726e879 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 6 Aug 2019 23:39:07 +0300 Subject: [PATCH 29/44] fix style --- dbms/src/Common/Exception.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp index 39610af7540..89200231b27 100644 --- a/dbms/src/Common/Exception.cpp +++ b/dbms/src/Common/Exception.cpp @@ -111,7 +111,8 @@ std::string getExtraExceptionInfo(const std::exception & e) getNoSpaceLeftInfoMessage(errno_exception->getPath().value(), msg); } } - catch (...) { + catch (...) 
+    {
         msg += "\nCannot print extra info: " + getCurrentExceptionMessage(false, false, false);
     }

From 1ea53eaf2369070ffc62bef5a1be0d4e0b12a59d Mon Sep 17 00:00:00 2001
From: zhang2014
Date: Wed, 7 Aug 2019 10:46:15 +0800
Subject: [PATCH 30/44] add domain zh docs

---
 docs/zh/data_types/domain/ipv4.md     | 78 +++++++++++++++++++++++++++
 docs/zh/data_types/domain/ipv6.md     | 78 +++++++++++++++++++++++++++
 docs/zh/data_types/domain/overview.md | 26 +++++++++
 3 files changed, 182 insertions(+)
 create mode 100644 docs/zh/data_types/domain/ipv4.md
 create mode 100644 docs/zh/data_types/domain/ipv6.md
 create mode 100644 docs/zh/data_types/domain/overview.md

diff --git a/docs/zh/data_types/domain/ipv4.md b/docs/zh/data_types/domain/ipv4.md
new file mode 100644
index 00000000000..4adf13409fe
--- /dev/null
+++ b/docs/zh/data_types/domain/ipv4.md
@@ -0,0 +1,78 @@
+## IPv4
+
+`IPv4` is a domain type that is binary-compatible with `UInt32` and is used for storing IPv4 address values. It provides more compact binary storage while recognizing a more human-friendly input/output format.
+
+### Basic Usage
+
+``` sql
+CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY url;
+
+DESCRIBE TABLE hits;
+```
+
+```
+┌─name─┬─type───┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┐
+│ url  │ String │              │                    │         │                  │
+│ from │ IPv4   │              │                    │         │                  │
+└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┘
+```
+
+You can also use a column of the `IPv4` type as a primary key:
+
+``` sql
+CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from;
+```
+
+On insertion and in queries, the `IPv4` type recognizes the human-friendly string format of IPv4 addresses:
+
+``` sql
+INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.yandex', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242');
+
+SELECT * FROM hits;
+```
+
+```
+┌─url────────────────────────────────┬───────────from─┐
+│ https://clickhouse.yandex/docs/en/ │ 116.106.34.242 │
+│ https://wikipedia.org              │ 116.253.40.133 │
+│ https://clickhouse.yandex          │ 183.247.232.58 │
+└────────────────────────────────────┴────────────────┘
+```
+
+At the same time, it stores the values in a compact binary format:
+
+``` sql
+SELECT toTypeName(from), hex(from) FROM hits LIMIT 1;
+```
+
+```
+┌─toTypeName(from)─┬─hex(from)─┐
+│ IPv4             │ B7F7E83A  │
+└──────────────────┴───────────┘
+```
+
+The domain cannot be implicitly converted to types other than `UInt32`. If you want to convert an `IPv4` value to a string, you have to do that explicitly with the `IPv4NumToString()` function:
+
+``` sql
+SELECT toTypeName(s), IPv4NumToString(from) as s FROM hits LIMIT 1;
+```
+
+```
+┌─toTypeName(IPv4NumToString(from))─┬─s──────────────┐
+│ String                            │ 183.247.232.58 │
+└───────────────────────────────────┴────────────────┘
+```
+
+Or use `CAST` to convert it to `UInt32`:
+
+``` sql
+SELECT toTypeName(i), CAST(from as UInt32) as i FROM hits LIMIT 1;
+```
+
+```
+┌─toTypeName(CAST(from, 'UInt32'))─┬──────────i─┐
+│ UInt32                           │ 3086477370 │
+└──────────────────────────────────┴────────────┘
+```
+
+[Original article](https://clickhouse.yandex/docs/en/data_types/domains/ipv4)
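(An aside on the `UInt32` compatibility documented above — the query below is an illustration added for clarity, not part of the patch; `IPv4StringToNum` is the standard ClickHouse conversion function and `hits` is the example table from the document:)

```sql
-- Because IPv4 is stored as a UInt32, range filters can be expressed numerically,
-- e.g. selecting all addresses of the 116.253.40.0/24 network:
SELECT count()
FROM hits
WHERE CAST(from AS UInt32) BETWEEN IPv4StringToNum('116.253.40.0')
                               AND IPv4StringToNum('116.253.40.255');
```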
diff --git a/docs/zh/data_types/domain/ipv6.md b/docs/zh/data_types/domain/ipv6.md
new file mode 100644
index 00000000000..1209350990f
--- /dev/null
+++ b/docs/zh/data_types/domain/ipv6.md
@@ -0,0 +1,78 @@
+## IPv6
+
+`IPv6` is a domain type that is binary-compatible with `FixedString(16)` and is used for storing IPv6 address values. It provides more compact binary storage while recognizing a more human-friendly input/output format.
+
+### Basic Usage
+
+``` sql
+CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY url;
+
+DESCRIBE TABLE hits;
+```
+
+```
+┌─name─┬─type───┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┐
+│ url  │ String │              │                    │         │                  │
+│ from │ IPv6   │              │                    │         │                  │
+└──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┘
+```
+
+You can also use a column of the `IPv6` type as a primary key:
+
+``` sql
+CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from;
+```
+
+On insertion and in queries, the `IPv6` type recognizes the human-friendly string format of IPv6 addresses:
+
+``` sql
+INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.yandex', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1');
+
+SELECT * FROM hits;
+```
+
+```
+┌─url────────────────────────────────┬─from──────────────────────────┐
+│ https://clickhouse.yandex          │ 2001:44c8:129:2632:33:0:252:2 │
+│ https://clickhouse.yandex/docs/en/ │ 2a02:e980:1e::1               │
+│ https://wikipedia.org              │ 2a02:aa08:e000:3100::2        │
+└────────────────────────────────────┴───────────────────────────────┘
+```
+
+At the same time, it stores the values in a compact binary format:
+
+``` sql
+SELECT toTypeName(from), hex(from) FROM hits LIMIT 1;
+```
+
+```
+┌─toTypeName(from)─┬─hex(from)────────────────────────┐
+│ IPv6             │ 200144C8012926320033000002520002 │
+└──────────────────┴──────────────────────────────────┘
+```
+
+The domain cannot be implicitly converted to types other than `FixedString(16)`. If you want to convert an `IPv6` value to a string, you have to do that explicitly with the `IPv6NumToString()` function:
+
+``` sql
+SELECT toTypeName(s), IPv6NumToString(from) as s FROM hits LIMIT 1;
+```
+
+```
+┌─toTypeName(IPv6NumToString(from))─┬─s─────────────────────────────┐
+│ String                            │ 2001:44c8:129:2632:33:0:252:2 │
+└───────────────────────────────────┴───────────────────────────────┘
+```
+
+Or use `CAST` to convert it to `FixedString(16)`:
+
+``` sql
+SELECT toTypeName(i), CAST(from as FixedString(16)) as i FROM hits LIMIT 1;
+```
+
+```
+┌─toTypeName(CAST(from, 'FixedString(16)'))─┬─i───────┐
+│ FixedString(16)                           │ ��� │
+└───────────────────────────────────────────┴─────────┘
+```
+
+[Original article](https://clickhouse.yandex/docs/en/data_types/domains/ipv6)
diff --git a/docs/zh/data_types/domain/overview.md b/docs/zh/data_types/domain/overview.md
new file mode 100644
index 00000000000..b4db116e75b
--- /dev/null
+++ b/docs/zh/data_types/domain/overview.md
@@ -0,0 +1,26 @@
+# Domains
+
+Domains are special-purpose types that are always binary-compatible with some existing base type while adding extra features, so that the on-disk data format stays unchanged. Currently, ClickHouse does not support user-defined domains.
+
+Wherever the binary-compatible base type can be used, the corresponding domain type can be used as well, for example:
+
+* Declare a table column of the domain type
+* Read data from / write data to a domain column
+* Use it as an index, if the base type can be used as an index
+* Pass values of a domain column to functions as arguments
+* Etc.
+
+### Extra Features of Domains
+
+* The column is always displayed with the domain type name in `SHOW CREATE TABLE` and `DESCRIBE TABLE`
+* Input in a human-friendly format with `INSERT INTO domain_table(domain_column) VALUES(...)`
+* Output in a human-friendly format in `SELECT domain_column FROM domain_table`
+* Loading data from an external source in a human-friendly format with `INSERT INTO domain_table FORMAT CSV ...`
+
+### Limitations
+
+* Can't convert the index column of the base type to the domain type via `ALTER TABLE`.
+* Can't implicitly convert string values into domain values when inserting data from another column or table.
+* Can't add constraints on values stored as a domain type.
+
+[Original article](https://clickhouse.yandex/docs/en/data_types/domains/overview)

From 5cf183cbe3cc9f88d557ef0ab0e264dd9de78ddf Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Wed, 7 Aug 2019 15:52:47 +0300
Subject: [PATCH 31/44] fixes after review

---
 dbms/src/Common/CounterInFile.h | 6 +++---
 dbms/src/Common/Exception.cpp | 4 +++-
 dbms/src/Common/Exception.h | 3 ++-
 dbms/src/Common/StatusFile.cpp | 8 ++++----
 dbms/src/Common/createHardLink.cpp | 12 +++++++-----
 dbms/src/IO/MMapReadBufferFromFile.cpp | 3 ++-
 dbms/src/IO/ReadBufferAIO.cpp | 2 +-
 dbms/src/IO/ReadBufferFromFile.cpp | 3 ++-
 dbms/src/IO/ReadBufferFromFileDescriptor.cpp | 6 ++++--
 dbms/src/IO/WriteBufferAIO.cpp | 8 ++++----
 dbms/src/IO/WriteBufferFromFile.cpp | 3 ++-
 dbms/src/IO/WriteBufferFromFileDescriptor.cpp | 10 ++++++----
 dbms/src/IO/WriteBufferFromTemporaryFile.cpp | 3 ++-
 .../Distributed/DistributedBlockOutputStream.cpp | 3 ++-
dbms/src/Storages/MergeTree/DiskSpaceMonitor.h | 5 +++-- dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp | 8 +++++--- dbms/src/Storages/StorageStripeLog.cpp | 3 ++- dbms/src/Storages/StorageTinyLog.cpp | 3 ++- dbms/src/Storages/tests/remove_symlink_directory.cpp | 2 +- 19 files changed, 57 insertions(+), 38 deletions(-) diff --git a/dbms/src/Common/CounterInFile.h b/dbms/src/Common/CounterInFile.h index 6b982ad0a46..537d399e84f 100644 --- a/dbms/src/Common/CounterInFile.h +++ b/dbms/src/Common/CounterInFile.h @@ -67,13 +67,13 @@ public: int fd = ::open(path.c_str(), O_RDWR | O_CREAT, 0666); if (-1 == fd) - DB::throwFromErrno("Cannot open file " + path, path, DB::ErrorCodes::CANNOT_OPEN_FILE); + DB::throwFromErrnoWithPath("Cannot open file " + path, path, DB::ErrorCodes::CANNOT_OPEN_FILE); try { int flock_ret = flock(fd, LOCK_EX); if (-1 == flock_ret) - DB::throwFromErrno("Cannot lock file " + path, path, DB::ErrorCodes::CANNOT_OPEN_FILE); + DB::throwFromErrnoWithPath("Cannot lock file " + path, path, DB::ErrorCodes::CANNOT_OPEN_FILE); if (!file_doesnt_exists) { @@ -141,7 +141,7 @@ public: int fd = ::open(path.c_str(), O_RDWR | O_CREAT, 0666); if (-1 == fd) - DB::throwFromErrno("Cannot open file " + path, path, DB::ErrorCodes::CANNOT_OPEN_FILE); + DB::throwFromErrnoWithPath("Cannot open file " + path, path, DB::ErrorCodes::CANNOT_OPEN_FILE); try { diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp index 89200231b27..0b64c65b791 100644 --- a/dbms/src/Common/Exception.cpp +++ b/dbms/src/Common/Exception.cpp @@ -55,7 +55,7 @@ void throwFromErrno(const std::string & s, int code, int e) throw ErrnoException(s + ", " + errnoToString(code, e), code, e); } -void throwFromErrno(const std::string & s, const std::string & path, int code, int the_errno) +void throwFromErrnoWithPath(const std::string & s, const std::string & path, int code, int the_errno) { throw ErrnoException(s + ", " + errnoToString(code, the_errno), code, the_errno, path); } @@ -92,7 +92,9 @@ void getNoSpaceLeftInfoMessage(std::filesystem::path path, std::string & msg) auto mount_point = DiskSpaceMonitor::getMountPoint(path).string(); msg += "\nMount point: " + mount_point; +#if defined(__linux__) msg += "\nFilesystem: " + DiskSpaceMonitor::getFilesystemName(mount_point); +#endif } std::string getExtraExceptionInfo(const std::exception & e) diff --git a/dbms/src/Common/Exception.h b/dbms/src/Common/Exception.h index 6416a6a6053..bd4d6e0be09 100644 --- a/dbms/src/Common/Exception.h +++ b/dbms/src/Common/Exception.h @@ -75,7 +75,8 @@ using Exceptions = std::vector; std::string errnoToString(int code, int the_errno = errno); [[noreturn]] void throwFromErrno(const std::string & s, int code, int the_errno = errno); -[[noreturn]] void throwFromErrno(const std::string & s, const std::string & path, int code, int the_errno = errno); +[[noreturn]] void throwFromErrnoWithPath(const std::string & s, const std::string & path, int code, + int the_errno = errno); /** Try to write an exception to the log (and forget about it). 
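(A short sketch of the call convention this patch establishes — an illustration only, not part of the patch. The `openOrThrow` wrapper is hypothetical; `throwFromErrnoWithPath` and `ErrorCodes::CANNOT_OPEN_FILE` come from the hunks above:)

```cpp
#include <fcntl.h>
#include <string>

// Hypothetical call site: the path travels both inside the human-readable
// message and as a separate structured argument, so getExtraExceptionInfo()
// can report free disk space for that path on ENOSPC without parsing the
// message text (the "ugly hack" removed earlier in this series).
int openOrThrow(const std::string & path)
{
    int fd = ::open(path.c_str(), O_RDONLY);
    if (-1 == fd)
        DB::throwFromErrnoWithPath("Cannot open file " + path, path, DB::ErrorCodes::CANNOT_OPEN_FILE);
    return fd;
}
```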
diff --git a/dbms/src/Common/StatusFile.cpp b/dbms/src/Common/StatusFile.cpp index 8cb5439afe3..e0f7788815c 100644 --- a/dbms/src/Common/StatusFile.cpp +++ b/dbms/src/Common/StatusFile.cpp @@ -51,7 +51,7 @@ StatusFile::StatusFile(const std::string & path_) fd = ::open(path.c_str(), O_WRONLY | O_CREAT, 0666); if (-1 == fd) - throwFromErrno("Cannot open file " + path, path, ErrorCodes::CANNOT_OPEN_FILE); + throwFromErrnoWithPath("Cannot open file " + path, path, ErrorCodes::CANNOT_OPEN_FILE); try { @@ -61,14 +61,14 @@ StatusFile::StatusFile(const std::string & path_) if (errno == EWOULDBLOCK) throw Exception("Cannot lock file " + path + ". Another server instance in same directory is already running.", ErrorCodes::CANNOT_OPEN_FILE); else - throwFromErrno("Cannot lock file " + path, path, ErrorCodes::CANNOT_OPEN_FILE); + throwFromErrnoWithPath("Cannot lock file " + path, path, ErrorCodes::CANNOT_OPEN_FILE); } if (0 != ftruncate(fd, 0)) - throwFromErrno("Cannot ftruncate " + path, path, ErrorCodes::CANNOT_TRUNCATE_FILE); + throwFromErrnoWithPath("Cannot ftruncate " + path, path, ErrorCodes::CANNOT_TRUNCATE_FILE); if (0 != lseek(fd, 0, SEEK_SET)) - throwFromErrno("Cannot lseek " + path, path, ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throwFromErrnoWithPath("Cannot lseek " + path, path, ErrorCodes::CANNOT_SEEK_THROUGH_FILE); /// Write information about current server instance to the file. { diff --git a/dbms/src/Common/createHardLink.cpp b/dbms/src/Common/createHardLink.cpp index 2e6175de64e..5ed82231b26 100644 --- a/dbms/src/Common/createHardLink.cpp +++ b/dbms/src/Common/createHardLink.cpp @@ -26,17 +26,19 @@ void createHardLink(const String & source_path, const String & destination_path) struct stat destination_descr; if (0 != lstat(source_path.c_str(), &source_descr)) - throwFromErrno("Cannot stat " + source_path, source_path, ErrorCodes::CANNOT_STAT); + throwFromErrnoWithPath("Cannot stat " + source_path, source_path, ErrorCodes::CANNOT_STAT); if (0 != lstat(destination_path.c_str(), &destination_descr)) - throwFromErrno("Cannot stat " + destination_path, destination_path, ErrorCodes::CANNOT_STAT); + throwFromErrnoWithPath("Cannot stat " + destination_path, destination_path, ErrorCodes::CANNOT_STAT); if (source_descr.st_ino != destination_descr.st_ino) - throwFromErrno("Destination file " + destination_path + " is already exist and have different inode.", - destination_path, ErrorCodes::CANNOT_LINK, link_errno); + throwFromErrnoWithPath( + "Destination file " + destination_path + " is already exist and have different inode.", + destination_path, ErrorCodes::CANNOT_LINK, link_errno); } else - throwFromErrno("Cannot link " + source_path + " to " + destination_path, destination_path, ErrorCodes::CANNOT_LINK); + throwFromErrnoWithPath("Cannot link " + source_path + " to " + destination_path, destination_path, + ErrorCodes::CANNOT_LINK); } } diff --git a/dbms/src/IO/MMapReadBufferFromFile.cpp b/dbms/src/IO/MMapReadBufferFromFile.cpp index c4b3fa6222b..e478a11c16a 100644 --- a/dbms/src/IO/MMapReadBufferFromFile.cpp +++ b/dbms/src/IO/MMapReadBufferFromFile.cpp @@ -29,7 +29,8 @@ void MMapReadBufferFromFile::open(const std::string & file_name) fd = ::open(file_name.c_str(), O_RDONLY); if (-1 == fd) - throwFromErrno("Cannot open file " + file_name, file_name, errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + throwFromErrnoWithPath("Cannot open file " + file_name, file_name, + errno == ENOENT ? 
ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); } diff --git a/dbms/src/IO/ReadBufferAIO.cpp b/dbms/src/IO/ReadBufferAIO.cpp index f76e000a12c..ada6bca907d 100644 --- a/dbms/src/IO/ReadBufferAIO.cpp +++ b/dbms/src/IO/ReadBufferAIO.cpp @@ -54,7 +54,7 @@ ReadBufferAIO::ReadBufferAIO(const std::string & filename_, size_t buffer_size_, if (fd == -1) { auto error_code = (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE; - throwFromErrno("Cannot open file " + filename, filename, error_code); + throwFromErrnoWithPath("Cannot open file " + filename, filename, error_code); } } diff --git a/dbms/src/IO/ReadBufferFromFile.cpp b/dbms/src/IO/ReadBufferFromFile.cpp index c37d8b24595..b9cd7caf155 100644 --- a/dbms/src/IO/ReadBufferFromFile.cpp +++ b/dbms/src/IO/ReadBufferFromFile.cpp @@ -41,7 +41,8 @@ ReadBufferFromFile::ReadBufferFromFile( fd = ::open(file_name.c_str(), flags == -1 ? O_RDONLY : flags); if (-1 == fd) - throwFromErrno("Cannot open file " + file_name, file_name, errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + throwFromErrnoWithPath("Cannot open file " + file_name, file_name, + errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); #ifdef __APPLE__ if (o_direct) { diff --git a/dbms/src/IO/ReadBufferFromFileDescriptor.cpp b/dbms/src/IO/ReadBufferFromFileDescriptor.cpp index 000abc3a7fc..db79d078c65 100644 --- a/dbms/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/dbms/src/IO/ReadBufferFromFileDescriptor.cpp @@ -61,7 +61,8 @@ bool ReadBufferFromFileDescriptor::nextImpl() if (-1 == res && errno != EINTR) { ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadFailed); - throwFromErrno("Cannot read from file " + getFileName(), getFileName(), ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + throwFromErrnoWithPath("Cannot read from file " + getFileName(), getFileName(), + ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); } if (res > 0) @@ -124,7 +125,8 @@ off_t ReadBufferFromFileDescriptor::doSeek(off_t offset, int whence) pos = working_buffer.end(); off_t res = ::lseek(fd, new_pos, SEEK_SET); if (-1 == res) - throwFromErrno("Cannot seek through file " + getFileName(), getFileName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(), + ErrorCodes::CANNOT_SEEK_THROUGH_FILE); pos_in_file = new_pos; watch.stop(); diff --git a/dbms/src/IO/WriteBufferAIO.cpp b/dbms/src/IO/WriteBufferAIO.cpp index 2bbdad95b04..2dc177111a1 100644 --- a/dbms/src/IO/WriteBufferAIO.cpp +++ b/dbms/src/IO/WriteBufferAIO.cpp @@ -62,7 +62,7 @@ WriteBufferAIO::WriteBufferAIO(const std::string & filename_, size_t buffer_size if (fd == -1) { auto error_code = (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE; - throwFromErrno("Cannot open file " + filename, filename, error_code); + throwFromErrnoWithPath("Cannot open file " + filename, filename, error_code); } } @@ -96,7 +96,7 @@ void WriteBufferAIO::sync() /// Ask OS to flush data to disk. 
int res = ::fsync(fd); if (res == -1) - throwFromErrno("Cannot fsync " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSYNC); + throwFromErrnoWithPath("Cannot fsync " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSYNC); } void WriteBufferAIO::nextImpl() @@ -173,7 +173,7 @@ void WriteBufferAIO::doTruncate(off_t length) int res = ::ftruncate(fd, length); if (res == -1) - throwFromErrno("Cannot truncate file " + filename, filename, ErrorCodes::CANNOT_TRUNCATE_FILE); + throwFromErrnoWithPath("Cannot truncate file " + filename, filename, ErrorCodes::CANNOT_TRUNCATE_FILE); } void WriteBufferAIO::flush() @@ -427,7 +427,7 @@ void WriteBufferAIO::finalize() /// Truncate the file to remove unnecessary zeros from it. int res = ::ftruncate(fd, max_pos_in_file); if (res == -1) - throwFromErrno("Cannot truncate file " + filename, filename, ErrorCodes::CANNOT_TRUNCATE_FILE); + throwFromErrnoWithPath("Cannot truncate file " + filename, filename, ErrorCodes::CANNOT_TRUNCATE_FILE); } } diff --git a/dbms/src/IO/WriteBufferFromFile.cpp b/dbms/src/IO/WriteBufferFromFile.cpp index a928faa1eaf..5e6fd7d6fe1 100644 --- a/dbms/src/IO/WriteBufferFromFile.cpp +++ b/dbms/src/IO/WriteBufferFromFile.cpp @@ -44,7 +44,8 @@ WriteBufferFromFile::WriteBufferFromFile( fd = ::open(file_name.c_str(), flags == -1 ? O_WRONLY | O_TRUNC | O_CREAT : flags, mode); if (-1 == fd) - throwFromErrno("Cannot open file " + file_name, file_name, errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + throwFromErrnoWithPath("Cannot open file " + file_name, file_name, + errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); #ifdef __APPLE__ if (o_direct) diff --git a/dbms/src/IO/WriteBufferFromFileDescriptor.cpp b/dbms/src/IO/WriteBufferFromFileDescriptor.cpp index 2efc0c9949d..bfa1e9582d3 100644 --- a/dbms/src/IO/WriteBufferFromFileDescriptor.cpp +++ b/dbms/src/IO/WriteBufferFromFileDescriptor.cpp @@ -56,7 +56,8 @@ void WriteBufferFromFileDescriptor::nextImpl() if ((-1 == res || 0 == res) && errno != EINTR) { ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed); - throwFromErrno("Cannot write to file " + getFileName(), getFileName(), ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); + throwFromErrnoWithPath("Cannot write to file " + getFileName(), getFileName(), + ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); } if (res > 0) @@ -111,7 +112,7 @@ void WriteBufferFromFileDescriptor::sync() /// Request OS to sync data with storage medium. 
int res = fsync(fd); if (-1 == res) - throwFromErrno("Cannot fsync " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSYNC); + throwFromErrnoWithPath("Cannot fsync " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSYNC); } @@ -119,7 +120,8 @@ off_t WriteBufferFromFileDescriptor::doSeek(off_t offset, int whence) { off_t res = lseek(fd, offset, whence); if (-1 == res) - throwFromErrno("Cannot seek through file " + getFileName(), getFileName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(), + ErrorCodes::CANNOT_SEEK_THROUGH_FILE); return res; } @@ -128,7 +130,7 @@ void WriteBufferFromFileDescriptor::doTruncate(off_t length) { int res = ftruncate(fd, length); if (-1 == res) - throwFromErrno("Cannot truncate file " + getFileName(), getFileName(), ErrorCodes::CANNOT_TRUNCATE_FILE); + throwFromErrnoWithPath("Cannot truncate file " + getFileName(), getFileName(), ErrorCodes::CANNOT_TRUNCATE_FILE); } } diff --git a/dbms/src/IO/WriteBufferFromTemporaryFile.cpp b/dbms/src/IO/WriteBufferFromTemporaryFile.cpp index 0c0e41ece3d..c5a6bc04350 100644 --- a/dbms/src/IO/WriteBufferFromTemporaryFile.cpp +++ b/dbms/src/IO/WriteBufferFromTemporaryFile.cpp @@ -39,7 +39,8 @@ public: off_t res = lseek(fd, 0, SEEK_SET); if (-1 == res) - throwFromErrno("Cannot reread temporary file " + file_name, file_name, ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throwFromErrnoWithPath("Cannot reread temporary file " + file_name, file_name, + ErrorCodes::CANNOT_SEEK_THROUGH_FILE); return std::make_shared(fd, file_name, std::move(origin->tmp_file)); } diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp index 89b6bdcfd19..97104fc8f71 100644 --- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -565,7 +565,8 @@ void DistributedBlockOutputStream::writeToShard(const Block & block, const std:: } if (link(first_file_tmp_path.data(), block_file_path.data())) - throwFromErrno("Could not link " + block_file_path + " to " + first_file_tmp_path, block_file_path, ErrorCodes::CANNOT_LINK); + throwFromErrnoWithPath("Could not link " + block_file_path + " to " + first_file_tmp_path, block_file_path, + ErrorCodes::CANNOT_LINK); } /** remove the temporary file, enabling the OS to reclaim inode after all threads diff --git a/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h b/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h index 0de440fc639..bd95d10e834 100644 --- a/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h +++ b/dbms/src/Storages/MergeTree/DiskSpaceMonitor.h @@ -109,7 +109,8 @@ public: { struct statvfs fs; if (statvfs(path.c_str(), &fs) != 0) - throwFromErrno("Could not calculate available disk space (statvfs)", path, ErrorCodes::CANNOT_STATVFS); + throwFromErrnoWithPath("Could not calculate available disk space (statvfs)", path, + ErrorCodes::CANNOT_STATVFS); return fs; } @@ -166,7 +167,7 @@ public: { struct stat st; if (stat(p.c_str(), &st)) - throwFromErrno("Cannot stat " + p.string(), p.string(), ErrorCodes::SYSTEM_ERROR); + throwFromErrnoWithPath("Cannot stat " + p.string(), p.string(), ErrorCodes::SYSTEM_ERROR); return st.st_dev; }; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index c83eaae0264..f64bdcc9740 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ 
b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -413,7 +413,8 @@ void MergeTreeDataPart::remove() const { String path_to_remove = to + "/" + file; if (0 != unlink(path_to_remove.c_str())) - throwFromErrno("Cannot unlink file " + path_to_remove, path_to_remove, ErrorCodes::CANNOT_UNLINK); + throwFromErrnoWithPath("Cannot unlink file " + path_to_remove, path_to_remove, + ErrorCodes::CANNOT_UNLINK); } #if !__clang__ #pragma GCC diagnostic pop @@ -423,11 +424,12 @@ void MergeTreeDataPart::remove() const { String path_to_remove = to + "/" + file; if (0 != unlink(path_to_remove.c_str())) - throwFromErrno("Cannot unlink file " + path_to_remove, path_to_remove, ErrorCodes::CANNOT_UNLINK); + throwFromErrnoWithPath("Cannot unlink file " + path_to_remove, path_to_remove, + ErrorCodes::CANNOT_UNLINK); } if (0 != rmdir(to.c_str())) - throwFromErrno("Cannot rmdir file " + to, to, ErrorCodes::CANNOT_UNLINK); + throwFromErrnoWithPath("Cannot rmdir file " + to, to, ErrorCodes::CANNOT_UNLINK); } catch (...) { diff --git a/dbms/src/Storages/StorageStripeLog.cpp b/dbms/src/Storages/StorageStripeLog.cpp index 1b75a4b549f..42745e11971 100644 --- a/dbms/src/Storages/StorageStripeLog.cpp +++ b/dbms/src/Storages/StorageStripeLog.cpp @@ -214,7 +214,8 @@ StorageStripeLog::StorageStripeLog( { /// create files if they do not exist if (0 != mkdir(full_path.c_str(), S_IRWXU | S_IRWXG | S_IRWXO) && errno != EEXIST) - throwFromErrno("Cannot create directory " + full_path, full_path, ErrorCodes::CANNOT_CREATE_DIRECTORY); + throwFromErrnoWithPath("Cannot create directory " + full_path, full_path, + ErrorCodes::CANNOT_CREATE_DIRECTORY); } } diff --git a/dbms/src/Storages/StorageTinyLog.cpp b/dbms/src/Storages/StorageTinyLog.cpp index 4b88e83c1ba..096fe5b76e5 100644 --- a/dbms/src/Storages/StorageTinyLog.cpp +++ b/dbms/src/Storages/StorageTinyLog.cpp @@ -343,7 +343,8 @@ StorageTinyLog::StorageTinyLog( { /// create files if they do not exist if (0 != mkdir(full_path.c_str(), S_IRWXU | S_IRWXG | S_IRWXO) && errno != EEXIST) - throwFromErrno("Cannot create directory " + full_path, full_path, ErrorCodes::CANNOT_CREATE_DIRECTORY); + throwFromErrnoWithPath("Cannot create directory " + full_path, full_path, + ErrorCodes::CANNOT_CREATE_DIRECTORY); } for (const auto & col : getColumns().getAllPhysical()) diff --git a/dbms/src/Storages/tests/remove_symlink_directory.cpp b/dbms/src/Storages/tests/remove_symlink_directory.cpp index b193c181c30..b455357863e 100644 --- a/dbms/src/Storages/tests/remove_symlink_directory.cpp +++ b/dbms/src/Storages/tests/remove_symlink_directory.cpp @@ -22,7 +22,7 @@ try Poco::File("./test_dir/file").createFile(); if (0 != symlink("./test_dir", "./test_link")) - DB::throwFromErrno("Cannot create symlink", "./test_link", DB::ErrorCodes::SYSTEM_ERROR); + DB::throwFromErrnoWithPath("Cannot create symlink", "./test_link", DB::ErrorCodes::SYSTEM_ERROR); Poco::File link("./test_link"); link.renameTo("./test_link2"); From 4982f3e9f9763fd31269b60500b2a1eca52674f5 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 7 Aug 2019 16:52:15 +0300 Subject: [PATCH 32/44] Add links to hits_100m dataset. 
---
 docs/en/getting_started/example_datasets/metrica.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/getting_started/example_datasets/metrica.md b/docs/en/getting_started/example_datasets/metrica.md
index c26332ad563..75741ba0b54 100644
--- a/docs/en/getting_started/example_datasets/metrica.md
+++ b/docs/en/getting_started/example_datasets/metrica.md
@@ -1,5 +1,5 @@
 # Anonymized Yandex.Metrica Data
-Dataset consists of two tables containing anonymized data about hits (`hits_v1`) and visits (`visits_v1`) of Yandex.Metrica. Each of the tables can be downloaded as a compressed `tsv.xz` file or as prepared partitions.
+The dataset consists of two tables containing anonymized data about hits (`hits_v1`) and visits (`visits_v1`) of Yandex.Metrica. Each of the tables can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at `https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz` and as prepared partitions at `https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz`.
 ## Obtaining Tables from Prepared Partitions
 **Download and import hits:**

From 5bb0cde59c4de1f89d2c7b00316c952302f32ed8 Mon Sep 17 00:00:00 2001
From: chertus
Date: Wed, 7 Aug 2019 16:53:46 +0300
Subject: [PATCH 33/44] hotfix for Decimal.compareAt

---
 dbms/src/Columns/ColumnDecimal.cpp | 2 +-
 .../0_stateless/00880_decimal_in_key.reference | 2 ++
 .../queries/0_stateless/00880_decimal_in_key.sql | 14 ++++++++++++++
 3 files changed, 17 insertions(+), 1 deletion(-)
 create mode 100644 dbms/tests/queries/0_stateless/00880_decimal_in_key.reference
 create mode 100644 dbms/tests/queries/0_stateless/00880_decimal_in_key.sql

diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp
index 55c22884144..73b0ee282d6 100644
--- a/dbms/src/Columns/ColumnDecimal.cpp
+++ b/dbms/src/Columns/ColumnDecimal.cpp
@@ -26,7 +26,7 @@ namespace ErrorCodes

 template <typename T>
 int ColumnDecimal<T>::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const
 {
-    auto other = static_cast<const Self &>(rhs_);
+    auto & other = static_cast<const Self &>(rhs_);

     const T & a = data[n];
     const T & b = other.data[m];
diff --git a/dbms/tests/queries/0_stateless/00880_decimal_in_key.reference b/dbms/tests/queries/0_stateless/00880_decimal_in_key.reference
new file mode 100644
index 00000000000..fcd78da1283
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00880_decimal_in_key.reference
@@ -0,0 +1,2 @@
+1000000
+1000000
diff --git a/dbms/tests/queries/0_stateless/00880_decimal_in_key.sql b/dbms/tests/queries/0_stateless/00880_decimal_in_key.sql
new file mode 100644
index 00000000000..44edf725a41
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00880_decimal_in_key.sql
@@ -0,0 +1,14 @@
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+
+CREATE TABLE t1 (str String, dec Decimal64(8)) ENGINE = MergeTree ORDER BY str;
+CREATE TABLE t2 (str String, dec Decimal64(8)) ENGINE = MergeTree ORDER BY dec;
+
+INSERT INTO t1 SELECT toString(number), toDecimal64(number, 8) FROM system.numbers LIMIT 1000000;
+SELECT count() FROM t1;
+
+INSERT INTO t2 SELECT toString(number), toDecimal64(number, 8) FROM system.numbers LIMIT 1000000;
+SELECT count() FROM t2;
+
+DROP TABLE t1;
+DROP TABLE t2;
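(A note on why the one-character hotfix above matters — a simplified sketch with invented stand-in types; the real code uses `ColumnDecimal<T>`:)

```cpp
#include <vector>

struct ColumnLike
{
    std::vector<long> data;
};

// Without `&`, `auto` deduces a value type, so the cast result is copied:
// every comparison would deep-copy the whole column. With `auto &`, only
// a reference is bound and no allocation happens.
int compareFirst(const ColumnLike & lhs, const ColumnLike & rhs)
{
    auto copy = static_cast<const ColumnLike &>(rhs);  // deduces ColumnLike: copies rhs.data
    auto & ref = static_cast<const ColumnLike &>(rhs); // deduces const ColumnLike &: no copy
    (void)copy;
    return lhs.data[0] < ref.data[0] ? -1 : (lhs.data[0] > ref.data[0] ? 1 : 0);
}
```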
From c76c11d8cd155d5adc454e046148bcba01570ded Mon Sep 17 00:00:00 2001
From: chertus
Date: Wed, 7 Aug 2019 17:12:57 +0300
Subject: [PATCH 34/44] minor perf improvement

---
 dbms/src/Columns/ColumnDecimal.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp
index 73b0ee282d6..5e475b64dfa 100644
--- a/dbms/src/Columns/ColumnDecimal.cpp
+++ b/dbms/src/Columns/ColumnDecimal.cpp
@@ -30,6 +30,8 @@ int ColumnDecimal<T>::compareAt(size_t n, size_t m, const IColumn & rhs_, int) c
     const T & a = data[n];
     const T & b = other.data[m];

+    if (scale == other.scale)
+        return a > b ? 1 : (a < b ? -1 : 0);
     return decimalLess(b, a, other.scale, scale) ? 1 : (decimalLess(a, b, scale, other.scale) ? -1 : 0);
 }

From e6a93698cb49964f7f1c6b759e0e40226bae4d39 Mon Sep 17 00:00:00 2001
From: chertus
Date: Wed, 7 Aug 2019 17:41:03 +0300
Subject: [PATCH 35/44] add perf test

---
 dbms/tests/performance/order_by_decimals.xml | 30 ++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 dbms/tests/performance/order_by_decimals.xml

diff --git a/dbms/tests/performance/order_by_decimals.xml b/dbms/tests/performance/order_by_decimals.xml
new file mode 100644
index 00000000000..ad6937cd1d6
--- /dev/null
+++ b/dbms/tests/performance/order_by_decimals.xml
@@ -0,0 +1,30 @@
+<test>
+    <tags>
+        <tag>sorting</tag>
+        <tag>comparison</tag>
+    </tags>
+
+    <type>loop</type>
+
+    <stop_conditions>
+        <all_of>
+            <iterations>5</iterations>
+            <min_time_not_changing_for_ms>10000</min_time_not_changing_for_ms>
+        </all_of>
+        <any_of>
+            <iterations>50</iterations>
+            <total_time_ms>60000</total_time_ms>
+        </any_of>
+    </stop_conditions>
+
+    <query>SELECT toInt32(number) AS n FROM numbers(1000000) ORDER BY n DESC</query>
+    <query>SELECT toDecimal32(number, 0) AS n FROM numbers(1000000) ORDER BY n</query>
+
+    <query>SELECT toDecimal32(number, 0) AS n FROM numbers(1000000) ORDER BY n DESC</query>
+    <query>SELECT toDecimal64(number, 8) AS n FROM numbers(1000000) ORDER BY n DESC</query>
+    <query>SELECT toDecimal128(number, 10) AS n FROM numbers(1000000) ORDER BY n DESC</query>
+</test>

From 7e22f78ce42729eb31a7ea8ba40f5dfe664a72e9 Mon Sep 17 00:00:00 2001
From: CurtizJ
Date: Wed, 7 Aug 2019 18:47:25 +0300
Subject: [PATCH 36/44] fix segfault with enabled 'optimize_skip_unused_shards' and missing sharding key

---
 dbms/src/Storages/StorageDistributed.cpp | 2 +-
 ...skip_unused_shards_without_sharding_key.reference | 1 +
 ...00980_skip_unused_shards_without_sharding_key.sql | 12 ++++++++++++
 3 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 dbms/tests/queries/0_stateless/00980_skip_unused_shards_without_sharding_key.reference
 create mode 100644 dbms/tests/queries/0_stateless/00980_skip_unused_shards_without_sharding_key.sql

diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp
index 6155dabd028..2ecae5789dc 100644
--- a/dbms/src/Storages/StorageDistributed.cpp
+++ b/dbms/src/Storages/StorageDistributed.cpp
@@ -493,7 +493,7 @@ ClusterPtr StorageDistributed::skipUnusedShards(ClusterPtr cluster, const Select
 {
     const auto & select = query_info.query->as<ASTSelectQuery &>();

-    if (!select.where())
+    if (!select.where() || !sharding_key_expr)
         return nullptr;

     const auto & blocks = evaluateExpressionOverConstantCondition(select.where(), sharding_key_expr);
diff --git a/dbms/tests/queries/0_stateless/00980_skip_unused_shards_without_sharding_key.reference b/dbms/tests/queries/0_stateless/00980_skip_unused_shards_without_sharding_key.reference
new file mode 100644
index 00000000000..d00491fd7e5
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00980_skip_unused_shards_without_sharding_key.reference
@@ -0,0 +1 @@
+1
diff --git a/dbms/tests/queries/0_stateless/00980_skip_unused_shards_without_sharding_key.sql b/dbms/tests/queries/0_stateless/00980_skip_unused_shards_without_sharding_key.sql
new file mode 100644
index 00000000000..cccc4a81038
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00980_skip_unused_shards_without_sharding_key.sql
@@ -0,0 +1,12
@@ +DROP TABLE IF EXISTS t_local; +DROP TABLE IF EXISTS t_distr; + +CREATE TABLE t_local (a Int) ENGINE = Memory; +CREATE TABLE t_distr (a Int) ENGINE = Distributed(test_shard_localhost, currentDatabase(), 't_local'); + +INSERT INTO t_local VALUES (1), (2); +SET optimize_skip_unused_shards = 1; +SELECT * FROM t_distr WHERE a = 1; + +DROP table t_local; +DROP table t_distr; From 910e98ef02a5654cc65355913538f8c187891485 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 7 Aug 2019 18:50:22 +0300 Subject: [PATCH 37/44] Changed boost::filesystem to std::filesystem --- libs/libcommon/src/DateLUT.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/libs/libcommon/src/DateLUT.cpp b/libs/libcommon/src/DateLUT.cpp index ce3e7e32a26..183d003ffe8 100644 --- a/libs/libcommon/src/DateLUT.cpp +++ b/libs/libcommon/src/DateLUT.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include #include @@ -26,7 +26,7 @@ Poco::DigestEngine::Digest calcSHA1(const std::string & path) std::string determineDefaultTimeZone() { - namespace fs = boost::filesystem; + namespace fs = std::filesystem; const char * tzdir_env_var = std::getenv("TZDIR"); fs::path tz_database_path = tzdir_env_var ? tzdir_env_var : "/usr/share/zoneinfo/"; @@ -87,7 +87,10 @@ std::string determineDefaultTimeZone() /// Try the same with full symlinks resolution { - tz_file_path = fs::canonical(tz_file_path, tz_database_path); + if (!tz_file_path.is_absolute()) + tz_file_path = tz_database_path / tz_file_path; + + tz_file_path = fs::canonical(tz_file_path); fs::path relative_path = tz_file_path.lexically_relative(tz_database_path); if (!relative_path.empty() && *relative_path.begin() != ".." && *relative_path.begin() != ".") @@ -109,11 +112,11 @@ std::string determineDefaultTimeZone() { /// Some timezone databases contain copies of toplevel tzdata files in the posix/ directory /// and tzdata files with leap seconds in the right/ directory. Skip them. - candidate_it.no_push(); + candidate_it.disable_recursion_pending(); continue; } - if (candidate_it->status().type() != fs::regular_file || path.filename() == "localtime") + if (!fs::is_regular_file(*candidate_it) || path.filename() == "localtime") continue; if (fs::file_size(path) == tzfile_size && calcSHA1(path.string()) == tzfile_sha1) From da6ca3f8c749133dc76b213fc8bfa6284e193c4e Mon Sep 17 00:00:00 2001 From: BayoNet Date: Wed, 7 Aug 2019 19:02:56 +0300 Subject: [PATCH 38/44] DOCAPI-7413: T64 codec docs (#6347) --- docs/en/operations/table_engines/mergetree.md | 6 +- docs/en/query_language/create.md | 76 ++++++++++--------- 2 files changed, 42 insertions(+), 40 deletions(-) diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md index 2c2141a0a87..2a099a8947d 100644 --- a/docs/en/operations/table_engines/mergetree.md +++ b/docs/en/operations/table_engines/mergetree.md @@ -81,9 +81,9 @@ For descriptions of request parameters, see the [request description](../../quer - `merge_with_ttl_timeout` — Minimum delay in seconds before repeating a merge with TTL. Default value: 86400 (1 day). -**Example of setting the sections ** +**Example of setting the sections** -``` +```sql ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192 ``` @@ -125,7 +125,7 @@ The `MergeTree` engine is configured in the same way as in the example above for ## Data Storage -A table consists of data *parts* sorted by primary key. 
+A table consists of data parts sorted by primary key.

 When data is inserted in a table, separate data parts are created and each of them is lexicographically sorted by primary key. For example, if the primary key is `(CounterID, Date)`, the data in the part is sorted by `CounterID`, and within each `CounterID`, it is ordered by `Date`.
diff --git a/docs/en/query_language/create.md b/docs/en/query_language/create.md
index bd2228efa94..81d7982eb00 100644
--- a/docs/en/query_language/create.md
+++ b/docs/en/query_language/create.md
@@ -109,25 +109,7 @@ Defines storage time for values. Can be specified only for MergeTree-family tabl

 ## Column Compression Codecs

-Besides default data compression, defined in [server settings](../operations/server_settings/settings.md#compression), per-column specification is also available.
-
-Supported compression algorithms:
-
-- `NONE` — No compression.
-- `LZ4` — Lossless [data compression algorithm](https://github.com/lz4/lz4) used by default. Applies LZ4 fast compression.
-- `LZ4HC[(level)]` — LZ4 CH (high compression) algorithm with configurable level. Default level: 9. If you set `level <= 0`, the default level is applied. Possible levels: [1, 12]. Recommended levels are in range: [4, 9].
-- `ZSTD[(level)]` — [ZSTD compression algorithm](https://en.wikipedia.org/wiki/Zstandard) with configurable `level`. Possible levels: [1, 22]. Default value: 1.
-- `Delta(delta_bytes)` — compression approach, when raw values are replaced with the difference of two neighbour values. Up to `delta_bytes` are used for storing delta value, so `delta_bytes` is a maximum size of raw values.
-Possible `delta_bytes` values: 1, 2, 4, 8. Default value for `delta_bytes` is `sizeof(type)`, if it is equals to 1, 2, 4, 8. Otherwise it equals 1.
-- `DoubleDelta` — Compresses values down to 1 bit (in the best case), using deltas calculation. Best compression rates are achieved on monotonic sequences with constant stride, for example, time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64 bit types. Uses 1 extra bit for 32 byte deltas: 5 bit prefix instead of 4 bit prefix. For additional information, see the "Compressing time stamps" section of the [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf) document.
-- `Gorilla` — Compresses values down to 1 bit (in the best case). The codec is efficient when storing series of floating point values that change slowly, because the best compression rate is achieved when neighbouring values are binary equal. Implements the algorithm used in Gorilla TSDB, extending it to support 64 bit types. For additional information, see the "Compressing values" section of the [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf) document.
-
-High compression levels useful for asymmetric scenarios, like compress once, decompress a lot of times. Greater levels stands for better compression and higher CPU usage.
-
-!!!warning
-    You cannot decompress ClickHouse database files with external utilities, for example, `lz4`. Use the special utility [clickhouse-compressor](https://github.com/yandex/ClickHouse/tree/master/dbms/programs/compressor).
-
-Syntax example:
+By default, ClickHouse applies the compression method defined in [server settings](../operations/server_settings/settings.md#compression) to columns.
You can also define the compression method for each individual column in the `CREATE TABLE` query.

 ```
 CREATE TABLE codec_example
 (
     dt Date CODEC(ZSTD),
     ts DateTime CODEC(LZ4HC),
     float_value Float32 CODEC(NONE),
     double_value Float64 CODEC(LZ4HC(9))
-)
-ENGINE = MergeTree
-PARTITION BY tuple()
-ORDER BY dt
-```
-
-Codecs can be combined in a pipeline. Default table codec is not included into pipeline (if it should be applied to a column, you have to specify it explicitly in pipeline). Example below shows an optimization approach for storing timeseries metrics.
-Usually, values for particular metric, stored in `path` does not differ significantly from point to point. Using delta-encoding allows to reduce disk space usage significantly.
-
-```
-CREATE TABLE timeseries_example
-(
-    dt Date,
-    ts DateTime,
-    path String,
     value Float32 CODEC(Delta, ZSTD)
 )
-ENGINE = MergeTree
-PARTITION BY dt
-ORDER BY (path, ts)
+ENGINE =
+...
 ```
+
+If a codec is specified, the default codec doesn't apply. Codecs can be combined in a pipeline, for example, `CODEC(Delta, ZSTD)`. To select the best codec combination for your project, run benchmarks similar to those described in the Altinity [New Encodings to Improve ClickHouse Efficiency](https://www.altinity.com/blog/2019/7/new-encodings-to-improve-clickhouse) article.
+
+!!!warning
+    You cannot decompress ClickHouse database files with external utilities, for example, `lz4`. Use the special utility, [clickhouse-compressor](https://github.com/yandex/ClickHouse/tree/master/dbms/programs/compressor).
+
+Compression is supported for the following table engines:
+
+- [*MergeTree](../operations/table_engines/mergetree.md) family
+- [*Log](../operations/table_engines/log_family.md) family
+- [Set](../operations/table_engines/set.md)
+- [Join](../operations/table_engines/join.md)
+
+ClickHouse supports common purpose codecs and specialized codecs.
+
+### Specialized codecs {#create-query-specialized-codecs}
+
+These codecs are designed to make compression more effective by exploiting specific features of the data. Some of these codecs don't compress data themselves, but they prepare the data so that common purpose codecs can compress it better.
+
+Specialized codecs:
+
+- `Delta(delta_bytes)` — Compression approach in which raw values are replaced with the difference of two neighbor values. Up to `delta_bytes` are used for storing the delta value, so `delta_bytes` is the maximum size of raw values. Possible `delta_bytes` values: 1, 2, 4, 8. The default value for `delta_bytes` is `sizeof(type)` if it equals 1, 2, 4, or 8. Otherwise it equals 1.
+- `DoubleDelta` — Compresses values down to 1 bit (in the best case), using delta calculation. Best compression rates are achieved on monotonic sequences with constant stride, for example, time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64 bit types. Uses 1 extra bit for 32 byte deltas: 5 bit prefix instead of 4 bit prefix. For additional information, see the "Compressing time stamps" section of the [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf) document.
+- `Gorilla` — Compresses values down to 1 bit (in the best case). The codec is efficient when storing series of floating point values that change slowly, because the best compression rate is achieved when neighboring values are binary equal. Implements the algorithm used in Gorilla TSDB, extending it to support 64 bit types.
+For additional information, see the "Compressing values" section of the [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf) document.
+- `T64` — Compression approach that crops unused high bits of values in integer data types (including `Enum`, `Date` and `DateTime`). At each step of its algorithm, the codec takes a block of 64 values, puts them into a 64x64 bit matrix, transposes it, crops the unused bits and returns the rest as a sequence. Unused bits are the bits that don't differ between the maximum and minimum values in the whole data part for which the compression is used.
+
+### Common purpose codecs {#create-query-common-purpose-codecs}
+
+Codecs:
+
+- `NONE` — No compression.
+- `LZ4` — Lossless [data compression algorithm](https://github.com/lz4/lz4) used by default. Applies LZ4 fast compression.
+- `LZ4HC[(level)]` — LZ4 HC (high compression) algorithm with configurable level. Default level: 9. If you set `level <= 0`, the default level is applied. Possible levels: [1, 12]. Recommended levels are in the range [4, 9].
+- `ZSTD[(level)]` — [ZSTD compression algorithm](https://en.wikipedia.org/wiki/Zstandard) with configurable `level`. Possible levels: [1, 22]. Default level: 1.
+
+High compression levels are useful for asymmetric scenarios, such as compressing once and decompressing many times. Higher levels mean better compression and higher CPU usage.
+
 ## Temporary Tables
 
 ClickHouse supports temporary tables which have the following characteristics:

From 1bf6e034e8157966ac039c7932f8d677f2f3779f Mon Sep 17 00:00:00 2001
From: Ivan <5627721+abyss7@users.noreply.github.com>
Date: Wed, 7 Aug 2019 19:10:14 +0300
Subject: [PATCH 39/44] Fix infinite loop when reading Kafka messages (#6354)

* Do not pause/resume consumer at all
* Fix kafka tests
* Try to ensure the subscription
* Set timeout for kafka tests and return 'while True'
* Update cluster.py
* When doing a raw select from kafka, ignore client errors. They may rise due to 'Local: Timed out' while subscribing.
---
 dbms/src/Storages/IStorage.cpp                |  7 +-
 dbms/src/Storages/IStorage.h                  |  2 +-
 .../Kafka/ReadBufferFromKafkaConsumer.cpp     | 32 +++++---
 dbms/src/Storages/StorageValues.cpp           |  2 +-
 .../integration/test_storage_kafka/test.py    | 76 +++++++++++--------
 5 files changed, 73 insertions(+), 46 deletions(-)

diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp
index 687ca970311..1504df4f68d 100644
--- a/dbms/src/Storages/IStorage.cpp
+++ b/dbms/src/Storages/IStorage.cpp
@@ -145,9 +145,12 @@ namespace
     }
 }
 
-void IStorage::check(const Names & column_names) const
+void IStorage::check(const Names & column_names, bool include_virtuals) const
 {
-    const NamesAndTypesList & available_columns = getColumns().getAllPhysical();
+    NamesAndTypesList available_columns = getColumns().getAllPhysical();
+    if (include_virtuals)
+        available_columns.splice(available_columns.end(), getColumns().getVirtuals());
+
     const String list_of_columns = listOfColumns(available_columns);
 
     if (column_names.empty())
diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h
index 7d259f289ee..3f38dc08b83 100644
--- a/dbms/src/Storages/IStorage.h
+++ b/dbms/src/Storages/IStorage.h
@@ -116,7 +116,7 @@ public: /// thread-unsafe part. lockStructure must be acquired
 
     /// Verify that all the requested names are in the table and are set correctly:
     /// list of names is not empty and the names do not repeat.
-    void check(const Names & column_names) const;
+    void check(const Names & column_names, bool include_virtuals = false) const;
 
     /// Check that all the requested names are in the table and have the correct types.
     void check(const NamesAndTypesList & columns) const;
diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp
index db3de302dd8..01fd09db7e3 100644
--- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp
+++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp
@@ -56,32 +56,42 @@ void ReadBufferFromKafkaConsumer::commit()
 void ReadBufferFromKafkaConsumer::subscribe(const Names & topics)
 {
     {
-        String message = "Subscribed to topics:";
+        String message = "Already subscribed to topics:";
         for (const auto & topic : consumer->get_subscription())
             message += " " + topic;
         LOG_TRACE(log, message);
     }
 
     {
-        String message = "Assigned to topics:";
+        String message = "Already assigned to topics:";
         for (const auto & toppar : consumer->get_assignment())
             message += " " + toppar.get_topic();
         LOG_TRACE(log, message);
     }
 
-    consumer->resume();
-
     // While we wait for an assignment after subscribtion, we'll poll zero messages anyway.
     // If we're doing a manual select then it's better to get something after a wait, then immediate nothing.
-    if (consumer->get_subscription().empty())
+    // But due to the nature of async pause/resume/subscribe we can't guarantee any persistent state:
+    // see https://github.com/edenhill/librdkafka/issues/2455
+    while (consumer->get_subscription().empty())
     {
-        consumer->pause(); // don't accidentally read any messages
-        consumer->subscribe(topics);
-        consumer->poll(5s);
-        consumer->resume();
+        stalled = false;
 
-        // FIXME: if we failed to receive "subscribe" response while polling and destroy consumer now, then we may hang up.
-        // see https://github.com/edenhill/librdkafka/issues/2077
+        try
+        {
+            consumer->subscribe(topics);
+            if (nextImpl())
+                break;
+
+            // FIXME: if we failed to receive "subscribe" response while polling and destroy consumer now, then we may hang up.
+            // see https://github.com/edenhill/librdkafka/issues/2077
+        }
+        catch (cppkafka::HandleException & e)
+        {
+            if (e.get_error() == RD_KAFKA_RESP_ERR__TIMED_OUT)
+                continue;
+            throw;
+        }
     }
 
     stalled = false;
diff --git a/dbms/src/Storages/StorageValues.cpp b/dbms/src/Storages/StorageValues.cpp
index d289a4d6579..79d1641f6c2 100644
--- a/dbms/src/Storages/StorageValues.cpp
+++ b/dbms/src/Storages/StorageValues.cpp
@@ -21,7 +21,7 @@ BlockInputStreams StorageValues::read(
     size_t /*max_block_size*/,
     unsigned /*num_streams*/)
 {
-    check(column_names);
+    check(column_names, true);
 
     return BlockInputStreams(1, std::make_shared<OneBlockInputStream>(res_block));
 }
diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py
index 9be725d33b7..f066dc34a7f 100644
--- a/dbms/tests/integration/test_storage_kafka/test.py
+++ b/dbms/tests/integration/test_storage_kafka/test.py
@@ -122,6 +122,7 @@ def kafka_setup_teardown():
 
 # Tests
 
+@pytest.mark.timeout(60)
 def test_kafka_settings_old_syntax(kafka_cluster):
     instance.query('''
         CREATE TABLE test.kafka (key UInt64, value UInt64)
@@ -136,14 +137,15 @@ def test_kafka_settings_old_syntax(kafka_cluster):
     kafka_produce('old', messages)
 
     result = ''
-    for i in range(50):
-        result += instance.query('SELECT * FROM test.kafka')
+    while True:
+        result += instance.query('SELECT * FROM test.kafka', ignore_error=True)
         if kafka_check_result(result):
            break
-        time.sleep(0.5)
+
     kafka_check_result(result, True)
 
-@pytest.mark.skip(reason="fails for some reason")
+
+@pytest.mark.timeout(60)
 def test_kafka_settings_new_syntax(kafka_cluster):
     instance.query('''
         CREATE TABLE test.kafka (key UInt64, value UInt64)
@@ -171,14 +173,15 @@ def test_kafka_settings_new_syntax(kafka_cluster):
     kafka_produce('new', messages)
 
     result = ''
-    for i in range(50):
-        result += instance.query('SELECT * FROM test.kafka')
+    while True:
+        result += instance.query('SELECT * FROM test.kafka', ignore_error=True)
         if kafka_check_result(result):
            break
-        time.sleep(0.5)
+
     kafka_check_result(result, True)
 
 
+@pytest.mark.timeout(60)
 def test_kafka_csv_with_delimiter(kafka_cluster):
     instance.query('''
         CREATE TABLE test.kafka (key UInt64, value UInt64)
@@ -196,14 +199,15 @@ def test_kafka_csv_with_delimiter(kafka_cluster):
     kafka_produce('csv', messages)
 
     result = ''
-    for i in range(50):
-        result += instance.query('SELECT * FROM test.kafka')
+    while True:
+        result += instance.query('SELECT * FROM test.kafka', ignore_error=True)
         if kafka_check_result(result):
            break
-        time.sleep(0.5)
+
     kafka_check_result(result, True)
 
 
+@pytest.mark.timeout(60)
 def test_kafka_tsv_with_delimiter(kafka_cluster):
     instance.query('''
         CREATE TABLE test.kafka (key UInt64, value UInt64)
@@ -221,14 +225,15 @@ def test_kafka_tsv_with_delimiter(kafka_cluster):
     kafka_produce('tsv', messages)
 
     result = ''
-    for i in range(50):
-        result += instance.query('SELECT * FROM test.kafka')
+    while True:
+        result += instance.query('SELECT * FROM test.kafka', ignore_error=True)
         if kafka_check_result(result):
            break
-        time.sleep(0.5)
+
     kafka_check_result(result, True)
 
 
+@pytest.mark.timeout(60)
 def test_kafka_json_without_delimiter(kafka_cluster):
     instance.query('''
         CREATE TABLE test.kafka (key UInt64, value UInt64)
@@ -250,14 +255,15 @@ def test_kafka_json_without_delimiter(kafka_cluster):
     kafka_produce('json', [messages])
 
     result = ''
-    for i in range(50):
-        result += instance.query('SELECT * FROM test.kafka')
+    while True:
+        result += instance.query('SELECT * FROM test.kafka', ignore_error=True)
         if kafka_check_result(result):
            break
-        time.sleep(0.5)
+
     kafka_check_result(result, True)
 
 
+@pytest.mark.timeout(60)
 def test_kafka_protobuf(kafka_cluster):
     instance.query('''
         CREATE TABLE test.kafka (key UInt64, value String)
@@ -274,14 +280,15 @@ def test_kafka_protobuf(kafka_cluster):
     kafka_produce_protobuf_messages('pb', 21, 29)
 
     result = ''
-    for i in range(50):
-        result += instance.query('SELECT * FROM test.kafka')
+    while True:
+        result += instance.query('SELECT * FROM test.kafka', ignore_error=True)
         if kafka_check_result(result):
            break
-        time.sleep(0.5)
+
     kafka_check_result(result, True)
 
 
+@pytest.mark.timeout(60)
 def test_kafka_materialized_view(kafka_cluster):
     instance.query('''
         DROP TABLE IF EXISTS test.view;
@@ -305,19 +312,20 @@ def test_kafka_materialized_view(kafka_cluster):
         messages.append(json.dumps({'key': i, 'value': i}))
     kafka_produce('mv', messages)
 
-    for i in range(50):
+    while True:
         result = instance.query('SELECT * FROM test.view')
         if kafka_check_result(result):
             break
-        time.sleep(0.5)
-
-    kafka_check_result(result, True)
 
     instance.query('''
         DROP TABLE test.consumer;
         DROP TABLE test.view;
    ''')
 
-@pytest.mark.skip(reason="Hungs")
+    kafka_check_result(result, True)
+
+
+@pytest.mark.timeout(300)
 def test_kafka_flush_on_big_message(kafka_cluster):
     # Create batchs of messages of size ~100Kb
     kafka_messages = 1000
@@ -354,15 +362,20 @@ def test_kafka_flush_on_big_message(kafka_cluster):
         except kafka.errors.GroupCoordinatorNotAvailableError:
             continue
 
-    for i in range(50):
+    while True:
         result = instance.query('SELECT count() FROM test.view')
         if int(result) == kafka_messages*batch_messages:
             break
-        time.sleep(0.5)
+
+    instance.query('''
+        DROP TABLE test.consumer;
+        DROP TABLE test.view;
+    ''')
 
     assert int(result) == kafka_messages*batch_messages, 'ClickHouse lost some messages: {}'.format(result)
 
 
+@pytest.mark.timeout(60)
 def test_kafka_virtual_columns(kafka_cluster):
     instance.query('''
         CREATE TABLE test.kafka (key UInt64, value UInt64)
@@ -384,14 +397,15 @@ def test_kafka_virtual_columns(kafka_cluster):
     kafka_produce('virt1', [messages])
 
     result = ''
-    for i in range(50):
-        result += instance.query('SELECT _key, key, _topic, value, _offset FROM test.kafka')
+    while True:
+        result += instance.query('SELECT _key, key, _topic, value, _offset FROM test.kafka', ignore_error=True)
         if kafka_check_result(result, False, 'test_kafka_virtual1.reference'):
            break
-        time.sleep(0.5)
+
     kafka_check_result(result, True, 'test_kafka_virtual1.reference')
 
 
+@pytest.mark.timeout(60)
 def test_kafka_virtual_columns_with_materialized_view(kafka_cluster):
     instance.query('''
         DROP TABLE IF EXISTS test.view;
@@ -415,18 +429,18 @@ def test_kafka_virtual_columns_with_materialized_view(kafka_cluster):
         messages.append(json.dumps({'key': i, 'value': i}))
     kafka_produce('virt2', messages)
 
-    for i in range(50):
+    while True:
         result = instance.query('SELECT kafka_key, key, topic, value, offset FROM test.view')
         if kafka_check_result(result, False, 'test_kafka_virtual2.reference'):
             break
-        time.sleep(0.5)
-
-    kafka_check_result(result, True, 'test_kafka_virtual2.reference')
 
     instance.query('''
         DROP TABLE test.consumer;
         DROP TABLE test.view;
     ''')
 
+    kafka_check_result(result, True, 'test_kafka_virtual2.reference')
+
 
 if __name__ == '__main__':
     cluster.start()

From c5b25b23e3092b815c8eaa58cb832bfc1c7423db Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Wed, 7 Aug 2019 20:06:56 +0300
Subject: [PATCH 40/44] DOCAPI-8016: Adam weights mention in docs

* DOCAPI-8016: Added mention about Adam method of updating weights in regressions.
* DOCAPI-8016: RU translation.
---
 docs/en/query_language/agg_functions/reference.md | 4 ++--
 docs/ru/query_language/agg_functions/reference.md | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/en/query_language/agg_functions/reference.md b/docs/en/query_language/agg_functions/reference.md
index f9cb88c0113..350803f5aef 100644
--- a/docs/en/query_language/agg_functions/reference.md
+++ b/docs/en/query_language/agg_functions/reference.md
@@ -1009,7 +1009,7 @@ SELECT arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [3, 4, 5, 6])
 
 ## stochasticLinearRegression {#agg_functions-stochasticlinearregression}
 
-This function implements stochastic linear regression. It supports custom parameters for learning rate, L2 regularization coefficient, mini-batch size and has few methods for updating weights ([simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)).
+This function implements stochastic linear regression. It supports custom parameters for the learning rate, the L2 regularization coefficient and the mini-batch size, and has a few methods for updating weights ([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (used by default), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)).
 
 ### Parameters {#agg_functions-stochasticlinearregression-parameters}
 
@@ -1022,7 +1022,7 @@ stochasticLinearRegression(1.0, 1.0, 10, 'SGD')
 1. `learning rate` is the coefficient on step length, when gradient descent step is performed. Too big learning rate may cause infinite weights of the model. Default is `0.00001`.
 2. `l2 regularization coefficient` which may help to prevent overfitting. Default is `0.1`.
 3. `mini-batch size` sets the number of elements, which gradients will be computed and summed to perform one step of gradient descent. Pure stochastic descent uses one element, however having small batches(about 10 elements) make gradient steps more stable. Default is `15`.
-4. `method for updating weights`, there are 3 of them: `SGD`, `Momentum`, `Nesterov`. `Momentum` and `Nesterov` require little bit more computations and memory, however they happen to be useful in terms of speed of convergance and stability of stochastic gradient methods. Default is `'SGD'`.
+4. `method for updating weights`: `Adam` (by default), `SGD`, `Momentum` or `Nesterov`. `Momentum` and `Nesterov` require a little more computation and memory, but they are useful in terms of convergence speed and stability of stochastic gradient methods.
 
 ### Usage {#agg_functions-stochasticlinearregression-usage}
 
diff --git a/docs/ru/query_language/agg_functions/reference.md b/docs/ru/query_language/agg_functions/reference.md
index fca564b7a14..12308169f9a 100644
--- a/docs/ru/query_language/agg_functions/reference.md
+++ b/docs/ru/query_language/agg_functions/reference.md
@@ -878,7 +878,7 @@ SELECT arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [3, 4, 5, 6])
 
 ## stochasticLinearRegression {#agg_functions-stochasticlinearregression}
 
-Функция реализует стохастическую линейную регрессию.
-Поддерживает пользовательские параметры для скорости обучения, коэффициента регуляризации L2, размера mini-batch и имеет несколько методов обновления весов ([simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)).
+Функция реализует стохастическую линейную регрессию. Поддерживает пользовательские параметры для скорости обучения, коэффициента регуляризации L2, размера mini-batch и имеет несколько методов обновления весов ([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (по умолчанию), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)).
 
 ### Параметры {#agg_functions-stochasticlinearregression-parameters}
 
@@ -891,7 +891,8 @@ stochasticLinearRegression(1.0, 1.0, 10, 'SGD')
 1. Скорость обучения — коэффициент длины шага, при выполнении градиентного спуска. Слишком большая скорость обучения может привести к бесконечным весам модели. По умолчанию `0.00001`.
 2. Коэффициент регуляризации l2. Помогает предотвратить подгонку. По умолчанию `0.1`.
 3. Размер mini-batch задаёт количество элементов, чьи градиенты будут вычислены и просуммированы при выполнении одного шага градиентного спуска. Чистый стохастический спуск использует один элемент, однако использование mini-batch (около 10 элементов) делает градиентные шаги более стабильными. По умолчанию `15`.
-4. Метод обновления весов, можно выбрать один из следующих: `SGD`, `Momentum`, `Nesterov`. `Momentum` и `Nesterov` более требовательные к вычислительным ресурсам и памяти, однако они имеют высокую скорость схождения и остальные методы стохастического градиента. По умолчанию `SGD`.
+4. Метод обновления весов, можно выбрать один из следующих: `Adam` (по умолчанию), `SGD`, `Momentum`, `Nesterov`. `Momentum` и `Nesterov` более требовательные к вычислительным ресурсам и памяти, однако они имеют высокую скорость сходимости и устойчивость методов стохастического градиента.
+
 
 ### Использование {#agg_functions-stochasticlinearregression-usage}
 
@@ -1005,4 +1006,3 @@ stochasticLogisticRegression(1.0, 1.0, 10, 'SGD')
 - [Отличие линейной от логистической регрессии](https://moredez.ru/q/51225972/)
 
 [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/agg_functions/reference/)
-

From 7a0baefac0f63d987c08fa31223e770759af4e78 Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Wed, 7 Aug 2019 21:12:36 +0300
Subject: [PATCH 41/44] DOCAPI-7783: Update of the SET query documentation (#6165)

* DOCAPI-7783: Update of the SET query documentation.
---
 docs/en/query_language/misc.md | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/docs/en/query_language/misc.md b/docs/en/query_language/misc.md
index 31bfea5dc4d..514f5d9f823 100644
--- a/docs/en/query_language/misc.md
+++ b/docs/en/query_language/misc.md
@@ -195,18 +195,21 @@ RENAME TABLE [db11.]name11 TO [db12.]name12, [db21.]name21 TO [db22.]name22, ...
 
 All tables are renamed under global locking. Renaming tables is a light operation. If you indicated another database after TO, the table will be moved to this database. However, the directories with databases must reside in the same file system (otherwise, an error is returned).
 
-## SET
+## SET {#query-set}
 
-``` sql
+```sql
 SET param = value
 ```
 
-Allows you to set `param` to `value`.
-You can also make all the settings from the specified settings profile in a single query. To do this, specify 'profile' as the setting name. For more information, see the section "Settings".
-The setting is made for the session, or for the server (globally) if `GLOBAL` is specified.
-When making a global setting, the setting is not applied to sessions already running, including the current session. It will only be used for new sessions.
+Assigns `value` to the `param` setting for the current session. You cannot change [server settings](../operations/server_settings/index.md) this way.
 
-When the server is restarted, global settings made using `SET` are lost.
-To make settings that persist after a server restart, you can only use the server's config file.
+You can also set all the values from the specified settings profile in a single query.
+
+```sql
+SET profile = 'profile-name-from-the-settings-file'
+```
+
+For more information, see [Settings](../operations/settings/settings.md).
 
 ## SHOW CREATE TABLE

From 688a80dd809ed7ac2484fb99fe5eab8c32d9ee0b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 7 Aug 2019 21:52:53 +0300
Subject: [PATCH 42/44] Added a test just in case #6381

---
 .../tests/gtest_transform_query_for_external_database.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp
index c17676bc655..e61ab6279a2 100644
--- a/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp
+++ b/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp
@@ -69,3 +69,10 @@ TEST(TransformQueryForExternalDatabase, Like)
           "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" NOT LIKE 'w%rld'",
           state().context, state().columns);
 }
+
+TEST(TransformQueryForExternalDatabase, Substring)
+{
+    check("SELECT column FROM test.table WHERE left(column, 10) = RIGHT(column, 10) AND SUBSTRING(column FROM 1 FOR 2) = 'Hello'",
+          "SELECT \"column\" FROM \"test\".\"table\"",
+          state().context, state().columns);
+}

From d256fb9d578fda98473b4fa0272463baef740ea3 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 7 Aug 2019 22:14:58 +0300
Subject: [PATCH 43/44] Removed extra verbose logging from MySQL handler

---
 dbms/programs/server/MySQLHandler.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/programs/server/MySQLHandler.cpp b/dbms/programs/server/MySQLHandler.cpp
index adf35501ee4..6a943183104 100644
--- a/dbms/programs/server/MySQLHandler.cpp
+++ b/dbms/programs/server/MySQLHandler.cpp
@@ -77,7 +77,7 @@ void MySQLHandler::run()
         if (!connection_context.mysql.max_packet_size)
             connection_context.mysql.max_packet_size = MAX_PACKET_LENGTH;
 
-        LOG_DEBUG(log, "Capabilities: " << handshake_response.capability_flags
+/*      LOG_TRACE(log, "Capabilities: " << handshake_response.capability_flags
             << "\nmax_packet_size: "
             << handshake_response.max_packet_size
            << "\ncharacter_set: "
@@ -91,7 +91,7 @@ void MySQLHandler::run()
            << "\ndatabase: "
            << handshake_response.database
            << "\nauth_plugin_name: "
-           << handshake_response.auth_plugin_name);
+           << handshake_response.auth_plugin_name);*/
 
         client_capability_flags = handshake_response.capability_flags;
         if (!(client_capability_flags & CLIENT_PROTOCOL_41))

From a6a9eaef7c59355ac749b290925bec21a3b8f2af Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 8 Aug 2019 00:53:50 +0300
Subject: [PATCH 44/44] Fix for Mac OS build

---
 dbms/src/Common/HashTable/Hash.h | 34 +++++++++++---------------------
 1 file changed, 12 insertions(+), 22 deletions(-)

diff --git a/dbms/src/Common/HashTable/Hash.h b/dbms/src/Common/HashTable/Hash.h
index 6e786e1ddbf..0f740163179 100644
--- a/dbms/src/Common/HashTable/Hash.h
+++ b/dbms/src/Common/HashTable/Hash.h
@@ -3,6 +3,8 @@
 #include <city.h>
 #include <Core/Types.h>
+#include <type_traits>
+
 
 /** Hash functions that are better than the trivial function std::hash.
   *
@@ -57,8 +59,6 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x)
 }
 
-template <typename T> struct DefaultHash;
-
 template <typename T>
 inline size_t DefaultHash64(T key)
 {
@@ -72,28 +72,18 @@ inline size_t DefaultHash64(T key)
     return intHash64(u.out);
 }
 
-#define DEFINE_HASH(T) \
-template <> struct DefaultHash<T>\
-{\
-    size_t operator() (T key) const\
-    {\
-        return DefaultHash64<T>(key);\
-    }\
+template <typename T, typename Enable = void>
+struct DefaultHash;
+
+template <typename T>
+struct DefaultHash<T, std::enable_if_t<std::is_arithmetic_v<T>>>
+{
+    size_t operator() (T key) const
+    {
+        return DefaultHash64<T>(key);
+    }
 };
 
-DEFINE_HASH(DB::UInt8)
-DEFINE_HASH(DB::UInt16)
-DEFINE_HASH(DB::UInt32)
-DEFINE_HASH(DB::UInt64)
-DEFINE_HASH(DB::Int8)
-DEFINE_HASH(DB::Int16)
-DEFINE_HASH(DB::Int32)
-DEFINE_HASH(DB::Int64)
-DEFINE_HASH(DB::Float32)
-DEFINE_HASH(DB::Float64)
-
-#undef DEFINE_HASH
-
 
 template <typename T>
 struct HashCRC32;