From ac1a78f4d9a4382ac52a01ed0efc65b8f08f02e7 Mon Sep 17 00:00:00 2001 From: liyang830 Date: Mon, 24 Jan 2022 11:00:56 +0800 Subject: [PATCH 01/39] fix substr local metadata differ zookeeper metadata --- src/Common/IFactoryWithAliases.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Common/IFactoryWithAliases.h b/src/Common/IFactoryWithAliases.h index f7da302a942..7f5b53a80fa 100644 --- a/src/Common/IFactoryWithAliases.h +++ b/src/Common/IFactoryWithAliases.h @@ -120,8 +120,12 @@ public: const String & getCanonicalNameIfAny(const String & name) const { auto it = case_insensitive_name_mapping.find(Poco::toLower(name)); - if (it != case_insensitive_name_mapping.end()) - return it->second; + if (it != case_insensitive_name_mapping.end()) { + if (it->first != name) + { + return it->second; + } + } return name; } From 58dd1a2d5c029d2cea4eb0d5544ca0f1063789b3 Mon Sep 17 00:00:00 2001 From: cmsxbc Date: Sat, 8 Jan 2022 20:21:39 +0800 Subject: [PATCH 02/39] add function addressToLineWithInlines --- .../sampling-query-profiler.md | 2 +- .../operations/system-tables/stack_trace.md | 2 +- docs/en/operations/system-tables/trace_log.md | 2 +- .../sql-reference/functions/introspection.md | 105 ++++++++++ docs/en/sql-reference/statements/grant.md | 2 + src/Access/Common/AccessType.h | 1 + src/Functions/addressToLineWithInlines.cpp | 192 ++++++++++++++++++ .../registerFunctionsIntrospection.cpp | 2 + .../01271_show_privileges.reference | 1 + .../02117_show_create_table_system.reference | 4 +- .../02161_addressToLineWithInlines.reference | 4 + .../02161_addressToLineWithInlines.sh | 109 ++++++++++ 12 files changed, 421 insertions(+), 5 deletions(-) create mode 100644 src/Functions/addressToLineWithInlines.cpp create mode 100644 tests/queries/0_stateless/02161_addressToLineWithInlines.reference create mode 100755 tests/queries/0_stateless/02161_addressToLineWithInlines.sh diff --git a/docs/en/operations/optimizing-performance/sampling-query-profiler.md 
b/docs/en/operations/optimizing-performance/sampling-query-profiler.md index 9244592d515..72cfa59b8b2 100644 --- a/docs/en/operations/optimizing-performance/sampling-query-profiler.md +++ b/docs/en/operations/optimizing-performance/sampling-query-profiler.md @@ -27,7 +27,7 @@ To analyze the `trace_log` system table: For security reasons, introspection functions are disabled by default. -- Use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces. +- Use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces. If you need to visualize `trace_log` info, try [flamegraph](../../interfaces/third-party/gui/#clickhouse-flamegraph) and [speedscope](https://github.com/laplab/clickhouse-speedscope). diff --git a/docs/en/operations/system-tables/stack_trace.md b/docs/en/operations/system-tables/stack_trace.md index eb1824a6f66..e2135e4beb6 100644 --- a/docs/en/operations/system-tables/stack_trace.md +++ b/docs/en/operations/system-tables/stack_trace.md @@ -2,7 +2,7 @@ Contains stack traces of all server threads. Allows developers to introspect the server state. -To analyze stack frames, use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md). 
+To analyze stack frames, use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md). Columns: diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md index 4902b09004d..ab08ef7415c 100644 --- a/docs/en/operations/system-tables/trace_log.md +++ b/docs/en/operations/system-tables/trace_log.md @@ -4,7 +4,7 @@ Contains stack traces collected by the sampling query profiler. ClickHouse creates this table when the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also the [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) and [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) settings should be set. -To analyze logs, use the `addressToLine`, `addressToSymbol` and `demangle` introspection functions. +To analyze logs, use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` introspection functions. Columns: diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 21b570c65d4..595d3c4a16a 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -113,6 +113,111 @@ trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so /build/glibc-OTsEL5/glibc-2.27/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:97 ``` +## addressToLineWithInlines {#addresstolinewithinlines} + +Similar to `addressToLine`, but returns an Array that also lists all inlined functions. + +If you use official ClickHouse packages, you need to install the `clickhouse-common-static-dbg` package.
+ +**Syntax** + +``` sql +addressToLineWithInlines(address_of_binary_instruction) +``` + +**Arguments** + +- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. + +**Returned value** + +- Array whose first element is the source code filename and the line number in this file, delimited by a colon. Starting from the second element, the source code filename, line number and function name of each inlined function are listed. + +- Array with a single element, the name of the binary, if the function couldn’t find the debug information. + +- Empty array, if the address is not valid. + +Type: [Array(String)](../../sql-reference/data-types/array.md). + +**Example** + +Enabling introspection functions: + +``` sql +SET allow_introspection_functions=1; +``` + +Applying the function to an address: + +```sql +SELECT addressToLineWithInlines(531055181::UInt64); +``` + +``` text +┌─addressToLineWithInlines(CAST('531055181', 'UInt64'))────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ ['./src/Functions/addressToLineWithInlines.cpp:98','./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:176:DB::(anonymous namespace)::FunctionAddressToLineWithInlines::implCached(unsigned long) const'] │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +Applying the function to the whole stack trace: + +``` sql +SELECT + ta, addressToLineWithInlines(arrayJoin(trace) as ta) +FROM system.trace_log +WHERE + query_id = '5e173544-2020-45de-b645-5deebe2aae54'; +``` + +The [arrayJoin](../../sql-reference/functions/array-functions.md#array-functions-join) function splits the array into rows.
+ +``` text +┌────────ta─┬─addressToLineWithInlines(arrayJoin(trace))───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ 365497529 │ ['./build_normal_debug/./contrib/libcxx/include/string_view:252'] │ +│ 365593602 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:191'] │ +│ 365593866 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365592528 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365591003 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:477'] │ +│ 365590479 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:442'] │ +│ 365590600 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:457'] │ +│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │ +│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │ +│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │ +│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365597289 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:807'] │ +│ 365599840 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:1118'] │ +│ 531058145 │ ['./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:152'] │ +│ 531055181 │ ['./src/Functions/addressToLineWithInlines.cpp:98','./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:176:DB::(anonymous namespace)::FunctionAddressToLineWithInlines::implCached(unsigned long) const'] │ +│ 422333613 │ ['./build_normal_debug/./src/Functions/IFunctionAdaptors.h:21'] │ +│ 586866022 │ 
['./build_normal_debug/./src/Functions/IFunction.cpp:216'] │ +│ 586869053 │ ['./build_normal_debug/./src/Functions/IFunction.cpp:264'] │ +│ 586873237 │ ['./build_normal_debug/./src/Functions/IFunction.cpp:334'] │ +│ 597901620 │ ['./build_normal_debug/./src/Interpreters/ExpressionActions.cpp:601'] │ +│ 597898534 │ ['./build_normal_debug/./src/Interpreters/ExpressionActions.cpp:718'] │ +│ 630442912 │ ['./build_normal_debug/./src/Processors/Transforms/ExpressionTransform.cpp:23'] │ +│ 546354050 │ ['./build_normal_debug/./src/Processors/ISimpleTransform.h:38'] │ +│ 626026993 │ ['./build_normal_debug/./src/Processors/ISimpleTransform.cpp:89'] │ +│ 626294022 │ ['./build_normal_debug/./src/Processors/Executors/ExecutionThreadContext.cpp:45'] │ +│ 626293730 │ ['./build_normal_debug/./src/Processors/Executors/ExecutionThreadContext.cpp:63'] │ +│ 626169525 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:213'] │ +│ 626170308 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:178'] │ +│ 626166348 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:329'] │ +│ 626163461 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:84'] │ +│ 626323536 │ ['./build_normal_debug/./src/Processors/Executors/PullingAsyncPipelineExecutor.cpp:85'] │ +│ 626323277 │ ['./build_normal_debug/./src/Processors/Executors/PullingAsyncPipelineExecutor.cpp:112'] │ +│ 626323133 │ ['./build_normal_debug/./contrib/libcxx/include/type_traits:3682'] │ +│ 626323041 │ ['./build_normal_debug/./contrib/libcxx/include/tuple:1415'] │ +└───────────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ + +``` + + ## addressToSymbol {#addresstosymbol} Converts virtual memory address inside ClickHouse server process to the symbol from ClickHouse object files. 
diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 2b1262f7d3c..1b2b63ba0e7 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -172,6 +172,7 @@ Hierarchy of privileges: - `SYSTEM FLUSH LOGS` - [INTROSPECTION](#grant-introspection) - `addressToLine` + - `addressToLineWithInlines` - `addressToSymbol` - `demangle` - [SOURCES](#grant-sources) @@ -430,6 +431,7 @@ Allows using [introspection](../../operations/optimizing-performance/sampling-qu - `INTROSPECTION`. Level: `GROUP`. Aliases: `INTROSPECTION FUNCTIONS` - `addressToLine`. Level: `GLOBAL` + - `addressToLineWithInlines`. Level: `GLOBAL` - `addressToSymbol`. Level: `GLOBAL` - `demangle`. Level: `GLOBAL` diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 4472e975878..e7c70eba575 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -166,6 +166,7 @@ enum class AccessType M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\ \ M(addressToLine, "", GLOBAL, INTROSPECTION) /* allows to execute function addressToLine() */\ + M(addressToLineWithInlines, "", GLOBAL, INTROSPECTION) /* allows to execute function addressToLineWithInlines() */\ M(addressToSymbol, "", GLOBAL, INTROSPECTION) /* allows to execute function addressToSymbol() */\ M(demangle, "", GLOBAL, INTROSPECTION) /* allows to execute function demangle() */\ M(INTROSPECTION, "INTROSPECTION FUNCTIONS", GROUP, ALL) /* allows to execute functions addressToLine(), addressToSymbol(), demangle()*/\ diff --git a/src/Functions/addressToLineWithInlines.cpp b/src/Functions/addressToLineWithInlines.cpp new file mode 100644 index 00000000000..4a3027e399f --- /dev/null +++ b/src/Functions/addressToLineWithInlines.cpp @@ -0,0 +1,192 @@ +#if defined(__ELF__) && !defined(__FreeBSD__) + +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +namespace +{ + + +class FunctionAddressToLineWithInlines : public IFunction +{ +public: + static constexpr auto name = "addressToLineWithInlines"; + static FunctionPtr create(ContextPtr context) + { + context->checkAccess(AccessType::addressToLineWithInlines); + return std::make_shared(); + } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override + { + return 1; + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 1) + throw Exception("Function " + getName() + " needs exactly one argument; passed " + + toString(arguments.size()) + ".", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const auto & type = arguments[0].type; + + if (!WhichDataType(type.get()).isUInt64()) + throw Exception("The only argument for function " + getName() + " must be UInt64. 
Found " + + type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + return std::make_shared(std::make_shared()); + } + + bool useDefaultImplementationForConstants() const override + { + return true; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const ColumnPtr & column = arguments[0].column; + const ColumnUInt64 * column_concrete = checkAndGetColumn(column.get()); + + if (!column_concrete) + throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + + const typename ColumnVector::Container & data = column_concrete->getData(); + auto result_column = ColumnArray::create(ColumnString::create()); + + ColumnString & result_strings = typeid_cast(result_column->getData()); + ColumnArray::Offsets & result_offsets = result_column->getOffsets(); + + ColumnArray::Offset current_offset = 0; + + for (size_t i = 0; i < input_rows_count; ++i) + { + StringRefs res = implCached(data[i]); + for (auto & r : res) + result_strings.insertData(r.data, r.size); + current_offset += res.size(); + result_offsets.push_back(current_offset); + } + + return result_column; + } + +private: + struct Cache + { + std::mutex mutex; + Arena arena; + using Map = HashMap; + Map map; + std::unordered_map dwarfs; + }; + + mutable Cache cache; + + inline ALWAYS_INLINE void appendLocation2Result(StringRefs & result, Dwarf::LocationInfo & location, Dwarf::SymbolizedFrame * frame) const + { + const char * arena_begin = nullptr; + WriteBufferFromArena out(cache.arena, arena_begin); + + writeString(location.file.toString(), out); + writeChar(':', out); + writeIntText(location.line, out); + + if (frame) + { + writeChar(':', out); + int status = 0; + writeString(demangle(frame->name, status), out); + } + + result.emplace_back(out.complete()); + } + + StringRefs impl(uintptr_t addr) const + { + auto symbol_index_ptr = 
SymbolIndex::instance(); + const SymbolIndex & symbol_index = *symbol_index_ptr; + + if (const auto * object = symbol_index.findObject(reinterpret_cast(addr))) + { + auto dwarf_it = cache.dwarfs.try_emplace(object->name, object->elf).first; + if (!std::filesystem::exists(object->name)) + return {}; + + Dwarf::LocationInfo location; + std::vector inline_frames; + if (dwarf_it->second.findAddress(addr - uintptr_t(object->address_begin), location, Dwarf::LocationInfoMode::FULL_WITH_INLINE, inline_frames)) + { + StringRefs ret; + appendLocation2Result(ret, location, nullptr); + for (auto & inline_frame : inline_frames) + appendLocation2Result(ret, inline_frame.location, &inline_frame); + return ret; + } + else + { + return {object->name}; + } + } + else + return {}; + } + + /// ALWAYS_INLINE is also a self-containing testcase used in 0_stateless/02161_addressToLineWithInlines. + /// If changed here, change 02161 together. + inline ALWAYS_INLINE StringRefs implCached(uintptr_t addr) const + { + Cache::Map::LookupResult it; + bool inserted; + std::lock_guard lock(cache.mutex); + cache.map.emplace(addr, it, inserted); + if (inserted) + it->getMapped() = impl(addr); + return it->getMapped(); + } +}; + +} + +void registerFunctionAddressToLineWithInlines(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/registerFunctionsIntrospection.cpp b/src/Functions/registerFunctionsIntrospection.cpp index fe76c96d62d..76a92847d8e 100644 --- a/src/Functions/registerFunctionsIntrospection.cpp +++ b/src/Functions/registerFunctionsIntrospection.cpp @@ -6,6 +6,7 @@ class FunctionFactory; #if defined(OS_LINUX) void registerFunctionAddressToSymbol(FunctionFactory & factory); void registerFunctionAddressToLine(FunctionFactory & factory); +void registerFunctionAddressToLineWithInlines(FunctionFactory & factory); #endif void registerFunctionDemangle(FunctionFactory & factory); @@ -17,6 +18,7 @@ void 
registerFunctionsIntrospection(FunctionFactory & factory) #if defined(OS_LINUX) registerFunctionAddressToSymbol(factory); registerFunctionAddressToLine(factory); + registerFunctionAddressToLineWithInlines(factory); #endif registerFunctionDemangle(factory); registerFunctionTrap(factory); diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index cc237a40a3f..ca7c6312130 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -118,6 +118,7 @@ SYSTEM THREAD FUZZER ['SYSTEM START THREAD FUZZER','SYSTEM STOP THREAD FUZZER',' SYSTEM [] \N ALL dictGet ['dictHas','dictGetHierarchy','dictIsIn'] DICTIONARY ALL addressToLine [] GLOBAL INTROSPECTION +addressToLineWithInlines [] GLOBAL INTROSPECTION addressToSymbol [] GLOBAL INTROSPECTION demangle [] GLOBAL INTROSPECTION INTROSPECTION ['INTROSPECTION FUNCTIONS'] \N ALL diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 234804f1078..1b41e613f5c 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -20,7 +20,7 @@ CREATE TABLE system.errors\n(\n `name` String,\n `code` Int32,\n `value CREATE TABLE system.events\n(\n `event` String,\n `value` UInt64,\n `description` String\n)\nENGINE = SystemEvents()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.formats\n(\n `name` String,\n `is_input` UInt8,\n `is_output` UInt8\n)\nENGINE = SystemFormats()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.functions\n(\n `name` String,\n `is_aggregate` UInt8,\n `case_insensitive` UInt8,\n `alias_to` String,\n `create_query` String,\n `origin` Enum8(\'System\' = 0, \'SQLUserDefined\' = 1, \'ExecutableUserDefined\' = 2)\n)\nENGINE = 
SystemFunctions()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.grants\n(\n `user_name` Nullable(String),\n `role_name` Nullable(String),\n `access_type` Enum8(\'POSTGRES\' = -128, \'SQLITE\' = -127, \'ODBC\' = -126, \'JDBC\' = -125, \'HDFS\' = -124, \'S3\' = -123, \'SOURCES\' = -122, \'ALL\' = -121, \'NONE\' = -120, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE 
USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToSymbol\' = 120, \'demangle\' = 121, \'INTROSPECTION\' = 122, \'FILE\' = 123, \'URL\' = 124, \'REMOTE\' = 125, \'MONGO\' = 126, \'MYSQL\' = 127),\n `database` Nullable(String),\n `table` Nullable(String),\n `column` Nullable(String),\n `is_partial_revoke` UInt8,\n `grant_option` UInt8\n)\nENGINE = SystemGrants()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE 
system.grants\n(\n `user_name` Nullable(String),\n `role_name` Nullable(String),\n `access_type` Enum8(\'MYSQL\' = -128, \'POSTGRES\' = -127, \'SQLITE\' = -126, \'ODBC\' = -125, \'JDBC\' = -124, \'HDFS\' = -123, \'S3\' = -122, \'SOURCES\' = -121, \'ALL\' = -120, \'NONE\' = -119, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE 
ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToLineWithInlines\' = 120, \'addressToSymbol\' = 121, \'demangle\' = 122, \'INTROSPECTION\' = 123, \'FILE\' = 124, \'URL\' = 125, \'REMOTE\' = 126, \'MONGO\' = 127),\n `database` Nullable(String),\n `table` Nullable(String),\n `column` Nullable(String),\n `is_partial_revoke` UInt8,\n `grant_option` UInt8\n)\nENGINE = SystemGrants()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.graphite_retentions\n(\n `config_name` 
String,\n `rule_type` String,\n `regexp` String,\n `function` String,\n `age` UInt64,\n `precision` UInt64,\n `priority` UInt16,\n `is_default` UInt8,\n `Tables.database` Array(String),\n `Tables.table` Array(String)\n)\nENGINE = SystemGraphite()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.licenses\n(\n `library_name` String,\n `license_type` String,\n `license_path` String,\n `license_text` String\n)\nENGINE = SystemLicenses()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.macros\n(\n `macro` String,\n `substitution` String\n)\nENGINE = SystemMacros()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' @@ -35,7 +35,7 @@ CREATE TABLE system.one\n(\n `dummy` UInt8\n)\nENGINE = SystemOne()\nCOMMENT CREATE TABLE system.part_moves_between_shards\n(\n `database` String,\n `table` String,\n `task_name` String,\n `task_uuid` UUID,\n `create_time` DateTime,\n `part_name` String,\n `part_uuid` UUID,\n `to_shard` String,\n `dst_part_name` String,\n `update_time` DateTime,\n `state` String,\n `rollback` UInt8,\n `num_tries` UInt32,\n `last_exception` String\n)\nENGINE = SystemShardMoves()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.parts\n(\n `partition` String,\n `name` String,\n `uuid` UUID,\n `part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `secondary_indices_compressed_bytes` UInt64,\n `secondary_indices_uncompressed_bytes` UInt64,\n `secondary_indices_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `is_frozen` UInt8,\n 
`database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `hash_of_all_files` String,\n `hash_of_uncompressed_files` String,\n `uncompressed_hash_of_compressed_files` String,\n `delete_ttl_info_min` DateTime,\n `delete_ttl_info_max` DateTime,\n `move_ttl_info.expression` Array(String),\n `move_ttl_info.min` Array(DateTime),\n `move_ttl_info.max` Array(DateTime),\n `default_compression_codec` String,\n `recompression_ttl_info.expression` Array(String),\n `recompression_ttl_info.min` Array(DateTime),\n `recompression_ttl_info.max` Array(DateTime),\n `group_by_ttl_info.expression` Array(String),\n `group_by_ttl_info.min` Array(DateTime),\n `group_by_ttl_info.max` Array(DateTime),\n `rows_where_ttl_info.expression` Array(String),\n `rows_where_ttl_info.min` Array(DateTime),\n `rows_where_ttl_info.max` Array(DateTime),\n `projections` Array(String),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.parts_columns\n(\n `partition` String,\n `name` String,\n `uuid` UUID,\n `part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `column` String,\n `type` String,\n `column_position` UInt64,\n `default_kind` String,\n `default_expression` String,\n `column_bytes_on_disk` UInt64,\n `column_data_compressed_bytes` UInt64,\n `column_data_uncompressed_bytes` 
UInt64,\n `column_marks_bytes` UInt64,\n `serialization_kind` String,\n `subcolumns.names` Array(String),\n `subcolumns.types` Array(String),\n `subcolumns.serializations` Array(String),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemPartsColumns()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.privileges\n(\n `privilege` Enum8(\'POSTGRES\' = -128, \'SQLITE\' = -127, \'ODBC\' = -126, \'JDBC\' = -125, \'HDFS\' = -124, \'S3\' = -123, \'SOURCES\' = -122, \'ALL\' = -121, \'NONE\' = -120, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 
55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToSymbol\' = 120, \'demangle\' = 121, \'INTROSPECTION\' = 122, \'FILE\' = 123, \'URL\' = 124, \'REMOTE\' = 125, \'MONGO\' = 126, \'MYSQL\' = 127),\n `aliases` Array(String),\n `level` 
Nullable(Enum8(\'GLOBAL\' = 0, \'DATABASE\' = 1, \'TABLE\' = 2, \'DICTIONARY\' = 3, \'VIEW\' = 4, \'COLUMN\' = 5)),\n `parent_group` Nullable(Enum8(\'POSTGRES\' = -128, \'SQLITE\' = -127, \'ODBC\' = -126, \'JDBC\' = -125, \'HDFS\' = -124, \'S3\' = -123, \'SOURCES\' = -122, \'ALL\' = -121, \'NONE\' = -120, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, 
\'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToSymbol\' = 120, \'demangle\' = 121, \'INTROSPECTION\' = 122, \'FILE\' = 123, \'URL\' = 124, \'REMOTE\' = 125, \'MONGO\' = 126, \'MYSQL\' = 127))\n)\nENGINE = SystemPrivileges()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.privileges\n(\n `privilege` Enum8(\'MYSQL\' = -128, \'POSTGRES\' = -127, \'SQLITE\' = -126, \'ODBC\' = -125, \'JDBC\' = -124, \'HDFS\' = -123, \'S3\' = -122, 
\'SOURCES\' = -121, \'ALL\' = -120, \'NONE\' = -119, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, 
\'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToLineWithInlines\' = 120, \'addressToSymbol\' = 121, \'demangle\' = 122, \'INTROSPECTION\' = 123, \'FILE\' = 124, \'URL\' = 125, \'REMOTE\' = 126, \'MONGO\' = 127),\n `aliases` Array(String),\n `level` Nullable(Enum8(\'GLOBAL\' = 0, \'DATABASE\' = 1, \'TABLE\' = 2, \'DICTIONARY\' = 3, \'VIEW\' = 4, \'COLUMN\' = 5)),\n `parent_group` Nullable(Enum8(\'MYSQL\' = -128, \'POSTGRES\' = -127, \'SQLITE\' = -126, \'ODBC\' = -125, \'JDBC\' = -124, \'HDFS\' = -123, \'S3\' = -122, \'SOURCES\' = -121, \'ALL\' = -120, \'NONE\' = -119, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 
6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW 
SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToLineWithInlines\' = 120, \'addressToSymbol\' = 121, \'demangle\' = 122, \'INTROSPECTION\' = 123, \'FILE\' = 124, \'URL\' = 125, \'REMOTE\' = 126, \'MONGO\' = 127))\n)\nENGINE = SystemPrivileges()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.processes\n(\n `is_initial_query` UInt8,\n `user` String,\n `query_id` String,\n `address` IPv6,\n `port` UInt16,\n `initial_user` String,\n `initial_query_id` String,\n `initial_address` IPv6,\n `initial_port` UInt16,\n `interface` UInt8,\n `os_user` String,\n `client_hostname` String,\n `client_name` String,\n `client_revision` UInt64,\n `client_version_major` UInt64,\n `client_version_minor` UInt64,\n `client_version_patch` UInt64,\n `http_method` UInt8,\n `http_user_agent` String,\n `http_referer` String,\n `forwarded_for` String,\n `quota_key` String,\n `elapsed` Float64,\n 
`is_cancelled` UInt8,\n `read_rows` UInt64,\n `read_bytes` UInt64,\n `total_rows_approx` UInt64,\n `written_rows` UInt64,\n `written_bytes` UInt64,\n `memory_usage` Int64,\n `peak_memory_usage` Int64,\n `query` String,\n `thread_ids` Array(UInt64),\n `ProfileEvents` Map(String, UInt64),\n `Settings` Map(String, String),\n `current_database` String,\n `ProfileEvents.Names` Array(String),\n `ProfileEvents.Values` Array(UInt64),\n `Settings.Names` Array(String),\n `Settings.Values` Array(String)\n)\nENGINE = SystemProcesses()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.projection_parts\n(\n `partition` String,\n `name` String,\n `part_type` String,\n `parent_name` String,\n `parent_uuid` UUID,\n `parent_part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `parent_marks` UInt64,\n `parent_rows` UInt64,\n `parent_bytes_on_disk` UInt64,\n `parent_data_compressed_bytes` UInt64,\n `parent_data_uncompressed_bytes` UInt64,\n `parent_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `is_frozen` UInt8,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `hash_of_all_files` String,\n `hash_of_uncompressed_files` String,\n `uncompressed_hash_of_compressed_files` String,\n `delete_ttl_info_min` DateTime,\n `delete_ttl_info_max` DateTime,\n `move_ttl_info.expression` Array(String),\n `move_ttl_info.min` Array(DateTime),\n `move_ttl_info.max` Array(DateTime),\n `default_compression_codec` String,\n 
`recompression_ttl_info.expression` Array(String),\n `recompression_ttl_info.min` Array(DateTime),\n `recompression_ttl_info.max` Array(DateTime),\n `group_by_ttl_info.expression` Array(String),\n `group_by_ttl_info.min` Array(DateTime),\n `group_by_ttl_info.max` Array(DateTime),\n `rows_where_ttl_info.expression` Array(String),\n `rows_where_ttl_info.min` Array(DateTime),\n `rows_where_ttl_info.max` Array(DateTime),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemProjectionParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.projection_parts_columns\n(\n `partition` String,\n `name` String,\n `part_type` String,\n `parent_name` String,\n `parent_uuid` UUID,\n `parent_part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `parent_marks` UInt64,\n `parent_rows` UInt64,\n `parent_bytes_on_disk` UInt64,\n `parent_data_compressed_bytes` UInt64,\n `parent_data_uncompressed_bytes` UInt64,\n `parent_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `column` String,\n `type` String,\n `column_position` UInt64,\n `default_kind` String,\n `default_expression` String,\n `column_bytes_on_disk` UInt64,\n `column_data_compressed_bytes` UInt64,\n `column_data_uncompressed_bytes` UInt64,\n `column_marks_bytes` UInt64,\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemProjectionPartsColumns()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' diff --git 
a/tests/queries/0_stateless/02161_addressToLineWithInlines.reference b/tests/queries/0_stateless/02161_addressToLineWithInlines.reference new file mode 100644 index 00000000000..48108d5596c --- /dev/null +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.reference @@ -0,0 +1,4 @@ +CHECK: privilege +Code: 446. +CHECK: basic call +Success diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sh b/tests/queries/0_stateless/02161_addressToLineWithInlines.sh new file mode 100755 index 00000000000..0faad6c8e4f --- /dev/null +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.sh @@ -0,0 +1,109 @@ +#! /bin/env bash +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug +# tags are copied from 00974_query_profiler.sql + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +export CLICKHOUSE_DATABASE=system +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +MAX_FAILED_COUNT=10 +MAX_RETRY_COUNT=10 +log_comment="02161_testcase_$(date +'%s')" + +check_exist_sql="SELECT count(), query_id FROM system.trace_log WHERE trace_type IN ('CPU', 'Real') AND query_id IN ( + SELECT query_id FROM query_log WHERE log_comment = '${log_comment}' ORDER BY event_time DESC LIMIT 1 +) GROUP BY query_id" + +declare exist_string_result +declare -A exist_result=([count]=0 [query_id]="") + +function update_log_comment() { + log_comment="02161_testcase_$(date +'%s')" +} + + +function flush_log() { + ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL" -d 'SYSTEM FLUSH LOGS' +} + +function get_trace_count() { + flush_log + ${CLICKHOUSE_CLIENT} -q 'SELECT count() from system.trace_log'; +} + +function make_trace() { + ${CLICKHOUSE_CLIENT} --query_profiler_cpu_time_period_ns=1000000 --allow_introspection_functions 1 -q "SELECT addressToLineWithInlines(arrayJoin(trace)) FROM system.trace_log SETTINGS log_comment='${log_comment}'" +} + +function check_exist() { + exist_string_result=$(${CLICKHOUSE_CLIENT} --log_queries=0 -q "${check_exist_sql}") + 
exist_result[count]="$(echo "$exist_string_result" | cut -f 1)" + exist_result[query_id]="$(echo "$exist_string_result" | cut -f 2)" +} + +function get4fail() { + ${CLICKHOUSE_CLIENT} --allow_introspection_functions 1 -q "SELECT addressToLineWithInlines(arrayJoin(trace)) FROM system.trace_log WHERE trace_type IN ('CPU', 'Real') AND query_id='$1'" + ${CLICKHOUSE_CLIENT} --allow_introspection_functions 1 -q "SELECT addressToLine(arrayJoin(trace)) FROM system.trace_log WHERE trace_type IN ('CPU', 'Real') AND query_id='$1'" +} + +function final_check_inlines() { + final_check_sql="WITH + address_list AS + ( + SELECT DISTINCT arrayJoin(trace) AS address FROM system.trace_log WHERE query_id='$1' + ) +SELECT max(length(addressToLineWithInlines(address))) > 1 FROM address_list;" + result="$(${CLICKHOUSE_CLIENT} --allow_introspection_functions 1 -q "$final_check_sql")" + [[ "$result" == "1" ]] +} + +function final_check() { + final_check_sql="WITH + address_list AS + ( + SELECT DISTINCT arrayJoin(trace) AS address FROM system.trace_log WHERE query_id='$1' + ) +SELECT max(length(addressToLineWithInlines(address))) >= 1 FROM address_list;" + result="$(${CLICKHOUSE_CLIENT} --allow_introspection_functions 1 -q "$final_check_sql")" + [[ "$result" == "1" ]] +} + +echo "CHECK: privilege" +${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL" -d 'SELECT addressToLineWithInlines(1);' | grep -oF 'Code: 446.' || echo 'FAIL' + +echo "CHECK: basic call" + +# won't check inline because there is no debug symbol in some test env. 
+# e.g: https://s3.amazonaws.com/clickhouse-test-reports/33467/2081b43c9ee59615b2fd31c77390744b10eef61e/stateless_tests__release__wide_parts_enabled__actions_.html + +flush_log +result="" +for ((i=0;i /dev/null + flush_log + sleep 1 + check_exist + done + if final_check "${exist_result[query_id]}";then + result="Success" + break + fi + update_log_comment +done + +if final_check "${exist_result[query_id]}"; then + result="Success" +else + echo "query_id: ${exist_result[query_id]}, count: ${exist_result[count]}" + get4fail "${exist_result[query_id]}" +fi +echo "$result" From 4d7073ceeda8c8dc9eb10269ccbfe24c809e8442 Mon Sep 17 00:00:00 2001 From: cmsxbc Date: Wed, 26 Jan 2022 00:20:19 +0800 Subject: [PATCH 03/39] 1. generic addressToLine and addressToLineWithInlines, 2. improve addressToLineWithInlines document --- .../sql-reference/functions/introspection.md | 2 +- src/Functions/addressToLine.cpp | 120 ++-------------- src/Functions/addressToLine.h | 134 ++++++++++++++++++ src/Functions/addressToLineWithInlines.cpp | 118 ++------------- 4 files changed, 159 insertions(+), 215 deletions(-) create mode 100644 src/Functions/addressToLine.h diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 595d3c4a16a..1be68c6bdd4 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -115,7 +115,7 @@ trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so ## addressToLineWithInlines {#addresstolinewithinlines} -Similar to `addressToLine`, but it will return an Array with all inline functions. +Similar to `addressToLine`, but it will return an Array with all inline functions, and will be much slower as a price. If you use official ClickHouse packages, you need to install the `clickhouse-common-static-dbg` package. 
diff --git a/src/Functions/addressToLine.cpp b/src/Functions/addressToLine.cpp index c3e48913e97..47390f53147 100644 --- a/src/Functions/addressToLine.cpp +++ b/src/Functions/addressToLine.cpp @@ -1,38 +1,23 @@ #if defined(__ELF__) && !defined(__FreeBSD__) #include -#include -#include -#include #include -#include #include -#include #include #include #include #include -#include -#include -#include -#include +#include namespace DB { -namespace ErrorCodes -{ - extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - namespace { -class FunctionAddressToLine : public IFunction +class FunctionAddressToLine: public FunctionAddressToLineBase { public: static constexpr auto name = "addressToLine"; @@ -41,113 +26,32 @@ public: context->checkAccess(AccessType::addressToLine); return std::make_shared(); } - - String getName() const override +protected: + DataTypePtr getDataType() const override { - return name; - } - - size_t getNumberOfArguments() const override - { - return 1; - } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - if (arguments.size() != 1) - throw Exception("Function " + getName() + " needs exactly one argument; passed " - + toString(arguments.size()) + ".", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const auto & type = arguments[0].type; - - if (!WhichDataType(type.get()).isUInt64()) - throw Exception("The only argument for function " + getName() + " must be UInt64. 
Found " - + type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - return std::make_shared(); } - - bool useDefaultImplementationForConstants() const override + ColumnPtr getResultColumn(const typename ColumnVector::Container & data, size_t input_rows_count) const override { - return true; - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override - { - const ColumnPtr & column = arguments[0].column; - const ColumnUInt64 * column_concrete = checkAndGetColumn(column.get()); - - if (!column_concrete) - throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - - const typename ColumnVector::Container & data = column_concrete->getData(); auto result_column = ColumnString::create(); - for (size_t i = 0; i < input_rows_count; ++i) { StringRef res_str = implCached(data[i]); result_column->insertData(res_str.data, res_str.size); } - return result_column; } -private: - struct Cache + void setResult(StringRef & result, const Dwarf::LocationInfo & location, const std::vector &) const override { - std::mutex mutex; - Arena arena; - using Map = HashMap; - Map map; - std::unordered_map dwarfs; - }; + const char * arena_begin = nullptr; + WriteBufferFromArena out(cache.arena, arena_begin); - mutable Cache cache; + writeString(location.file.toString(), out); + writeChar(':', out); + writeIntText(location.line, out); - StringRef impl(uintptr_t addr) const - { - auto symbol_index_ptr = SymbolIndex::instance(); - const SymbolIndex & symbol_index = *symbol_index_ptr; - - if (const auto * object = symbol_index.findObject(reinterpret_cast(addr))) - { - auto dwarf_it = cache.dwarfs.try_emplace(object->name, object->elf).first; - if (!std::filesystem::exists(object->name)) - return {}; - - Dwarf::LocationInfo location; - std::vector frames; // NOTE: not used in FAST mode. 
- if (dwarf_it->second.findAddress(addr - uintptr_t(object->address_begin), location, Dwarf::LocationInfoMode::FAST, frames)) - { - const char * arena_begin = nullptr; - WriteBufferFromArena out(cache.arena, arena_begin); - - writeString(location.file.toString(), out); - writeChar(':', out); - writeIntText(location.line, out); - - return out.complete(); - } - else - { - return object->name; - } - } - else - return {}; - } - - StringRef implCached(uintptr_t addr) const - { - Cache::Map::LookupResult it; - bool inserted; - std::lock_guard lock(cache.mutex); - cache.map.emplace(addr, it, inserted); - if (inserted) - it->getMapped() = impl(addr); - return it->getMapped(); + result = out.complete(); } }; diff --git a/src/Functions/addressToLine.h b/src/Functions/addressToLine.h new file mode 100644 index 00000000000..c2130da56a5 --- /dev/null +++ b/src/Functions/addressToLine.h @@ -0,0 +1,134 @@ +#if defined(__ELF__) && !defined(__FreeBSD__) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +template +class FunctionAddressToLineBase : public IFunction +{ +public: + static constexpr auto name = "addressToLineBase"; + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 1) + throw Exception( + "Function " + getName() + " needs exactly one argument; passed " + toString(arguments.size()) + ".", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const 
auto & type = arguments[0].type; + + if (!WhichDataType(type.get()).isUInt64()) + throw Exception( + "The only argument for function " + getName() + " must be UInt64. Found " + type->getName() + " instead.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return getDataType(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const ColumnPtr & column = arguments[0].column; + const ColumnUInt64 * column_concrete = checkAndGetColumn(column.get()); + + if (!column_concrete) + throw Exception( + "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + + const typename ColumnVector::Container & data = column_concrete->getData(); + return getResultColumn(data, input_rows_count); + } + +protected: + virtual DataTypePtr getDataType() const = 0; + virtual ColumnPtr getResultColumn(const typename ColumnVector::Container & data, size_t input_rows_count) const = 0; + virtual void + setResult(ResultT & result, const Dwarf::LocationInfo & location, const std::vector & frames) const = 0; + + struct Cache + { + std::mutex mutex; + Arena arena; + using Map = HashMap; + Map map; + std::unordered_map dwarfs; + }; + + mutable Cache cache; + + ResultT impl(uintptr_t addr) const + { + auto symbol_index_ptr = SymbolIndex::instance(); + const SymbolIndex & symbol_index = *symbol_index_ptr; + + if (const auto * object = symbol_index.findObject(reinterpret_cast(addr))) + { + auto dwarf_it = cache.dwarfs.try_emplace(object->name, object->elf).first; + if (!std::filesystem::exists(object->name)) + return {}; + + Dwarf::LocationInfo location; + std::vector frames; // NOTE: not used in FAST mode. 
+ ResultT result; + if (dwarf_it->second.findAddress(addr - uintptr_t(object->address_begin), location, locationInfoMode, frames)) + { + setResult(result, location, frames); + return result; + } + else + return {object->name}; + } + else + return {}; + } + + ResultT implCached(uintptr_t addr) const + { + typename Cache::Map::LookupResult it; + bool inserted; + std::lock_guard lock(cache.mutex); + cache.map.emplace(addr, it, inserted); + if (inserted) + it->getMapped() = impl(addr); + return it->getMapped(); + } +}; + +} + +#endif diff --git a/src/Functions/addressToLineWithInlines.cpp b/src/Functions/addressToLineWithInlines.cpp index 4a3027e399f..e17fe2a33cc 100644 --- a/src/Functions/addressToLineWithInlines.cpp +++ b/src/Functions/addressToLineWithInlines.cpp @@ -1,42 +1,26 @@ #if defined(__ELF__) && !defined(__FreeBSD__) #include -#include -#include -#include #include #include -#include #include #include -#include #include #include #include #include -#include -#include -#include -#include +#include #include namespace DB { -namespace ErrorCodes -{ - extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - namespace { - -class FunctionAddressToLineWithInlines : public IFunction +class FunctionAddressToLineWithInlines: public FunctionAddressToLineBase { public: static constexpr auto name = "addressToLineWithInlines"; @@ -46,48 +30,15 @@ public: return std::make_shared(); } - String getName() const override +protected: + DataTypePtr getDataType() const override { - return name; - } - - size_t getNumberOfArguments() const override - { - return 1; - } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - if (arguments.size() != 1) - throw Exception("Function " + getName() + " needs exactly one argument; passed " - + 
toString(arguments.size()) + ".", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const auto & type = arguments[0].type; - - if (!WhichDataType(type.get()).isUInt64()) - throw Exception("The only argument for function " + getName() + " must be UInt64. Found " - + type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return std::make_shared(std::make_shared()); } - bool useDefaultImplementationForConstants() const override + ColumnPtr getResultColumn(const typename ColumnVector::Container & data, size_t input_rows_count) const override { - return true; - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override - { - const ColumnPtr & column = arguments[0].column; - const ColumnUInt64 * column_concrete = checkAndGetColumn(column.get()); - - if (!column_concrete) - throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - - const typename ColumnVector::Container & data = column_concrete->getData(); auto result_column = ColumnArray::create(ColumnString::create()); - ColumnString & result_strings = typeid_cast(result_column->getData()); ColumnArray::Offsets & result_offsets = result_column->getOffsets(); @@ -105,19 +56,16 @@ public: return result_column; } -private: - struct Cache + void setResult(StringRefs & result, const Dwarf::LocationInfo & location, const std::vector & inline_frames) const override { - std::mutex mutex; - Arena arena; - using Map = HashMap; - Map map; - std::unordered_map dwarfs; - }; - mutable Cache cache; + appendLocationToResult(result, location, nullptr); + for (const auto & inline_frame : inline_frames) + appendLocationToResult(result, inline_frame.location, &inline_frame); + } +private: - inline ALWAYS_INLINE void appendLocation2Result(StringRefs & result, Dwarf::LocationInfo & location, Dwarf::SymbolizedFrame * frame) const + inline ALWAYS_INLINE void 
appendLocationToResult(StringRefs & result, const Dwarf::LocationInfo & location, const Dwarf::SymbolizedFrame * frame) const { const char * arena_begin = nullptr; WriteBufferFromArena out(cache.arena, arena_begin); @@ -136,48 +84,6 @@ private: result.emplace_back(out.complete()); } - StringRefs impl(uintptr_t addr) const - { - auto symbol_index_ptr = SymbolIndex::instance(); - const SymbolIndex & symbol_index = *symbol_index_ptr; - - if (const auto * object = symbol_index.findObject(reinterpret_cast(addr))) - { - auto dwarf_it = cache.dwarfs.try_emplace(object->name, object->elf).first; - if (!std::filesystem::exists(object->name)) - return {}; - - Dwarf::LocationInfo location; - std::vector inline_frames; - if (dwarf_it->second.findAddress(addr - uintptr_t(object->address_begin), location, Dwarf::LocationInfoMode::FULL_WITH_INLINE, inline_frames)) - { - StringRefs ret; - appendLocation2Result(ret, location, nullptr); - for (auto & inline_frame : inline_frames) - appendLocation2Result(ret, inline_frame.location, &inline_frame); - return ret; - } - else - { - return {object->name}; - } - } - else - return {}; - } - - /// ALWAYS_INLINE is also a self-containing testcase used in 0_stateless/02161_addressToLineWithInlines. - /// If changed here, change 02161 together. 
- inline ALWAYS_INLINE StringRefs implCached(uintptr_t addr) const - { - Cache::Map::LookupResult it; - bool inserted; - std::lock_guard lock(cache.mutex); - cache.map.emplace(addr, it, inserted); - if (inserted) - it->getMapped() = impl(addr); - return it->getMapped(); - } }; } From 4c34d8f56349423033a84e8ef9ca545e931fa43c Mon Sep 17 00:00:00 2001 From: cmsxbc Date: Wed, 26 Jan 2022 00:30:27 +0800 Subject: [PATCH 04/39] fix function names --- src/Functions/addressToLine.cpp | 1 + src/Functions/addressToLine.h | 2 -- src/Functions/addressToLineWithInlines.cpp | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/addressToLine.cpp b/src/Functions/addressToLine.cpp index 47390f53147..6c9eba160cf 100644 --- a/src/Functions/addressToLine.cpp +++ b/src/Functions/addressToLine.cpp @@ -21,6 +21,7 @@ class FunctionAddressToLine: public FunctionAddressToLineBasecheckAccess(AccessType::addressToLine); diff --git a/src/Functions/addressToLine.h b/src/Functions/addressToLine.h index c2130da56a5..8001c90d000 100644 --- a/src/Functions/addressToLine.h +++ b/src/Functions/addressToLine.h @@ -35,8 +35,6 @@ template class FunctionAddressToLineBase : public IFunction { public: - static constexpr auto name = "addressToLineBase"; - String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } diff --git a/src/Functions/addressToLineWithInlines.cpp b/src/Functions/addressToLineWithInlines.cpp index e17fe2a33cc..c3e62bd802e 100644 --- a/src/Functions/addressToLineWithInlines.cpp +++ b/src/Functions/addressToLineWithInlines.cpp @@ -24,6 +24,7 @@ class FunctionAddressToLineWithInlines: public FunctionAddressToLineBasecheckAccess(AccessType::addressToLineWithInlines); From c3e541376d2c78272e5c9c133eca066ddaac5c7c Mon Sep 17 00:00:00 2001 From: cmsxbc Date: Wed, 26 Jan 2022 00:47:43 +0800 Subject: [PATCH 05/39] fix style: add pragma once --- src/Functions/addressToLine.h | 1 + 1 file changed, 1 insertion(+) diff --git 
a/src/Functions/addressToLine.h b/src/Functions/addressToLine.h index 8001c90d000..8216f114b2e 100644 --- a/src/Functions/addressToLine.h +++ b/src/Functions/addressToLine.h @@ -1,3 +1,4 @@ +#pragma once #if defined(__ELF__) && !defined(__FreeBSD__) #include From bdf4305bfb4b57f7e795f8f5314d97cc36e48cc4 Mon Sep 17 00:00:00 2001 From: cmsxbc Date: Wed, 26 Jan 2022 01:33:44 +0800 Subject: [PATCH 06/39] simplify test --- .../02161_addressToLineWithInlines.reference | 6 +- .../02161_addressToLineWithInlines.sh | 109 ------------------ .../02161_addressToLineWithInlines.sql | 24 ++++ 3 files changed, 26 insertions(+), 113 deletions(-) delete mode 100755 tests/queries/0_stateless/02161_addressToLineWithInlines.sh create mode 100644 tests/queries/0_stateless/02161_addressToLineWithInlines.sql diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.reference b/tests/queries/0_stateless/02161_addressToLineWithInlines.reference index 48108d5596c..10e2c7069b3 100644 --- a/tests/queries/0_stateless/02161_addressToLineWithInlines.reference +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.reference @@ -1,4 +1,2 @@ -CHECK: privilege -Code: 446. -CHECK: basic call -Success +10000000000 +has inlines: 1 diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sh b/tests/queries/0_stateless/02161_addressToLineWithInlines.sh deleted file mode 100755 index 0faad6c8e4f..00000000000 --- a/tests/queries/0_stateless/02161_addressToLineWithInlines.sh +++ /dev/null @@ -1,109 +0,0 @@ -#! /bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug -# tags are copied from 00974_query_profiler.sql - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -export CLICKHOUSE_DATABASE=system -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -MAX_FAILED_COUNT=10 -MAX_RETRY_COUNT=10 -log_comment="02161_testcase_$(date +'%s')" - -check_exist_sql="SELECT count(), query_id FROM system.trace_log WHERE trace_type IN ('CPU', 'Real') AND query_id IN ( - SELECT query_id FROM query_log WHERE log_comment = '${log_comment}' ORDER BY event_time DESC LIMIT 1 -) GROUP BY query_id" - -declare exist_string_result -declare -A exist_result=([count]=0 [query_id]="") - -function update_log_comment() { - log_comment="02161_testcase_$(date +'%s')" -} - - -function flush_log() { - ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL" -d 'SYSTEM FLUSH LOGS' -} - -function get_trace_count() { - flush_log - ${CLICKHOUSE_CLIENT} -q 'SELECT count() from system.trace_log'; -} - -function make_trace() { - ${CLICKHOUSE_CLIENT} --query_profiler_cpu_time_period_ns=1000000 --allow_introspection_functions 1 -q "SELECT addressToLineWithInlines(arrayJoin(trace)) FROM system.trace_log SETTINGS log_comment='${log_comment}'" -} - -function check_exist() { - exist_string_result=$(${CLICKHOUSE_CLIENT} --log_queries=0 -q "${check_exist_sql}") - exist_result[count]="$(echo "$exist_string_result" | cut -f 1)" - exist_result[query_id]="$(echo "$exist_string_result" | cut -f 2)" -} - -function get4fail() { - ${CLICKHOUSE_CLIENT} --allow_introspection_functions 1 -q "SELECT addressToLineWithInlines(arrayJoin(trace)) FROM system.trace_log WHERE trace_type IN ('CPU', 'Real') AND query_id='$1'" - ${CLICKHOUSE_CLIENT} --allow_introspection_functions 1 -q "SELECT addressToLine(arrayJoin(trace)) FROM system.trace_log WHERE trace_type IN ('CPU', 'Real') AND query_id='$1'" -} - -function final_check_inlines() { - final_check_sql="WITH - address_list AS - ( - SELECT DISTINCT arrayJoin(trace) AS address FROM system.trace_log WHERE query_id='$1' - ) -SELECT max(length(addressToLineWithInlines(address))) > 1 FROM address_list;" - result="$(${CLICKHOUSE_CLIENT} --allow_introspection_functions 1 -q "$final_check_sql")" - [[ "$result" == "1" ]] -} 
- -function final_check() { - final_check_sql="WITH - address_list AS - ( - SELECT DISTINCT arrayJoin(trace) AS address FROM system.trace_log WHERE query_id='$1' - ) -SELECT max(length(addressToLineWithInlines(address))) >= 1 FROM address_list;" - result="$(${CLICKHOUSE_CLIENT} --allow_introspection_functions 1 -q "$final_check_sql")" - [[ "$result" == "1" ]] -} - -echo "CHECK: privilege" -${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL" -d 'SELECT addressToLineWithInlines(1);' | grep -oF 'Code: 446.' || echo 'FAIL' - -echo "CHECK: basic call" - -# won't check inline because there is no debug symbol in some test env. -# e.g: https://s3.amazonaws.com/clickhouse-test-reports/33467/2081b43c9ee59615b2fd31c77390744b10eef61e/stateless_tests__release__wide_parts_enabled__actions_.html - -flush_log -result="" -for ((i=0;i /dev/null - flush_log - sleep 1 - check_exist - done - if final_check "${exist_result[query_id]}";then - result="Success" - break - fi - update_log_comment -done - -if final_check "${exist_result[query_id]}"; then - result="Success" -else - echo "query_id: ${exist_result[query_id]}, count: ${exist_result[count]}" - get4fail "${exist_result[query_id]}" -fi -echo "$result" diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql new file mode 100644 index 00000000000..9b7249ceff4 --- /dev/null +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql @@ -0,0 +1,24 @@ +-- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug + + +SELECT addressToLineWithInlines(1); -- { serverError 446 } + +SET allow_introspection_functions = 1; +SET query_profiler_real_time_period_ns = 0; +SET query_profiler_cpu_time_period_ns = 1000000; +SET log_queries = 1; +SELECT count() FROM numbers_mt(10000000000) SETTINGS log_comment='02161_test_case'; +SET log_queries = 0; +SET query_profiler_cpu_time_period_ns = 0; +SYSTEM FLUSH LOGS; + +WITH + address_list AS + ( + SELECT DISTINCT arrayJoin(trace) AS 
address FROM system.trace_log WHERE query_id = + ( + SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment='02161_test_case' ORDER BY event_time DESC LIMIT 1 + ) + ) +SELECT 'has inlines:', max(length(addressToLineWithInlines(address))) > 1 FROM address_list; + From 4c605c80f320d617465a4cfaf460e374af8bc7e2 Mon Sep 17 00:00:00 2001 From: cmsxbc Date: Wed, 26 Jan 2022 20:02:39 +0800 Subject: [PATCH 07/39] improve test, check whether there is no symbol --- .../0_stateless/02161_addressToLineWithInlines.sql | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql index 9b7249ceff4..29be9ae85f6 100644 --- a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql @@ -13,12 +13,13 @@ SET query_profiler_cpu_time_period_ns = 0; SYSTEM FLUSH LOGS; WITH - address_list AS + lineWithInlines AS ( - SELECT DISTINCT arrayJoin(trace) AS address FROM system.trace_log WHERE query_id = + SELECT DISTINCT addressToLineWithInlines(arrayJoin(trace)) AS lineWithInlines FROM system.trace_log WHERE query_id = ( SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment='02161_test_case' ORDER BY event_time DESC LIMIT 1 ) ) -SELECT 'has inlines:', max(length(addressToLineWithInlines(address))) > 1 FROM address_list; - +SELECT 'has inlines:', or(max(length(lineWithInlines)) > 1, not any(locate(lineWithInlines[1], ':') != 0)) FROM lineWithInlines SETTINGS short_circuit_function_evaluation='enable'; +-- `max(length(lineWithInlines)) > 1` check there is any inlines. +-- `not any(locate(lineWithInlines[1], ':') != 0)` check whether none could get a symbol. 
From eca0453564f479b4530552ca16521215fcc77bc2 Mon Sep 17 00:00:00 2001 From: liyang830 Date: Thu, 27 Jan 2022 16:33:40 +0800 Subject: [PATCH 08/39] fix local metadata differ zk metadata --- src/Common/IFactoryWithAliases.h | 8 +-- src/Storages/KeyDescription.cpp | 16 +++++ src/Storages/KeyDescription.h | 3 + .../ReplicatedMergeTreeTableMetadata.cpp | 61 ++++++++++++------- .../ReplicatedMergeTreeTableMetadata.h | 4 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- src/Storages/TTLDescription.cpp | 16 +++++ src/Storages/TTLDescription.h | 3 + 8 files changed, 82 insertions(+), 31 deletions(-) diff --git a/src/Common/IFactoryWithAliases.h b/src/Common/IFactoryWithAliases.h index 7f5b53a80fa..f7da302a942 100644 --- a/src/Common/IFactoryWithAliases.h +++ b/src/Common/IFactoryWithAliases.h @@ -120,12 +120,8 @@ public: const String & getCanonicalNameIfAny(const String & name) const { auto it = case_insensitive_name_mapping.find(Poco::toLower(name)); - if (it != case_insensitive_name_mapping.end()) { - if (it->first != name) - { - return it->second; - } - } + if (it != case_insensitive_name_mapping.end()) + return it->second; return name; } diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index 6a2f4bbb055..9db730ba578 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -8,6 +8,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -161,4 +164,17 @@ KeyDescription KeyDescription::buildEmptyKey() return result; } +KeyDescription KeyDescription::parse(const String & str, const ColumnsDescription & columns, ContextPtr context) +{ + KeyDescription result; + if (str.empty()) + return result; + + ParserExpressionElement parser; + ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + FunctionNameNormalizer().visit(ast.get()); + + return getKeyFromAST(ast, columns, context); +} + } diff --git a/src/Storages/KeyDescription.h b/src/Storages/KeyDescription.h 
index 81803c3e44b..527a36124aa 100644 --- a/src/Storages/KeyDescription.h +++ b/src/Storages/KeyDescription.h @@ -76,6 +76,9 @@ struct KeyDescription /// Substitute modulo with moduloLegacy. Used in KeyCondition to allow proper comparison with keys. static bool moduloToModuloLegacyRecursive(ASTPtr node_expr); + + /// Parse description from string + static KeyDescription parse(const String & str, const ColumnsDescription & columns, ContextPtr context); }; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 0637a6bb027..7dee7b8d0f8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -168,7 +168,7 @@ ReplicatedMergeTreeTableMetadata ReplicatedMergeTreeTableMetadata::parse(const S } -void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const ReplicatedMergeTreeTableMetadata & from_zk) const +void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const { if (data_format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { @@ -203,9 +203,12 @@ void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const Replicat /// NOTE: You can make a less strict check of match expressions so that tables do not break from small changes /// in formatAST code. - if (primary_key != from_zk.primary_key) + String parsed_zk_primary_key = formattedAST(KeyDescription::parse(from_zk.primary_key, columns, context).expression_list_ast); + if (primary_key != parsed_zk_primary_key) throw Exception("Existing table metadata in ZooKeeper differs in primary key." 
- " Stored in ZooKeeper: " + from_zk.primary_key + ", local: " + primary_key, + " Stored in ZooKeeper: " + from_zk.primary_key + + ", parsed from ZooKeeper: " + parsed_zk_primary_key + + ", local: " + primary_key, ErrorCodes::METADATA_MISMATCH); if (data_format_version != from_zk.data_format_version) @@ -214,39 +217,53 @@ void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const Replicat ", local: " + DB::toString(data_format_version.toUnderType()), ErrorCodes::METADATA_MISMATCH); - if (partition_key != from_zk.partition_key) + String parsed_zk_partition_key = formattedAST(KeyDescription::parse(from_zk.partition_key, columns, context).expression_list_ast); + if (partition_key != parsed_zk_partition_key) throw Exception( "Existing table metadata in ZooKeeper differs in partition key expression." - " Stored in ZooKeeper: " + from_zk.partition_key + ", local: " + partition_key, + " Stored in ZooKeeper: " + from_zk.partition_key + + ", parsed from ZooKeeper: " + parsed_zk_partition_key + + ", local: " + partition_key, ErrorCodes::METADATA_MISMATCH); - } void ReplicatedMergeTreeTableMetadata::checkEquals(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const { - checkImmutableFieldsEquals(from_zk); + checkImmutableFieldsEquals(from_zk, columns, context); - if (sampling_expression != from_zk.sampling_expression) - throw Exception("Existing table metadata in ZooKeeper differs in sample expression." - " Stored in ZooKeeper: " + from_zk.sampling_expression + ", local: " + sampling_expression, - ErrorCodes::METADATA_MISMATCH); - - if (sorting_key != from_zk.sorting_key) + String parsed_zk_sampling_expression = formattedAST(KeyDescription::parse(from_zk.sampling_expression, columns, context).definition_ast); + if (sampling_expression != parsed_zk_sampling_expression) { throw Exception( - "Existing table metadata in ZooKeeper differs in sorting key expression." 
- " Stored in ZooKeeper: " + from_zk.sorting_key + ", local: " + sorting_key, + "Existing table metadata in ZooKeeper differs in sample expression." + " Stored in ZooKeeper: " + from_zk.sampling_expression + + ", parsed from ZooKeeper: " + parsed_zk_sampling_expression + + ", local: " + sampling_expression, ErrorCodes::METADATA_MISMATCH); } - if (ttl_table != from_zk.ttl_table) + String parsed_zk_sorting_key = formattedAST(extractKeyExpressionList(KeyDescription::parse(from_zk.sorting_key, columns, context).definition_ast)); + if (sorting_key != parsed_zk_sorting_key) { throw Exception( - "Existing table metadata in ZooKeeper differs in TTL." - " Stored in ZooKeeper: " + from_zk.ttl_table + - ", local: " + ttl_table, - ErrorCodes::METADATA_MISMATCH); + "Existing table metadata in ZooKeeper differs in sorting key expression." + " Stored in ZooKeeper: " + from_zk.sorting_key + + ", parsed from ZooKeeper: " + parsed_zk_sorting_key + + ", local: " + sorting_key, + ErrorCodes::METADATA_MISMATCH); + } + + auto parsed_primary_key = KeyDescription::parse(primary_key, columns, context); + String parsed_zk_ttl_table = formattedAST(TTLTableDescription::parse(from_zk.ttl_table, columns, context, parsed_primary_key).definition_ast); + if (ttl_table != parsed_zk_ttl_table) + { + throw Exception( + "Existing table metadata in ZooKeeper differs in TTL." 
+ " Stored in ZooKeeper: " + from_zk.ttl_table + + ", parsed from ZooKeeper: " + parsed_zk_ttl_table + + ", local: " + ttl_table, + ErrorCodes::METADATA_MISMATCH); } String parsed_zk_skip_indices = IndicesDescription::parse(from_zk.skip_indices, columns, context).toString(); @@ -290,10 +307,10 @@ void ReplicatedMergeTreeTableMetadata::checkEquals(const ReplicatedMergeTreeTabl } ReplicatedMergeTreeTableMetadata::Diff -ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk) const +ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const { - checkImmutableFieldsEquals(from_zk); + checkImmutableFieldsEquals(from_zk, columns, context); Diff diff; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index 2f9a9d58834..6d510d20304 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -70,11 +70,11 @@ struct ReplicatedMergeTreeTableMetadata void checkEquals(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const; - Diff checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk) const; + Diff checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const; private: - void checkImmutableFieldsEquals(const ReplicatedMergeTreeTableMetadata & from_zk) const; + void checkImmutableFieldsEquals(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const; bool index_granularity_bytes_found_in_zk = false; }; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7743736724f..ce4af6b7af1 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp 
+++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4478,7 +4478,7 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer auto alter_lock_holder = lockForAlter(getSettings()->lock_acquire_timeout_for_background_operations); LOG_INFO(log, "Metadata changed in ZooKeeper. Applying changes locally."); - auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this, getInMemoryMetadataPtr()).checkAndFindDiff(metadata_from_entry); + auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this, getInMemoryMetadataPtr()).checkAndFindDiff(metadata_from_entry, getInMemoryMetadataPtr()->getColumns(), getContext()); setTableStructure(std::move(columns_from_entry), metadata_diff); metadata_version = entry.alter_version; diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index bd5cc9e2f9d..96048d9cd99 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -15,6 +15,9 @@ #include #include +#include +#include +#include namespace DB @@ -370,4 +373,17 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST( return result; } +TTLTableDescription TTLTableDescription::parse(const String & str, const ColumnsDescription & columns, ContextPtr context, const KeyDescription & primary_key) +{ + TTLTableDescription result; + if (str.empty()) + return result; + + ParserTTLElement parser; + ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + FunctionNameNormalizer().visit(ast.get()); + + return getTTLForTableFromAST(ast, columns, context, primary_key); +} + } diff --git a/src/Storages/TTLDescription.h b/src/Storages/TTLDescription.h index 6288098b3c5..17020392013 100644 --- a/src/Storages/TTLDescription.h +++ b/src/Storages/TTLDescription.h @@ -118,6 +118,9 @@ struct TTLTableDescription static TTLTableDescription getTTLForTableFromAST( const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context, const KeyDescription & primary_key); + + /// Parse 
description from string + static TTLTableDescription parse(const String & str, const ColumnsDescription & columns, ContextPtr context, const KeyDescription & primary_key); }; } From a69711ccff2a7dab022dd3c1d18a6a314469b6ec Mon Sep 17 00:00:00 2001 From: liyang830 Date: Fri, 28 Jan 2022 19:07:59 +0800 Subject: [PATCH 09/39] ExpressionList parse tuple and ttl --- src/Storages/KeyDescription.cpp | 6 +++--- src/Storages/TTLDescription.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index 9db730ba578..24b4b13bc21 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include @@ -170,8 +170,8 @@ KeyDescription KeyDescription::parse(const String & str, const ColumnsDescriptio if (str.empty()) return result; - ParserExpressionElement parser; - ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ParserExpression parser; + ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); FunctionNameNormalizer().visit(ast.get()); return getKeyFromAST(ast, columns, context); diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 96048d9cd99..69303264482 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include @@ -379,7 +379,7 @@ TTLTableDescription TTLTableDescription::parse(const String & str, const Columns if (str.empty()) return result; - ParserTTLElement parser; + ParserTTLExpressionList parser; ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); FunctionNameNormalizer().visit(ast.get()); From 93f9a9e37a46eee573746d456e1bb4a5b3b50940 Mon Sep 17 00:00:00 2001 From: Andrew Date: Wed, 2 Feb 2022 14:32:29 +0000 Subject: [PATCH 10/39] Update clickhouse-keeper.md fix the run command and add example 
--- docs/en/operations/clickhouse-keeper.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index fcfc675f9d7..48eb590aca2 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -108,7 +108,8 @@ Examples of configuration for quorum with three nodes can be found in [integrati ClickHouse Keeper is bundled into the ClickHouse server package, just add configuration of `` and start ClickHouse server as always. If you want to run standalone ClickHouse Keeper you can start it in a similar way with: ```bash -clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon +clickhouse keeper --config /etc/your_path_to_config/config.xml --daemon +example: clickhouse keeper --config /etc/clickhouse-server/config.d/keeper_config.xml ``` ## Four Letter Word Commands {#four-letter-word-commands} From f8ef1cd23d02b270bc6b18f34a63730eeb4ac767 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 29 Jan 2022 00:31:50 +0700 Subject: [PATCH 11/39] Add submodule minizip-ng --- .gitmodules | 3 + contrib/CMakeLists.txt | 1 + contrib/cassandra-cmake/CMakeLists.txt | 13 +- contrib/minizip-ng | 1 + contrib/minizip-ng-cmake/CMakeLists.txt | 168 ++++++++++++++++++++++++ contrib/minizip-ng-cmake/unzip.h | 13 ++ contrib/minizip-ng-cmake/zip.h | 13 ++ src/CMakeLists.txt | 4 + src/Common/config.h.in | 1 + src/configure_config.cmake | 3 + 10 files changed, 209 insertions(+), 11 deletions(-) create mode 160000 contrib/minizip-ng create mode 100644 contrib/minizip-ng-cmake/CMakeLists.txt create mode 100644 contrib/minizip-ng-cmake/unzip.h create mode 100644 contrib/minizip-ng-cmake/zip.h diff --git a/.gitmodules b/.gitmodules index ed023ab348b..91f4ddb2007 100644 --- a/.gitmodules +++ b/.gitmodules @@ -259,3 +259,6 @@ [submodule "contrib/azure"] path = contrib/azure url = https://github.com/ClickHouse-Extras/azure-sdk-for-cpp.git +[submodule 
"contrib/minizip-ng"] + path = contrib/minizip-ng + url = https://github.com/zlib-ng/minizip-ng diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 6172f231b6e..9cf307c473e 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -78,6 +78,7 @@ add_contrib (croaring-cmake croaring) add_contrib (zstd-cmake zstd) add_contrib (zlib-ng-cmake zlib-ng) add_contrib (bzip2-cmake bzip2) +add_contrib (minizip-ng-cmake minizip-ng) add_contrib (snappy-cmake snappy) add_contrib (rocksdb-cmake rocksdb) add_contrib (thrift-cmake thrift) diff --git a/contrib/cassandra-cmake/CMakeLists.txt b/contrib/cassandra-cmake/CMakeLists.txt index 416dca6f2bc..81c1fab3882 100644 --- a/contrib/cassandra-cmake/CMakeLists.txt +++ b/contrib/cassandra-cmake/CMakeLists.txt @@ -56,19 +56,11 @@ list(APPEND SOURCES ${CASS_SRC_DIR}/atomic/atomic_std.hpp) add_library(_curl_hostcheck OBJECT ${CASS_SRC_DIR}/third_party/curl/hostcheck.cpp) add_library(_hdr_histogram OBJECT ${CASS_SRC_DIR}/third_party/hdr_histogram/hdr_histogram.cpp) add_library(_http-parser OBJECT ${CASS_SRC_DIR}/third_party/http-parser/http_parser.c) -add_library(_minizip OBJECT - ${CASS_SRC_DIR}/third_party/minizip/ioapi.c - ${CASS_SRC_DIR}/third_party/minizip/zip.c - ${CASS_SRC_DIR}/third_party/minizip/unzip.c) - -target_link_libraries(_minizip ch_contrib::zlib) -target_compile_definitions(_minizip PRIVATE "-Dz_crc_t=unsigned long") list(APPEND INCLUDE_DIRS ${CASS_SRC_DIR}/third_party/curl ${CASS_SRC_DIR}/third_party/hdr_histogram ${CASS_SRC_DIR}/third_party/http-parser - ${CASS_SRC_DIR}/third_party/minizip ${CASS_SRC_DIR}/third_party/mt19937_64 ${CASS_SRC_DIR}/third_party/rapidjson/rapidjson ${CASS_SRC_DIR}/third_party/sparsehash/src) @@ -123,10 +115,9 @@ add_library(_cassandra ${SOURCES} $ $ - $ - $) + $) -target_link_libraries(_cassandra ch_contrib::zlib) +target_link_libraries(_cassandra ch_contrib::zlib ch_contrib::minizip) target_include_directories(_cassandra PRIVATE ${CMAKE_CURRENT_BINARY_DIR} 
${INCLUDE_DIRS}) target_include_directories(_cassandra SYSTEM BEFORE PUBLIC ${CASS_INCLUDE_DIR}) target_compile_definitions(_cassandra PRIVATE CASS_BUILDING) diff --git a/contrib/minizip-ng b/contrib/minizip-ng new file mode 160000 index 00000000000..6cffc951851 --- /dev/null +++ b/contrib/minizip-ng @@ -0,0 +1 @@ +Subproject commit 6cffc951851620e0fac1993be75e4713c334de03 diff --git a/contrib/minizip-ng-cmake/CMakeLists.txt b/contrib/minizip-ng-cmake/CMakeLists.txt new file mode 100644 index 00000000000..4aabbd3c9fb --- /dev/null +++ b/contrib/minizip-ng-cmake/CMakeLists.txt @@ -0,0 +1,168 @@ +option(ENABLE_MINIZIP "Enable minizip-ng the zip manipulation library" ${ENABLE_LIBRARIES}) +if (NOT ENABLE_MINIZIP) + message (STATUS "minizip-ng disabled") + return() +endif() + +set(_MINIZIP_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/minizip-ng") + +# Initial source files +set(MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_crypt.c + ${_MINIZIP_SOURCE_DIR}/mz_os.c + ${_MINIZIP_SOURCE_DIR}/mz_strm.c + ${_MINIZIP_SOURCE_DIR}/mz_strm_buf.c + ${_MINIZIP_SOURCE_DIR}/mz_strm_mem.c + ${_MINIZIP_SOURCE_DIR}/mz_strm_split.c + ${_MINIZIP_SOURCE_DIR}/mz_zip.c + ${_MINIZIP_SOURCE_DIR}/mz_zip_rw.c) + +# Initial header files +set(MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz.h + ${_MINIZIP_SOURCE_DIR}/mz_os.h + ${_MINIZIP_SOURCE_DIR}/mz_crypt.h + ${_MINIZIP_SOURCE_DIR}/mz_strm.h + ${_MINIZIP_SOURCE_DIR}/mz_strm_buf.h + ${_MINIZIP_SOURCE_DIR}/mz_strm_mem.h + ${_MINIZIP_SOURCE_DIR}/mz_strm_split.h + ${_MINIZIP_SOURCE_DIR}/mz_strm_os.h + ${_MINIZIP_SOURCE_DIR}/mz_zip.h + ${_MINIZIP_SOURCE_DIR}/mz_zip_rw.h) + +set(MINIZIP_INC ${_MINIZIP_SOURCE_DIR}) + +set(MINIZIP_DEF) +set(MINIZIP_PUBLIC_DEF) +set(MINIZIP_LIB) + +# Check if zlib is present +set(MZ_ZLIB ON) +if(MZ_ZLIB) + # Use zlib from ClickHouse contrib + list(APPEND MINIZIP_LIB ch_contrib::zlib) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_strm_zlib.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_zlib.h) + + list(APPEND 
MINIZIP_DEF "-DHAVE_ZLIB") +endif() + +# Check if bzip2 is present +set(MZ_BZIP2 ${ENABLE_BZIP2}) +if(MZ_BZIP2) + # Use bzip2 from ClickHouse contrib + list(APPEND MINIZIP_LIB ch_contrib::bzip2) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_strm_bzip.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_bzip.h) + + list(APPEND MINIZIP_DEF "-DHAVE_BZIP2") +endif() + +# Check if liblzma is present +set(MZ_LZMA ON) +if(MZ_LZMA) + # Use liblzma from ClickHouse contrib + list(APPEND MINIZIP_LIB ch_contrib::xz) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_strm_lzma.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_lzma.h) + + list(APPEND MINIZIP_DEF "-DHAVE_LZMA") +endif() + +# Check if zstd is present +set(MZ_ZSTD ON) +if(MZ_ZSTD) + # Use zstd from ClickHouse contrib + list(APPEND MINIZIP_LIB ch_contrib::zstd) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_strm_zstd.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_zstd.h) + + list(APPEND MINIZIP_DEF "-DHAVE_ZSTD") +endif() + +if(NOT MZ_ZLIB AND NOT MZ_ZSTD AND NOT MZ_BZIP2 AND NOT MZ_LZMA) + message(STATUS "Compression not supported due to missing libraries") + + list(APPEND MINIZIP_DEF -DMZ_ZIP_NO_DECOMPRESSION) + list(APPEND MINIZIP_DEF -DMZ_ZIP_NO_COMPRESSION) +endif() + +# Check to see if openssl installation is present +set(MZ_OPENSSL ${ENABLE_SSL}) +if(MZ_OPENSSL) + # Use openssl from ClickHouse contrib + list(APPEND MINIZIP_LIB OpenSSL::SSL OpenSSL::Crypto) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_crypt_openssl.c) +endif() + +# Include WinZIP AES encryption +set(MZ_WZAES ${ENABLE_SSL}) +if(MZ_WZAES) + list(APPEND MINIZIP_DEF -DHAVE_WZAES) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_strm_wzaes.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_wzaes.h) +endif() + +# Include traditional PKWare encryption +set(MZ_PKCRYPT ON) +if(MZ_PKCRYPT) + list(APPEND MINIZIP_DEF -DHAVE_PKCRYPT) + + list(APPEND MINIZIP_SRC 
+ ${_MINIZIP_SOURCE_DIR}/mz_strm_pkcrypt.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_pkcrypt.h) +endif() + +# Unix specific +if(UNIX) + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_os_posix.c + ${_MINIZIP_SOURCE_DIR}/mz_strm_os_posix.c) +endif() + +# Include compatibility layer +set(MZ_COMPAT ON) +if(MZ_COMPAT) + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_compat.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_compat.h + zip.h + unzip.h) + + list(APPEND MINIZIP_INC "${CMAKE_CURRENT_SOURCE_DIR}") + list(APPEND MINIZIP_PUBLIC_DEF "-DMZ_COMPAT_VERSION=110") +endif() + +add_library(_minizip ${MINIZIP_SRC} ${MINIZIP_HDR}) +target_include_directories(_minizip PUBLIC ${MINIZIP_INC}) +target_compile_definitions(_minizip PUBLIC ${MINIZIP_PUBLIC_DEF}) +target_compile_definitions(_minizip PRIVATE ${MINIZIP_DEF}) +target_link_libraries(_minizip PRIVATE ${MINIZIP_LIB}) + +add_library(ch_contrib::minizip ALIAS _minizip) diff --git a/contrib/minizip-ng-cmake/unzip.h b/contrib/minizip-ng-cmake/unzip.h new file mode 100644 index 00000000000..61cbd974e31 --- /dev/null +++ b/contrib/minizip-ng-cmake/unzip.h @@ -0,0 +1,13 @@ +/* unzip.h -- Compatibility layer shim + part of the minizip-ng project + + This program is distributed under the terms of the same license as zlib. + See the accompanying LICENSE file for the full text of the license. +*/ + +#ifndef MZ_COMPAT_UNZIP +#define MZ_COMPAT_UNZIP + +#include "mz_compat.h" + +#endif diff --git a/contrib/minizip-ng-cmake/zip.h b/contrib/minizip-ng-cmake/zip.h new file mode 100644 index 00000000000..cf38ac91a04 --- /dev/null +++ b/contrib/minizip-ng-cmake/zip.h @@ -0,0 +1,13 @@ +/* zip.h -- Compatibility layer shim + part of the minizip-ng project + + This program is distributed under the terms of the same license as zlib. + See the accompanying LICENSE file for the full text of the license. 
+*/ + +#ifndef MZ_COMPAT_ZIP +#define MZ_COMPAT_ZIP + +#include "mz_compat.h" + +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 57d4bf29491..a3f9e771e0b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -513,6 +513,10 @@ if (TARGET ch_contrib::bzip2) target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::bzip2) endif() +if (TARGET ch_contrib::minizip) + target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::minizip) +endif () + if (TARGET ch_contrib::simdjson) dbms_target_link_libraries(PRIVATE ch_contrib::simdjson) endif() diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 3d785e0d0fb..edade4ce2be 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -17,6 +17,7 @@ #cmakedefine01 USE_YAML_CPP #cmakedefine01 CLICKHOUSE_SPLIT_BINARY #cmakedefine01 USE_BZIP2 +#cmakedefine01 USE_MINIZIP #cmakedefine01 USE_SNAPPY #cmakedefine01 USE_HIVE #cmakedefine01 USE_ODBC diff --git a/src/configure_config.cmake b/src/configure_config.cmake index ce50ab87afc..519307ba28a 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -4,6 +4,9 @@ endif() if (TARGET ch_contrib::bzip2) set(USE_BZIP2 1) endif() +if (TARGET ch_contrib::minizip) + set(USE_MINIZIP 1) +endif() if (TARGET ch_contrib::snappy) set(USE_SNAPPY 1) endif() From 23fac284ea57da86e69d1154a1045c0d785c788c Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 29 Jan 2022 00:32:35 +0700 Subject: [PATCH 12/39] Add utility classes ZipArchiveReader and ZipArchiveWriter. 
--- src/CMakeLists.txt | 1 + src/Common/ErrorCodes.cpp | 2 + src/IO/Archives/IArchiveReader.h | 60 ++ src/IO/Archives/IArchiveWriter.h | 38 ++ src/IO/Archives/ZipArchiveReader.cpp | 563 ++++++++++++++++++ src/IO/Archives/ZipArchiveReader.h | 86 +++ src/IO/Archives/ZipArchiveWriter.cpp | 385 ++++++++++++ src/IO/Archives/ZipArchiveWriter.h | 97 +++ src/IO/Archives/createArchiveReader.cpp | 38 ++ src/IO/Archives/createArchiveReader.h | 22 + src/IO/Archives/createArchiveWriter.cpp | 38 ++ src/IO/Archives/createArchiveWriter.h | 19 + .../tests/gtest_archive_reader_and_writer.cpp | 341 +++++++++++ 13 files changed, 1690 insertions(+) create mode 100644 src/IO/Archives/IArchiveReader.h create mode 100644 src/IO/Archives/IArchiveWriter.h create mode 100644 src/IO/Archives/ZipArchiveReader.cpp create mode 100644 src/IO/Archives/ZipArchiveReader.h create mode 100644 src/IO/Archives/ZipArchiveWriter.cpp create mode 100644 src/IO/Archives/ZipArchiveWriter.h create mode 100644 src/IO/Archives/createArchiveReader.cpp create mode 100644 src/IO/Archives/createArchiveReader.h create mode 100644 src/IO/Archives/createArchiveWriter.cpp create mode 100644 src/IO/Archives/createArchiveWriter.h create mode 100644 src/IO/tests/gtest_archive_reader_and_writer.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a3f9e771e0b..f04f18a4639 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -79,6 +79,7 @@ set(dbms_sources) add_headers_and_sources(clickhouse_common_io Common) add_headers_and_sources(clickhouse_common_io Common/HashTable) add_headers_and_sources(clickhouse_common_io IO) +add_headers_and_sources(clickhouse_common_io IO/Archives) add_headers_and_sources(clickhouse_common_io IO/S3) list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 82714de3470..e991daf3209 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -610,6 +610,8 @@ M(639, 
SNAPPY_COMPRESS_FAILED) \ M(640, NO_HIVEMETASTORE) \ M(641, CANNOT_APPEND_TO_FILE) \ + M(642, CANNOT_PACK_ARCHIVE) \ + M(643, CANNOT_UNPACK_ARCHIVE) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/IO/Archives/IArchiveReader.h b/src/IO/Archives/IArchiveReader.h new file mode 100644 index 00000000000..584e80a7d09 --- /dev/null +++ b/src/IO/Archives/IArchiveReader.h @@ -0,0 +1,60 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class ReadBuffer; +class ReadBufferFromFileBase; +class SeekableReadBuffer; + +/// Interface for reading an archive. +class IArchiveReader : public std::enable_shared_from_this +{ +public: + virtual ~IArchiveReader() = default; + + /// Returns true if there is a specified file in the archive. + virtual bool fileExists(const String & filename) = 0; + + struct FileInfo + { + UInt64 uncompressed_size; + UInt64 compressed_size; + int compression_method; + bool is_encrypted; + }; + + /// Returns the information about a file stored in the archive. + virtual FileInfo getFileInfo(const String & filename) = 0; + + class FileEnumerator + { + public: + virtual ~FileEnumerator() = default; + virtual const String & getFileName() const = 0; + virtual const FileInfo & getFileInfo() const = 0; + virtual bool nextFile() = 0; + }; + + /// Starts enumerating files in the archive. + virtual std::unique_ptr firstFile() = 0; + + /// Starts reading a file from the archive. The function returns a read buffer, + /// you can read that buffer to extract uncompressed data from the archive. + /// Several read buffers can be used at the same time in parallel. + virtual std::unique_ptr readFile(const String & filename) = 0; + + /// It's possible to convert a file enumerator to a read buffer and vice versa. + virtual std::unique_ptr readFile(std::unique_ptr enumerator) = 0; + virtual std::unique_ptr nextFile(std::unique_ptr read_buffer) = 0; + + /// Sets password used to decrypt files in the archive. 
+ virtual void setPassword(const String & /* password */) {} + + using ReadArchiveFunction = std::function()>; +}; + +} diff --git a/src/IO/Archives/IArchiveWriter.h b/src/IO/Archives/IArchiveWriter.h new file mode 100644 index 00000000000..6879d470b62 --- /dev/null +++ b/src/IO/Archives/IArchiveWriter.h @@ -0,0 +1,38 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class WriteBufferFromFileBase; + +/// Interface for writing an archive. +class IArchiveWriter : public std::enable_shared_from_this +{ +public: + /// The destructor finalizes writing the archive. + virtual ~IArchiveWriter() = default; + + /// Starts writing a file to the archive. The function returns a write buffer, + /// any data written to that buffer will be compressed and then put to the archive. + /// You can keep only one such buffer at a time, a buffer returned by previous call + /// of the function `writeFile()` should be destroyed before next call of `writeFile()`. + virtual std::unique_ptr writeFile(const String & filename) = 0; + + /// Returns true if there is an active instance of WriteBuffer returned by writeFile(). + /// This function should be used mostly for debugging purposes. + virtual bool isWritingFile() const = 0; + + static constexpr const int kDefaultCompressionLevel = -1; + + /// Sets compression method and level. + /// Changing them will affect next file in the archive. + virtual void setCompression(int /* compression_method */, int /* compression_level */ = kDefaultCompressionLevel) {} + + /// Sets password. If the password is not empty it will enable encryption in the archive. 
+ virtual void setPassword(const String & /* password */) {} +}; + +} diff --git a/src/IO/Archives/ZipArchiveReader.cpp b/src/IO/Archives/ZipArchiveReader.cpp new file mode 100644 index 00000000000..16604da62dc --- /dev/null +++ b/src/IO/Archives/ZipArchiveReader.cpp @@ -0,0 +1,563 @@ +#include + +#if USE_MINIZIP +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int CANNOT_UNPACK_ARCHIVE; + extern const int LOGICAL_ERROR; + extern const int SEEK_POSITION_OUT_OF_BOUND; +} + +using RawHandle = unzFile; + + +/// Holds a raw handle, calls acquireRawHandle() in the constructor and releaseRawHandle() in the destructor. +class ZipArchiveReader::HandleHolder +{ +public: + HandleHolder() = default; + + explicit HandleHolder(const std::shared_ptr & reader_) : reader(reader_), raw_handle(reader->acquireRawHandle()) { } + + ~HandleHolder() + { + if (raw_handle) + { + try + { + closeFile(); + } + catch (...) + { + tryLogCurrentException("ZipArchiveReader"); + } + reader->releaseRawHandle(raw_handle); + } + } + + HandleHolder(HandleHolder && src) + { + *this = std::move(src); + } + + HandleHolder & operator =(HandleHolder && src) + { + reader = std::exchange(src.reader, nullptr); + raw_handle = std::exchange(src.raw_handle, nullptr); + file_name = std::exchange(src.file_name, {}); + file_info = std::exchange(src.file_info, {}); + return *this; + } + + RawHandle getRawHandle() const { return raw_handle; } + std::shared_ptr getReader() const { return reader; } + + void locateFile(const String & file_name_) + { + resetFileInfo(); + bool case_sensitive = true; + int err = unzLocateFile(raw_handle, file_name_.c_str(), reinterpret_cast(static_cast(case_sensitive))); + if (err == UNZ_END_OF_LIST_OF_FILE) + showError("File " + quoteString(file_name_) + " not found"); + file_name = file_name_; + } + + bool tryLocateFile(const String & file_name_) + { + resetFileInfo(); + bool case_sensitive = true; + int err = unzLocateFile(raw_handle, 
file_name_.c_str(), reinterpret_cast(static_cast(case_sensitive))); + if (err == UNZ_END_OF_LIST_OF_FILE) + return false; + checkResult(err); + file_name = file_name_; + return true; + } + + bool firstFile() + { + resetFileInfo(); + int err = unzGoToFirstFile(raw_handle); + if (err == UNZ_END_OF_LIST_OF_FILE) + return false; + checkResult(err); + return true; + } + + bool nextFile() + { + resetFileInfo(); + int err = unzGoToNextFile(raw_handle); + if (err == UNZ_END_OF_LIST_OF_FILE) + return false; + checkResult(err); + return true; + } + + const String & getFileName() const + { + if (!file_name) + retrieveFileInfo(); + return *file_name; + } + + const FileInfo & getFileInfo() const + { + if (!file_info) + retrieveFileInfo(); + return *file_info; + } + + void closeFile() + { + int err = unzCloseCurrentFile(raw_handle); + /// If err == UNZ_PARAMERROR the file is already closed. + if (err != UNZ_PARAMERROR) + checkResult(err); + } + + void checkResult(int code) const { reader->checkResult(code); } + [[noreturn]] void showError(const String & message) const { reader->showError(message); } + +private: + void retrieveFileInfo() const + { + if (file_name && file_info) + return; + unz_file_info64 finfo; + int err = unzGetCurrentFileInfo64(raw_handle, &finfo, nullptr, 0, nullptr, 0, nullptr, 0); + if (err == UNZ_PARAMERROR) + showError("No current file"); + checkResult(err); + if (!file_info) + { + file_info.emplace(); + file_info->uncompressed_size = finfo.uncompressed_size; + file_info->compressed_size = finfo.compressed_size; + file_info->compression_method = finfo.compression_method; + file_info->is_encrypted = (finfo.flag & MZ_ZIP_FLAG_ENCRYPTED); + } + if (!file_name) + { + file_name.emplace(); + file_name->resize(finfo.size_filename); + checkResult(unzGetCurrentFileInfo64(raw_handle, nullptr, file_name->data(), finfo.size_filename, nullptr, 0, nullptr, 0)); + } + } + + void resetFileInfo() + { + file_info.reset(); + file_name.reset(); + } + + std::shared_ptr reader; 
+ RawHandle raw_handle = nullptr; + mutable std::optional file_name; + mutable std::optional file_info; +}; + + +/// This class represents a ReadBuffer actually returned by readFile(). +class ZipArchiveReader::ReadBufferFromZipArchive : public ReadBufferFromFileBase +{ +public: + explicit ReadBufferFromZipArchive(HandleHolder && handle_) + : ReadBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0) + , handle(std::move(handle_)) + { + const auto & file_info = handle.getFileInfo(); + checkCompressionMethodIsEnabled(static_cast(file_info.compression_method)); + + const char * password_cstr = nullptr; + if (file_info.is_encrypted) + { + const auto & password_str = handle.getReader()->password; + if (password_str.empty()) + showError("Password is required"); + password_cstr = password_str.c_str(); + checkEncryptionIsEnabled(); + } + + RawHandle raw_handle = handle.getRawHandle(); + int err = unzOpenCurrentFilePassword(raw_handle, password_cstr); + if (err == MZ_PASSWORD_ERROR) + showError("Wrong password"); + checkResult(err); + } + + off_t seek(off_t off, int whence) override + { + off_t current_pos = getPosition(); + off_t new_pos; + if (whence == SEEK_SET) + new_pos = off; + else if (whence == SEEK_CUR) + new_pos = off + current_pos; + else + throw Exception("Only SEEK_SET and SEEK_CUR seek modes allowed.", ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + + if (new_pos == current_pos) + return current_pos; /// The position is the same. + + if (new_pos < 0) + throw Exception("Seek position is out of bound", ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + + off_t working_buffer_start_pos = current_pos - offset(); + off_t working_buffer_end_pos = current_pos + available(); + + if ((working_buffer_start_pos <= new_pos) && (new_pos <= working_buffer_end_pos)) + { + /// The new position is still inside the buffer. 
+ position() += new_pos - current_pos; + return new_pos; + } + + RawHandle raw_handle = handle.getRawHandle(); + + /// Check that the new position is not beyond the end of the file. + const auto & file_info = handle.getFileInfo(); + if (new_pos > static_cast(file_info.uncompressed_size)) + throw Exception("Seek position is out of bound", ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + + if (file_info.compression_method == static_cast(CompressionMethod::kStore)) + { + /// unzSeek64() works only for non-compressed files. + checkResult(unzSeek64(raw_handle, off, whence)); + return unzTell64(raw_handle); + } + + /// As a last resort we go the slow way: we simply ignore all data before the new position. + if (new_pos < current_pos) + { + checkResult(unzCloseCurrentFile(raw_handle)); + checkResult(unzOpenCurrentFile(raw_handle)); + current_pos = 0; + } + + ignore(new_pos - current_pos); + return new_pos; + } + + off_t getPosition() override + { + RawHandle raw_handle = handle.getRawHandle(); + return unzTell64(raw_handle) - available(); + } + + String getFileName() const override { return handle.getFileName(); } + + /// Releases owned handle to pass it to an enumerator. 
+ HandleHolder releaseHandle() && + { + handle.closeFile(); + return std::move(handle); + } + +private: + bool nextImpl() override + { + RawHandle raw_handle = handle.getRawHandle(); + auto bytes_read = unzReadCurrentFile(raw_handle, internal_buffer.begin(), internal_buffer.size()); + + if (bytes_read < 0) + checkResult(bytes_read); + + if (!bytes_read) + return false; + + working_buffer = internal_buffer; + working_buffer.resize(bytes_read); + return true; + } + + void checkResult(int code) const { handle.checkResult(code); } + [[noreturn]] void showError(const String & message) const { handle.showError(message); } + + HandleHolder handle; +}; + + +class ZipArchiveReader::FileEnumeratorImpl : public FileEnumerator +{ +public: + explicit FileEnumeratorImpl(HandleHolder && handle_) : handle(std::move(handle_)) {} + + const String & getFileName() const override { return handle.getFileName(); } + const FileInfo & getFileInfo() const override { return handle.getFileInfo(); } + bool nextFile() override { return handle.nextFile(); } + + /// Releases owned handle to pass it to a read buffer. + HandleHolder releaseHandle() && { return std::move(handle); } + +private: + HandleHolder handle; +}; + + +namespace +{ + /// Provides a set of functions allowing the minizip library to read its input + /// from a SeekableReadBuffer instead of an ordinary file in the local filesystem. 
+ class StreamFromReadBuffer + { + public: + static RawHandle open(std::unique_ptr archive_read_buffer, UInt64 archive_size) + { + StreamFromReadBuffer::Opaque opaque{std::move(archive_read_buffer), archive_size}; + + zlib_filefunc64_def func_def; + func_def.zopen64_file = &StreamFromReadBuffer::openFileFunc; + func_def.zclose_file = &StreamFromReadBuffer::closeFileFunc; + func_def.zread_file = &StreamFromReadBuffer::readFileFunc; + func_def.zwrite_file = &StreamFromReadBuffer::writeFileFunc; + func_def.zseek64_file = &StreamFromReadBuffer::seekFunc; + func_def.ztell64_file = &StreamFromReadBuffer::tellFunc; + func_def.zerror_file = &StreamFromReadBuffer::testErrorFunc; + func_def.opaque = &opaque; + + return unzOpen2_64(/* path= */ nullptr, + &func_def); + } + + private: + std::unique_ptr read_buffer; + UInt64 start_offset = 0; + UInt64 total_size = 0; + bool at_end = false; + + struct Opaque + { + std::unique_ptr read_buffer; + UInt64 total_size = 0; + }; + + static void * openFileFunc(void * opaque, const void *, int) + { + auto & opq = *reinterpret_cast(opaque); + return new StreamFromReadBuffer(std::move(opq.read_buffer), opq.total_size); + } + + StreamFromReadBuffer(std::unique_ptr read_buffer_, UInt64 total_size_) + : read_buffer(std::move(read_buffer_)), start_offset(read_buffer->getPosition()), total_size(total_size_) {} + + static int closeFileFunc(void *, void * stream) + { + delete reinterpret_cast(stream); + return ZIP_OK; + } + + static StreamFromReadBuffer & get(void * ptr) + { + return *reinterpret_cast(ptr); + } + + static int testErrorFunc(void *, void *) + { + return ZIP_OK; + } + + static unsigned long readFileFunc(void *, void * stream, void * buf, unsigned long size) // NOLINT(google-runtime-int) + { + auto & strm = get(stream); + if (strm.at_end) + return 0; + auto read_bytes = strm.read_buffer->read(reinterpret_cast(buf), size); + return read_bytes; + } + + static ZPOS64_T tellFunc(void *, void * stream) + { + auto & strm = get(stream); + if 
(strm.at_end) + return strm.total_size; + auto pos = strm.read_buffer->getPosition() - strm.start_offset; + return pos; + } + + static long seekFunc(void *, void * stream, ZPOS64_T offset, int origin) // NOLINT(google-runtime-int) + { + auto & strm = get(stream); + if (origin == SEEK_END) + { + /// Our implementations of SeekableReadBuffer don't support SEEK_END, + /// but the minizip library needs it, so we have to simulate it here. + strm.at_end = true; + return ZIP_OK; + } + strm.at_end = false; + if (origin == SEEK_SET) + offset += strm.start_offset; + strm.read_buffer->seek(offset, origin); + return ZIP_OK; + } + + static unsigned long writeFileFunc(void *, void *, const void *, unsigned long) // NOLINT(google-runtime-int) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "StreamFromReadBuffer::writeFile must not be called"); + } + }; +} + + +ZipArchiveReader::ZipArchiveReader(const String & path_to_archive_) + : path_to_archive(path_to_archive_) +{ + init(); + +} + +ZipArchiveReader::ZipArchiveReader( + const String & path_to_archive_, const ReadArchiveFunction & archive_read_function_, UInt64 archive_size_) + : path_to_archive(path_to_archive_), archive_read_function(archive_read_function_), archive_size(archive_size_) +{ + init(); +} + +void ZipArchiveReader::init() +{ + /// Prepare the first handle in `free_handles` and check that the archive can be read. + releaseRawHandle(acquireRawHandle()); +} + +ZipArchiveReader::~ZipArchiveReader() +{ + /// Close all `free_handles`. + for (RawHandle free_handle : free_handles) + { + try + { + checkResult(unzClose(free_handle)); + } + catch (...) 
+ { + tryLogCurrentException("ZipArchiveReader"); + } + } +} + +bool ZipArchiveReader::fileExists(const String & filename) +{ + return acquireHandle().tryLocateFile(filename); +} + +ZipArchiveReader::FileInfo ZipArchiveReader::getFileInfo(const String & filename) +{ + auto handle = acquireHandle(); + handle.locateFile(filename); + return handle.getFileInfo(); +} + +std::unique_ptr ZipArchiveReader::firstFile() +{ + auto handle = acquireHandle(); + if (!handle.firstFile()) + return nullptr; + return std::make_unique(std::move(handle)); +} + +std::unique_ptr ZipArchiveReader::readFile(const String & filename) +{ + auto handle = acquireHandle(); + handle.locateFile(filename); + return std::make_unique(std::move(handle)); +} + +std::unique_ptr ZipArchiveReader::readFile(std::unique_ptr enumerator) +{ + if (!dynamic_cast(enumerator.get())) + throw Exception("Wrong enumerator passed to readFile()", ErrorCodes::LOGICAL_ERROR); + auto enumerator_impl = std::unique_ptr(static_cast(enumerator.release())); + auto handle = std::move(*enumerator_impl).releaseHandle(); + return std::make_unique(std::move(handle)); +} + +std::unique_ptr ZipArchiveReader::nextFile(std::unique_ptr read_buffer) +{ + if (!dynamic_cast(read_buffer.get())) + throw Exception("Wrong ReadBuffer passed to nextFile()", ErrorCodes::LOGICAL_ERROR); + auto read_buffer_from_zip = std::unique_ptr(static_cast(read_buffer.release())); + auto handle = std::move(*read_buffer_from_zip).releaseHandle(); + if (!handle.nextFile()) + return nullptr; + return std::make_unique(std::move(handle)); +} + +void ZipArchiveReader::setPassword(const String & password_) +{ + std::lock_guard lock{mutex}; + password = password_; +} + +ZipArchiveReader::HandleHolder ZipArchiveReader::acquireHandle() +{ + return HandleHolder{std::static_pointer_cast(shared_from_this())}; +} + +ZipArchiveReader::RawHandle ZipArchiveReader::acquireRawHandle() +{ + std::lock_guard lock{mutex}; + + if (!free_handles.empty()) + { + RawHandle free_handle = 
free_handles.back(); + free_handles.pop_back(); + return free_handle; + } + + RawHandle new_handle = nullptr; + if (archive_read_function) + new_handle = StreamFromReadBuffer::open(archive_read_function(), archive_size); + else + new_handle = unzOpen64(path_to_archive.c_str()); + + if (!new_handle) + throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open zip archive {}", quoteString(path_to_archive)); + + return new_handle; +} + +void ZipArchiveReader::releaseRawHandle(RawHandle handle_) +{ + if (!handle_) + return; + + std::lock_guard lock{mutex}; + free_handles.push_back(handle_); +} + +void ZipArchiveReader::checkResult(int code) const +{ + if (code >= UNZ_OK) + return; + + String message = "Code= "; + switch (code) + { + case UNZ_OK: return; + case UNZ_ERRNO: message += "ERRNO, errno= " + String{strerror(errno)}; break; + case UNZ_PARAMERROR: message += "PARAMERROR"; break; + case UNZ_BADZIPFILE: message += "BADZIPFILE"; break; + case UNZ_INTERNALERROR: message += "INTERNALERROR"; break; + case UNZ_CRCERROR: message += "CRCERROR"; break; + case UNZ_BADPASSWORD: message += "BADPASSWORD"; break; + default: message += std::to_string(code); break; + } + showError(message); +} + +void ZipArchiveReader::showError(const String & message) const +{ + throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack zip archive {}: {}", quoteString(path_to_archive), message); +} + +} + +#endif diff --git a/src/IO/Archives/ZipArchiveReader.h b/src/IO/Archives/ZipArchiveReader.h new file mode 100644 index 00000000000..6932a93e23f --- /dev/null +++ b/src/IO/Archives/ZipArchiveReader.h @@ -0,0 +1,86 @@ +#pragma once + +#include + +#if USE_MINIZIP +#include +#include +#include +#include +#include + + +namespace DB +{ +class ReadBuffer; +class ReadBufferFromFileBase; +class SeekableReadBuffer; + +/// Implementation of IArchiveReader for reading zip archives. 
+class ZipArchiveReader : public shared_ptr_helper, public IArchiveReader +{ +public: + using CompressionMethod = ZipArchiveWriter::CompressionMethod; + + ~ZipArchiveReader() override; + + /// Returns true if there is a specified file in the archive. + bool fileExists(const String & filename) override; + + /// Returns the information about a file stored in the archive. + FileInfo getFileInfo(const String & filename) override; + + /// Starts enumerating files in the archive. + std::unique_ptr firstFile() override; + + /// Starts reading a file from the archive. The function returns a read buffer, + /// you can read that buffer to extract uncompressed data from the archive. + /// Several read buffers can be used at the same time in parallel. + std::unique_ptr readFile(const String & filename) override; + + /// It's possible to convert a file enumerator to a read buffer and vice versa. + std::unique_ptr readFile(std::unique_ptr enumerator) override; + std::unique_ptr nextFile(std::unique_ptr read_buffer) override; + + /// Sets password used to decrypt the contents of the files in the archive. + void setPassword(const String & password_) override; + + /// Utility functions. + static CompressionMethod parseCompressionMethod(const String & str) { return ZipArchiveWriter::parseCompressionMethod(str); } + static void checkCompressionMethodIsEnabled(CompressionMethod method) { ZipArchiveWriter::checkCompressionMethodIsEnabled(method); } + static void checkEncryptionIsEnabled() { ZipArchiveWriter::checkEncryptionIsEnabled(); } + +private: + /// Constructs an archive's reader that will read from a file in the local filesystem. + explicit ZipArchiveReader(const String & path_to_archive_); + + /// Constructs an archive's reader that will read by making a read buffer by using + /// a specified function. 
+ ZipArchiveReader(const String & path_to_archive_, const ReadArchiveFunction & archive_read_function_, UInt64 archive_size_); + + friend struct shared_ptr_helper; + class ReadBufferFromZipArchive; + class FileEnumeratorImpl; + class HandleHolder; + using RawHandle = void *; + + void init(); + + HandleHolder acquireHandle(); + RawHandle acquireRawHandle(); + void releaseRawHandle(RawHandle handle_); + + void checkResult(int code) const; + [[noreturn]] void showError(const String & message) const; + + const String path_to_archive; + const ReadArchiveFunction archive_read_function; + const UInt64 archive_size = 0; + String password; + std::vector free_handles; + mutable std::mutex mutex; +}; + +} + +#endif diff --git a/src/IO/Archives/ZipArchiveWriter.cpp b/src/IO/Archives/ZipArchiveWriter.cpp new file mode 100644 index 00000000000..f5ecea5e5aa --- /dev/null +++ b/src/IO/Archives/ZipArchiveWriter.cpp @@ -0,0 +1,385 @@ +#include + +#if USE_MINIZIP +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int CANNOT_PACK_ARCHIVE; + extern const int SUPPORT_IS_DISABLED; + extern const int LOGICAL_ERROR; +} + +using RawHandle = zipFile; + + +/// Holds a raw handle, calls acquireRawHandle() in the constructor and releaseRawHandle() in the destructor. +class ZipArchiveWriter::HandleHolder +{ +public: + HandleHolder() = default; + + explicit HandleHolder(const std::shared_ptr & writer_) : writer(writer_), raw_handle(writer->acquireRawHandle()) { } + + ~HandleHolder() + { + if (raw_handle) + { + try + { + int err = zipCloseFileInZip(raw_handle); + /// If err == ZIP_PARAMERROR the file is already closed. + if (err != ZIP_PARAMERROR) + checkResult(err); + } + catch (...) 
+ { + tryLogCurrentException("ZipArchiveWriter"); + } + writer->releaseRawHandle(raw_handle); + } + } + + HandleHolder(HandleHolder && src) + { + *this = std::move(src); + } + + HandleHolder & operator =(HandleHolder && src) + { + writer = std::exchange(src.writer, nullptr); + raw_handle = std::exchange(src.raw_handle, nullptr); + return *this; + } + + RawHandle getRawHandle() const { return raw_handle; } + std::shared_ptr getWriter() const { return writer; } + + void checkResult(int code) const { writer->checkResult(code); } + +private: + std::shared_ptr writer; + RawHandle raw_handle = nullptr; +}; + + +/// This class represents a WriteBuffer actually returned by writeFile(). +class ZipArchiveWriter::WriteBufferFromZipArchive : public WriteBufferFromFileBase +{ +public: + WriteBufferFromZipArchive(HandleHolder && handle_, const String & filename_) + : WriteBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0) + , handle(std::move(handle_)) + , filename(filename_) + { + auto compress_method = handle.getWriter()->compression_method; + auto compress_level = handle.getWriter()->compression_level; + checkCompressionMethodIsEnabled(static_cast(compress_method)); + + const char * password_cstr = nullptr; + const String & password_str = handle.getWriter()->password; + if (!password_str.empty()) + { + checkEncryptionIsEnabled(); + password_cstr = password_str.c_str(); + } + + RawHandle raw_handle = handle.getRawHandle(); + + checkResult(zipOpenNewFileInZip3_64( + raw_handle, + filename_.c_str(), + /* zipfi= */ nullptr, + /* extrafield_local= */ nullptr, + /* size_extrafield_local= */ 0, + /* extrafield_global= */ nullptr, + /* size_extrafield_global= */ 0, + /* comment= */ nullptr, + compress_method, + compress_level, + /* raw= */ false, + /* windowBits= */ 0, + /* memLevel= */ 0, + /* strategy= */ 0, + password_cstr, + /* crc_for_crypting= */ 0, + /* zip64= */ true)); + } + + ~WriteBufferFromZipArchive() override + { + try + { + finalize(); + } + catch (...) 
+ { + tryLogCurrentException("ZipArchiveWriter"); + } + } + + void sync() override { next(); } + std::string getFileName() const override { return filename; } + +private: + void nextImpl() override + { + if (!offset()) + return; + RawHandle raw_handle = handle.getRawHandle(); + checkResult(zipWriteInFileInZip(raw_handle, working_buffer.begin(), offset())); + } + + void checkResult(int code) const { handle.checkResult(code); } + + HandleHolder handle; + String filename; +}; + + +namespace +{ + /// Provides a set of functions allowing the minizip library to write its output + /// to a WriteBuffer instead of an ordinary file in the local filesystem. + class StreamFromWriteBuffer + { + public: + static RawHandle open(std::unique_ptr archive_write_buffer) + { + Opaque opaque{std::move(archive_write_buffer)}; + + zlib_filefunc64_def func_def; + func_def.zopen64_file = &StreamFromWriteBuffer::openFileFunc; + func_def.zclose_file = &StreamFromWriteBuffer::closeFileFunc; + func_def.zread_file = &StreamFromWriteBuffer::readFileFunc; + func_def.zwrite_file = &StreamFromWriteBuffer::writeFileFunc; + func_def.zseek64_file = &StreamFromWriteBuffer::seekFunc; + func_def.ztell64_file = &StreamFromWriteBuffer::tellFunc; + func_def.zerror_file = &StreamFromWriteBuffer::testErrorFunc; + func_def.opaque = &opaque; + + return zipOpen2_64( + /* path= */ nullptr, + /* append= */ false, + /* globalcomment= */ nullptr, + &func_def); + } + + private: + std::unique_ptr write_buffer; + UInt64 start_offset = 0; + + struct Opaque + { + std::unique_ptr write_buffer; + }; + + static void * openFileFunc(void * opaque, const void *, int) + { + Opaque & opq = *reinterpret_cast(opaque); + return new StreamFromWriteBuffer(std::move(opq.write_buffer)); + } + + explicit StreamFromWriteBuffer(std::unique_ptr write_buffer_) + : write_buffer(std::move(write_buffer_)), start_offset(write_buffer->count()) {} + + static int closeFileFunc(void *, void * stream) + { + delete reinterpret_cast(stream); + return 
ZIP_OK; + } + + static StreamFromWriteBuffer & get(void * ptr) + { + return *reinterpret_cast(ptr); + } + + static unsigned long writeFileFunc(void *, void * stream, const void * buf, unsigned long size) // NOLINT(google-runtime-int) + { + auto & strm = get(stream); + strm.write_buffer->write(reinterpret_cast(buf), size); + return size; + } + + static int testErrorFunc(void *, void *) + { + return ZIP_OK; + } + + static ZPOS64_T tellFunc(void *, void * stream) + { + auto & strm = get(stream); + auto pos = strm.write_buffer->count() - strm.start_offset; + return pos; + } + + static long seekFunc(void *, void *, ZPOS64_T, int) // NOLINT(google-runtime-int) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "StreamFromWriteBuffer::seek must not be called"); + } + + static unsigned long readFileFunc(void *, void *, void *, unsigned long) // NOLINT(google-runtime-int) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "StreamFromWriteBuffer::readFile must not be called"); + } + }; +} + + +ZipArchiveWriter::ZipArchiveWriter(const String & path_to_archive_) + : ZipArchiveWriter(path_to_archive_, nullptr) +{ +} + +ZipArchiveWriter::ZipArchiveWriter(const String & path_to_archive_, std::unique_ptr archive_write_buffer_) + : path_to_archive(path_to_archive_) +{ + if (archive_write_buffer_) + handle = StreamFromWriteBuffer::open(std::move(archive_write_buffer_)); + else + handle = zipOpen64(path_to_archive.c_str(), /* append= */ false); + if (!handle) + throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Couldn't create zip archive {}", quoteString(path_to_archive)); +} + +ZipArchiveWriter::~ZipArchiveWriter() +{ + if (handle) + { + try + { + checkResult(zipClose(handle, /* global_comment= */ nullptr)); + } + catch (...) 
+ { + tryLogCurrentException("ZipArchiveWriter"); + } + } +} + +std::unique_ptr ZipArchiveWriter::writeFile(const String & filename) +{ + return std::make_unique(acquireHandle(), filename); +} + +bool ZipArchiveWriter::isWritingFile() const +{ + std::lock_guard lock{mutex}; + return !handle; +} + +void ZipArchiveWriter::setCompression(int compression_method_, int compression_level_) +{ + std::lock_guard lock{mutex}; + compression_method = compression_method_; + compression_level = compression_level_; +} + +void ZipArchiveWriter::setPassword(const String & password_) +{ + std::lock_guard lock{mutex}; + password = password_; +} + +ZipArchiveWriter::CompressionMethod ZipArchiveWriter::parseCompressionMethod(const String & str) +{ + if (str.empty()) + return CompressionMethod::kDeflate; /// Default compression method is DEFLATE. + else if (boost::iequals(str, "store")) + return CompressionMethod::kStore; + else if (boost::iequals(str, "deflate")) + return CompressionMethod::kDeflate; + else if (boost::iequals(str, "bzip2")) + return CompressionMethod::kBzip2; + else if (boost::iequals(str, "lzma")) + return CompressionMethod::kLzma; + else if (boost::iequals(str, "zstd")) + return CompressionMethod::kZstd; + else if (boost::iequals(str, "xz")) + return CompressionMethod::kXz; + else + throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", str); +} + +/// Checks that a passed compression method can be used. 
+void ZipArchiveWriter::checkCompressionMethodIsEnabled(CompressionMethod method) +{ + switch (method) + { + case CompressionMethod::kStore: [[fallthrough]]; + case CompressionMethod::kDeflate: + case CompressionMethod::kLzma: + case CompressionMethod::kXz: + case CompressionMethod::kZstd: + return; + + case CompressionMethod::kBzip2: + { +#if USE_BZIP2 + return; +#else + throw Exception("BZIP2 compression method is disabled", ErrorCodes::SUPPORT_IS_DISABLED); +#endif + } + } + throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", static_cast(method)); +} + +/// Checks that encryption is enabled. +void ZipArchiveWriter::checkEncryptionIsEnabled() +{ +#if !USE_SSL + throw Exception("Encryption in zip archive is disabled", ErrorCodes::SUPPORT_IS_DISABLED); +#endif +} + +ZipArchiveWriter::HandleHolder ZipArchiveWriter::acquireHandle() +{ + return HandleHolder{std::static_pointer_cast(shared_from_this())}; +} + +RawHandle ZipArchiveWriter::acquireRawHandle() +{ + std::lock_guard lock{mutex}; + if (!handle) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot have more than one write buffer while writing a zip archive"); + return std::exchange(handle, nullptr); +} + +void ZipArchiveWriter::releaseRawHandle(RawHandle raw_handle_) +{ + std::lock_guard lock{mutex}; + handle = raw_handle_; +} + +void ZipArchiveWriter::checkResult(int code) const +{ + if (code >= ZIP_OK) + return; + + String message = "Code= "; + switch (code) + { + case ZIP_ERRNO: message += "ERRNO, errno= " + String{strerror(errno)}; break; + case ZIP_PARAMERROR: message += "PARAMERROR"; break; + case ZIP_BADZIPFILE: message += "BADZIPFILE"; break; + case ZIP_INTERNALERROR: message += "INTERNALERROR"; break; + default: message += std::to_string(code); break; + } + showError(message); +} + +void ZipArchiveWriter::showError(const String & message) const +{ + throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Couldn't pack zip archive {}: {}", 
quoteString(path_to_archive), message); +} + +} + +#endif diff --git a/src/IO/Archives/ZipArchiveWriter.h b/src/IO/Archives/ZipArchiveWriter.h new file mode 100644 index 00000000000..76f8dd8e9e5 --- /dev/null +++ b/src/IO/Archives/ZipArchiveWriter.h @@ -0,0 +1,97 @@ +#pragma once + +#include + +#if USE_MINIZIP +#include +#include +#include + + +namespace DB +{ +class WriteBuffer; +class WriteBufferFromFileBase; + +/// Implementation of IArchiveWriter for writing zip archives. +class ZipArchiveWriter : public shared_ptr_helper, public IArchiveWriter +{ +public: + /// Destructors finalizes writing the archive. + ~ZipArchiveWriter() override; + + /// Starts writing a file to the archive. The function returns a write buffer, + /// any data written to that buffer will be compressed and then put to the archive. + /// You can keep only one such buffer at a time, a buffer returned by previous call + /// of the function `writeFile()` should be destroyed before next call of `writeFile()`. + std::unique_ptr writeFile(const String & filename) override; + + /// Returns true if there is an active instance of WriteBuffer returned by writeFile(). + /// This function should be used mostly for debugging purposes. + bool isWritingFile() const override; + + /// Supported compression methods. + enum class CompressionMethod + { + /// See mz.h + kStore = 0, + kDeflate = 8, + kBzip2 = 12, + kLzma = 14, + kZstd = 93, + kXz = 95, + }; + + /// Some compression levels. + enum class CompressionLevels + { + kDefault = kDefaultCompressionLevel, + kFast = 2, + kNormal = 6, + kBest = 9, + }; + + /// Sets compression method and level. + /// Changing them will affect next file in the archive. + void setCompression(int compression_method_, int compression_level_) override; + + /// Sets password. Only contents of the files are encrypted, + /// names of files are not encrypted. + /// Changing the password will affect next file in the archive. 
+ void setPassword(const String & password_) override; + + /// Utility functions. + static CompressionMethod parseCompressionMethod(const String & str); + static void checkCompressionMethodIsEnabled(CompressionMethod method); + static void checkEncryptionIsEnabled(); + +private: + /// Constructs an archive that will be written as a file in the local filesystem. + explicit ZipArchiveWriter(const String & path_to_archive_); + + /// Constructs an archive that will be written by using a specified `archive_write_buffer_`. + ZipArchiveWriter(const String & path_to_archive_, std::unique_ptr archive_write_buffer_); + + friend struct shared_ptr_helper; + class WriteBufferFromZipArchive; + class HandleHolder; + using RawHandle = void *; + + HandleHolder acquireHandle(); + RawHandle acquireRawHandle(); + void releaseRawHandle(RawHandle raw_handle_); + + void checkResult(int code) const; + [[noreturn]] void showError(const String & message) const; + + const String path_to_archive; + int compression_method = static_cast(CompressionMethod::kDeflate); + int compression_level = kDefaultCompressionLevel; + String password; + RawHandle handle = nullptr; + mutable std::mutex mutex; +}; + +} + +#endif diff --git a/src/IO/Archives/createArchiveReader.cpp b/src/IO/Archives/createArchiveReader.cpp new file mode 100644 index 00000000000..6ebab000a18 --- /dev/null +++ b/src/IO/Archives/createArchiveReader.cpp @@ -0,0 +1,38 @@ +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int CANNOT_UNPACK_ARCHIVE; + extern const int SUPPORT_IS_DISABLED; +} + + +std::shared_ptr createArchiveReader(const String & path_to_archive) +{ + return createArchiveReader(path_to_archive, {}, 0); +} + + +std::shared_ptr createArchiveReader( + const String & path_to_archive, + [[maybe_unused]] const std::function()> & archive_read_function, + [[maybe_unused]] size_t archive_size) +{ + if (path_to_archive.ends_with(".zip") || path_to_archive.ends_with(".zipx")) + { +#if 
USE_MINIZIP + return ZipArchiveReader::create(path_to_archive, archive_read_function, archive_size); +#else + throw Exception("minizip library is disabled", ErrorCodes::SUPPORT_IS_DISABLED); +#endif + } + else + throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Cannot determine the type of archive {}", path_to_archive); +} + +} diff --git a/src/IO/Archives/createArchiveReader.h b/src/IO/Archives/createArchiveReader.h new file mode 100644 index 00000000000..9e1073b9481 --- /dev/null +++ b/src/IO/Archives/createArchiveReader.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class IArchiveReader; +class SeekableReadBuffer; + +/// Starts reading a specified archive in the local filesystem. +std::shared_ptr createArchiveReader(const String & path_to_archive); + +/// Starts reading a specified archive, the archive is read by using a specified read buffer, +/// `path_to_archive` is used only to determine the archive's type. +std::shared_ptr createArchiveReader( + const String & path_to_archive, + const std::function()> & archive_read_function, + size_t archive_size); + +} diff --git a/src/IO/Archives/createArchiveWriter.cpp b/src/IO/Archives/createArchiveWriter.cpp new file mode 100644 index 00000000000..26cbde8c363 --- /dev/null +++ b/src/IO/Archives/createArchiveWriter.cpp @@ -0,0 +1,38 @@ +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int CANNOT_PACK_ARCHIVE; + extern const int SUPPORT_IS_DISABLED; +} + + +std::shared_ptr createArchiveWriter(const String & path_to_archive) +{ + return createArchiveWriter(path_to_archive, nullptr); +} + + +std::shared_ptr createArchiveWriter( + const String & path_to_archive, + [[maybe_unused]] std::unique_ptr archive_write_buffer) +{ + if (path_to_archive.ends_with(".zip") || path_to_archive.ends_with(".zipx")) + { +#if USE_MINIZIP + return ZipArchiveWriter::create(path_to_archive, std::move(archive_write_buffer)); +#else + throw Exception("minizip 
library is disabled", ErrorCodes::SUPPORT_IS_DISABLED); +#endif + } + else + throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Cannot determine the type of archive {}", path_to_archive); +} + +} diff --git a/src/IO/Archives/createArchiveWriter.h b/src/IO/Archives/createArchiveWriter.h new file mode 100644 index 00000000000..51ffd4d1144 --- /dev/null +++ b/src/IO/Archives/createArchiveWriter.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class IArchiveWriter; +class WriteBuffer; + +/// Starts writing a specified archive in the local filesystem. +std::shared_ptr createArchiveWriter(const String & path_to_archive); + +/// Starts writing a specified archive, the archive is written by using a specified write buffer, +/// `path_to_archive` is used only to determine the archive's type. +std::shared_ptr createArchiveWriter(const String & path_to_archive, std::unique_ptr archive_write_buffer); + +} diff --git a/src/IO/tests/gtest_archive_reader_and_writer.cpp b/src/IO/tests/gtest_archive_reader_and_writer.cpp new file mode 100644 index 00000000000..c6b012a9914 --- /dev/null +++ b/src/IO/tests/gtest_archive_reader_and_writer.cpp @@ -0,0 +1,341 @@ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB::ErrorCodes +{ + extern const int CANNOT_UNPACK_ARCHIVE; +} + +namespace fs = std::filesystem; +using namespace DB; + + +class ArchiveReaderAndWriterTest : public ::testing::TestWithParam +{ +public: + ArchiveReaderAndWriterTest() + { + const char * archive_file_ext = GetParam(); + path_to_archive = temp_folder.path() + "/archive" + archive_file_ext; + fs::create_directories(temp_folder.path()); + } + + const String & getPathToArchive() const { return path_to_archive; } + + static void expectException(int code, const String & message, const std::function & func) + { + try + { + func(); + } + catch (Exception & e) + { + if ((e.code() != 
code) || (e.message().find(message) == String::npos)) + throw; + } + } + +private: + Poco::TemporaryFile temp_folder; + String path_to_archive; +}; + + +TEST_P(ArchiveReaderAndWriterTest, EmptyArchive) +{ + /// Make an archive. + { + createArchiveWriter(getPathToArchive()); + } + + /// The created archive can be found in the local filesystem. + ASSERT_TRUE(fs::exists(getPathToArchive())); + + /// Read the archive. + auto reader = createArchiveReader(getPathToArchive()); + + EXPECT_FALSE(reader->fileExists("nofile.txt")); + + expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' not found", + [&]{ reader->getFileInfo("nofile.txt"); }); + + expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' not found", + [&]{ reader->readFile("nofile.txt"); }); + + EXPECT_EQ(reader->firstFile(), nullptr); +} + + +TEST_P(ArchiveReaderAndWriterTest, SingleFileInArchive) +{ + /// Make an archive. + std::string_view contents = "The contents of a.txt"; + { + auto writer = createArchiveWriter(getPathToArchive()); + { + auto out = writer->writeFile("a.txt"); + writeString(contents, *out); + } + } + + /// Read the archive. + auto reader = createArchiveReader(getPathToArchive()); + + ASSERT_TRUE(reader->fileExists("a.txt")); + + auto file_info = reader->getFileInfo("a.txt"); + EXPECT_EQ(file_info.uncompressed_size, contents.size()); + EXPECT_GT(file_info.compressed_size, 0); + + { + auto in = reader->readFile("a.txt"); + String str; + readStringUntilEOF(str, *in); + EXPECT_EQ(str, contents); + } + + { + /// Use an enumerator. + auto enumerator = reader->firstFile(); + ASSERT_NE(enumerator, nullptr); + EXPECT_EQ(enumerator->getFileName(), "a.txt"); + EXPECT_EQ(enumerator->getFileInfo().uncompressed_size, contents.size()); + EXPECT_GT(enumerator->getFileInfo().compressed_size, 0); + EXPECT_FALSE(enumerator->nextFile()); + } + + { + /// Use converting an enumerator to a reading buffer and vice versa. 
+ auto enumerator = reader->firstFile(); + ASSERT_NE(enumerator, nullptr); + EXPECT_EQ(enumerator->getFileName(), "a.txt"); + auto in = reader->readFile(std::move(enumerator)); + String str; + readStringUntilEOF(str, *in); + EXPECT_EQ(str, contents); + enumerator = reader->nextFile(std::move(in)); + EXPECT_EQ(enumerator, nullptr); + } + + { + /// Wrong using of an enumerator throws an exception. + auto enumerator = reader->firstFile(); + ASSERT_NE(enumerator, nullptr); + EXPECT_FALSE(enumerator->nextFile()); + expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "No current file", + [&]{ enumerator->getFileName(); }); + + expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "No current file", + [&] { reader->readFile(std::move(enumerator)); }); + } +} + + +TEST_P(ArchiveReaderAndWriterTest, TwoFilesInArchive) +{ + /// Make an archive. + std::string_view a_contents = "The contents of a.txt"; + std::string_view c_contents = "The contents of b/c.txt"; + { + auto writer = createArchiveWriter(getPathToArchive()); + { + auto out = writer->writeFile("a.txt"); + writeString(a_contents, *out); + } + { + auto out = writer->writeFile("b/c.txt"); + writeString(c_contents, *out); + } + } + + /// Read the archive. + auto reader = createArchiveReader(getPathToArchive()); + + ASSERT_TRUE(reader->fileExists("a.txt")); + ASSERT_TRUE(reader->fileExists("b/c.txt")); + + EXPECT_EQ(reader->getFileInfo("a.txt").uncompressed_size, a_contents.size()); + EXPECT_EQ(reader->getFileInfo("b/c.txt").uncompressed_size, c_contents.size()); + + { + auto in = reader->readFile("a.txt"); + String str; + readStringUntilEOF(str, *in); + EXPECT_EQ(str, a_contents); + } + + { + auto in = reader->readFile("b/c.txt"); + String str; + readStringUntilEOF(str, *in); + EXPECT_EQ(str, c_contents); + } + + { + /// Read a.txt again. + auto in = reader->readFile("a.txt"); + String str; + readStringUntilEOF(str, *in); + EXPECT_EQ(str, a_contents); + } + + { + /// Use an enumerator. 
+ auto enumerator = reader->firstFile(); + ASSERT_NE(enumerator, nullptr); + EXPECT_EQ(enumerator->getFileName(), "a.txt"); + EXPECT_EQ(enumerator->getFileInfo().uncompressed_size, a_contents.size()); + EXPECT_TRUE(enumerator->nextFile()); + EXPECT_EQ(enumerator->getFileName(), "b/c.txt"); + EXPECT_EQ(enumerator->getFileInfo().uncompressed_size, c_contents.size()); + EXPECT_FALSE(enumerator->nextFile()); + } + + { + /// Use converting an enumerator to a reading buffer and vice versa. + auto enumerator = reader->firstFile(); + ASSERT_NE(enumerator, nullptr); + EXPECT_EQ(enumerator->getFileName(), "a.txt"); + auto in = reader->readFile(std::move(enumerator)); + String str; + readStringUntilEOF(str, *in); + EXPECT_EQ(str, a_contents); + enumerator = reader->nextFile(std::move(in)); + ASSERT_NE(enumerator, nullptr); + EXPECT_EQ(enumerator->getFileName(), "b/c.txt"); + in = reader->readFile(std::move(enumerator)); + readStringUntilEOF(str, *in); + EXPECT_EQ(str, c_contents); + enumerator = reader->nextFile(std::move(in)); + EXPECT_EQ(enumerator, nullptr); + } +} + + +TEST_P(ArchiveReaderAndWriterTest, InMemory) +{ + String archive_in_memory; + + /// Make an archive. + std::string_view a_contents = "The contents of a.txt"; + std::string_view b_contents = "The contents of b.txt"; + { + auto writer = createArchiveWriter(getPathToArchive(), std::make_unique(archive_in_memory)); + { + auto out = writer->writeFile("a.txt"); + writeString(a_contents, *out); + } + { + auto out = writer->writeFile("b.txt"); + writeString(b_contents, *out); + } + } + + /// The created archive is really in memory. + ASSERT_FALSE(fs::exists(getPathToArchive())); + + /// Read the archive. 
+ auto read_archive_func = [&]() -> std::unique_ptr { return std::make_unique(archive_in_memory); }; + auto reader = createArchiveReader(getPathToArchive(), read_archive_func, archive_in_memory.size()); + + ASSERT_TRUE(reader->fileExists("a.txt")); + ASSERT_TRUE(reader->fileExists("b.txt")); + + EXPECT_EQ(reader->getFileInfo("a.txt").uncompressed_size, a_contents.size()); + EXPECT_EQ(reader->getFileInfo("b.txt").uncompressed_size, b_contents.size()); + + { + auto in = reader->readFile("a.txt"); + String str; + readStringUntilEOF(str, *in); + EXPECT_EQ(str, a_contents); + } + + { + auto in = reader->readFile("b.txt"); + String str; + readStringUntilEOF(str, *in); + EXPECT_EQ(str, b_contents); + } + + { + /// Read a.txt again. + auto in = reader->readFile("a.txt"); + String str; + readStringUntilEOF(str, *in); + EXPECT_EQ(str, a_contents); + } +} + + +TEST_P(ArchiveReaderAndWriterTest, Password) +{ + /// Make an archive. + std::string_view contents = "The contents of a.txt"; + { + auto writer = createArchiveWriter(getPathToArchive()); + writer->setPassword("Qwe123"); + { + auto out = writer->writeFile("a.txt"); + writeString(contents, *out); + } + } + + /// Read the archive. + auto reader = createArchiveReader(getPathToArchive()); + + /// Try to read without a password. + expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Password is required", + [&]{ reader->readFile("a.txt"); }); + + { + /// Try to read with a wrong password. + reader->setPassword("123Qwe"); + expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Wrong password", + [&]{ reader->readFile("a.txt"); }); + } + + { + /// Reading with the right password is successful. 
+ reader->setPassword("Qwe123"); + auto in = reader->readFile("a.txt"); + String str; + readStringUntilEOF(str, *in); + EXPECT_EQ(str, contents); + } +} + + +TEST_P(ArchiveReaderAndWriterTest, ArchiveNotExist) +{ + expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open", + [&]{ createArchiveReader(getPathToArchive()); }); +} + + +namespace +{ + const char * supported_archive_file_exts[] = + { +#if USE_MINIZIP + ".zip", +#endif + }; +} + +INSTANTIATE_TEST_SUITE_P(All, ArchiveReaderAndWriterTest, ::testing::ValuesIn(supported_archive_file_exts)); From d9bdbf47b7cac3a8ae2658bbbec54f05b3653402 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 5 Feb 2022 02:47:46 +0700 Subject: [PATCH 13/39] ReadBufferFromMemory now can seek backwards after been read up to EOF. --- src/IO/ReadBufferFromMemory.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/IO/ReadBufferFromMemory.cpp b/src/IO/ReadBufferFromMemory.cpp index 98c39c833b0..d0863878797 100644 --- a/src/IO/ReadBufferFromMemory.cpp +++ b/src/IO/ReadBufferFromMemory.cpp @@ -12,31 +12,33 @@ off_t ReadBufferFromMemory::seek(off_t offset, int whence) { if (whence == SEEK_SET) { - if (offset >= 0 && working_buffer.begin() + offset < working_buffer.end()) + if (offset >= 0 && internal_buffer.begin() + offset < internal_buffer.end()) { - pos = working_buffer.begin() + offset; - return size_t(pos - working_buffer.begin()); + pos = internal_buffer.begin() + offset; + working_buffer = internal_buffer; /// We need to restore `working_buffer` in case the position was at EOF before this seek(). + return size_t(pos - internal_buffer.begin()); } else throw Exception( "Seek position is out of bounds. 
" "Offset: " - + std::to_string(offset) + ", Max: " + std::to_string(size_t(working_buffer.end() - working_buffer.begin())), + + std::to_string(offset) + ", Max: " + std::to_string(size_t(internal_buffer.end() - internal_buffer.begin())), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); } else if (whence == SEEK_CUR) { Position new_pos = pos + offset; - if (new_pos >= working_buffer.begin() && new_pos < working_buffer.end()) + if (new_pos >= internal_buffer.begin() && new_pos < internal_buffer.end()) { pos = new_pos; - return size_t(pos - working_buffer.begin()); + working_buffer = internal_buffer; /// We need to restore `working_buffer` in case the position was at EOF before this seek(). + return size_t(pos - internal_buffer.begin()); } else throw Exception( "Seek position is out of bounds. " "Offset: " - + std::to_string(offset) + ", Max: " + std::to_string(size_t(working_buffer.end() - working_buffer.begin())), + + std::to_string(offset) + ", Max: " + std::to_string(size_t(internal_buffer.end() - internal_buffer.begin())), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); } else @@ -45,7 +47,7 @@ off_t ReadBufferFromMemory::seek(off_t offset, int whence) off_t ReadBufferFromMemory::getPosition() { - return pos - working_buffer.begin(); + return pos - internal_buffer.begin(); } } From 7674bc986e963dd74392fbe138408d29d74dd325 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sun, 16 Jan 2022 16:32:32 +0800 Subject: [PATCH 14/39] Disable projection when there is JOIN or SAMPLE --- src/Interpreters/ExpressionAnalyzer.cpp | 5 ++- src/Storages/MergeTree/MergeTreeData.cpp | 37 ++++++++++++------- .../01710_projection_with_joins.reference | 2 + .../01710_projection_with_joins.sql | 15 +++++++- 4 files changed, 44 insertions(+), 15 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index c195cb93c5e..4a5f18a408f 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -944,7 +944,10 @@ 
static std::unique_ptr buildJoinedPlan( * - JOIN tables will need aliases to correctly resolve USING clause. */ auto interpreter = interpretSubquery( - join_element.table_expression, context, original_right_columns, query_options.copy().setWithAllColumns().ignoreAlias(false)); + join_element.table_expression, + context, + original_right_columns, + query_options.copy().setWithAllColumns().ignoreProjections(false).ignoreAlias(false)); auto joined_plan = std::make_unique(); interpreter->buildQueryPlan(*joined_plan); { diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e5771c016e5..0b29545120c 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -4630,23 +4631,33 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections || query_info.is_projection_query) return std::nullopt; - const auto & query_ptr = query_info.original_query; - - if (auto * select = query_ptr->as(); select) - { - // Currently projections don't support final yet. - if (select->final()) - return std::nullopt; - - // Currently projections don't support ARRAY JOIN yet. - if (select->arrayJoinExpressionList().first) - return std::nullopt; - } - // Currently projections don't support sampling yet. if (settings.parallel_replicas_count > 1) return std::nullopt; + auto query_ptr = query_info.original_query; + auto * select_query = query_ptr->as(); + if (!select_query) + return std::nullopt; + + // Currently projections don't support final yet. + if (select_query->final()) + return std::nullopt; + + // Currently projections don't support sample yet. + if (select_query->sampleSize()) + return std::nullopt; + + // Currently projections don't support ARRAY JOIN yet. 
+ if (select_query->arrayJoinExpressionList().first) + return std::nullopt; + + // In order to properly analyze joins, aliases should be recognized. However, aliases get lost during projection analysis. + // Let's disable projection if there are any JOIN clauses. + // TODO: We need a better identifier resolution mechanism for projection analysis. + if (select_query->join()) + return std::nullopt; + InterpreterSelectQuery select( query_ptr, query_context, diff --git a/tests/queries/0_stateless/01710_projection_with_joins.reference b/tests/queries/0_stateless/01710_projection_with_joins.reference index e69de29bb2d..4792e70f333 100644 --- a/tests/queries/0_stateless/01710_projection_with_joins.reference +++ b/tests/queries/0_stateless/01710_projection_with_joins.reference @@ -0,0 +1,2 @@ +2 +3 diff --git a/tests/queries/0_stateless/01710_projection_with_joins.sql b/tests/queries/0_stateless/01710_projection_with_joins.sql index fcd1c586fa3..a9aaf6325d4 100644 --- a/tests/queries/0_stateless/01710_projection_with_joins.sql +++ b/tests/queries/0_stateless/01710_projection_with_joins.sql @@ -1,8 +1,21 @@ drop table if exists t; -create table t (s UInt16, l UInt16, projection p (select s, l order by l)) engine MergeTree order by s; +create table t (s UInt16, l UInt16, projection p (select s, l order by l)) engine MergeTree order by s; select s from t join (select toUInt16(1) as s) x using (s) settings allow_experimental_projection_optimization = 1; select s from t join (select toUInt16(1) as s) x using (s) settings allow_experimental_projection_optimization = 0; drop table t; + +drop table if exists mt; +create table mt (id1 Int8, id2 Int8) Engine=MergeTree order by tuple(); +select id1 as alias1 from mt all inner join (select id2 as alias1 from mt) as t using (alias1) settings allow_experimental_projection_optimization = 1; +select id1 from mt all inner join (select id2 as id1 from mt) as t using (id1) settings allow_experimental_projection_optimization = 1; +select id2 
as id1 from mt all inner join (select id1 from mt) as t using (id1) settings allow_experimental_projection_optimization = 1; +drop table mt; + +drop table if exists j; +create table j (id1 Int8, id2 Int8, projection p (select id1, id2 order by id2)) Engine=MergeTree order by id1 settings index_granularity = 1; +insert into j select number, number from numbers(10); +select id1 as alias1 from j all inner join (select id2 as alias1 from j where id2 in (1, 2, 3)) as t using (alias1) where id2 in (2, 3, 4) settings allow_experimental_projection_optimization = 1, force_optimize_projection = 1; +drop table j; From 27fcefd315ab38704beea691c044326d092308f4 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 3 Feb 2022 19:20:22 +0800 Subject: [PATCH 15/39] Disable projection when doing parallel replica reading --- src/Storages/MergeTree/MergeTreeData.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0b29545120c..aaf6cf3884e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4631,8 +4631,8 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections || query_info.is_projection_query) return std::nullopt; - // Currently projections don't support sampling yet. - if (settings.parallel_replicas_count > 1) + // Currently projections don't support parallel replicas reading yet. 
+ if (settings.parallel_replicas_count > 1 || settings.max_parallel_replicas > 1) return std::nullopt; auto query_ptr = query_info.original_query; From 98857de82ba39c6ab2081a01f054793519c66c2f Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 3 Feb 2022 02:31:24 +0800 Subject: [PATCH 16/39] Disable projection for high-order storages --- src/Storages/StorageBuffer.cpp | 5 ++++- src/Storages/StorageMaterializedView.cpp | 4 ++++ src/Storages/StorageMerge.cpp | 6 +++++- src/Storages/StorageProxy.h | 3 +++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 9f0cb478bb6..f97c09471c3 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -199,6 +199,8 @@ QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage( if (destination.get() == this) throw Exception("Destination table is myself. Read will cause infinite loop.", ErrorCodes::INFINITE_LOOP); + /// TODO: Find a way to support projections for StorageBuffer + query_info.ignore_projections = true; return destination->getQueryProcessingStage(local_context, to_stage, destination->getInMemoryMetadataPtr(), query_info); } @@ -365,9 +367,10 @@ void StorageBuffer::read( */ if (processed_stage > QueryProcessingStage::FetchColumns) { + /// TODO: Find a way to support projections for StorageBuffer auto interpreter = InterpreterSelectQuery( query_info.query, local_context, std::move(pipe_from_buffers), - SelectQueryOptions(processed_stage)); + SelectQueryOptions(processed_stage).ignoreProjections()); interpreter.buildQueryPlan(buffers_plan); } else diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 49111e02b11..7c5ef5ac04c 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -135,6 +135,10 @@ QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage( const StorageMetadataPtr &, SelectQueryInfo 
& query_info) const { + /// TODO: Find a way to support projections for StorageMaterializedView. Why do we use different + /// metadata for materialized view and target table? If they are the same, we can get rid of all + /// converting and use it just like a normal view. + query_info.ignore_projections = true; return getTargetTable()->getQueryProcessingStage(local_context, to_stage, getTargetTable()->getInMemoryMetadataPtr(), query_info); } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 0dc6f2931d3..433fdb5b0b5 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -188,6 +188,8 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage( size_t selected_table_size = 0; + /// TODO: Find a way to support projections for StorageMerge + query_info.ignore_projections = true; for (const auto & iterator : database_table_iterators) { while (iterator->isValid()) @@ -471,7 +473,9 @@ Pipe StorageMerge::createSources( modified_context->setSetting("max_threads", streams_num); modified_context->setSetting("max_streams_to_max_threads_ratio", 1); - InterpreterSelectQuery interpreter{modified_query_info.query, modified_context, SelectQueryOptions(processed_stage)}; + /// TODO: Find a way to support projections for StorageMerge + InterpreterSelectQuery interpreter{ + modified_query_info.query, modified_context, SelectQueryOptions(processed_stage).ignoreProjections()}; pipe = QueryPipelineBuilder::getPipe(interpreter.buildQueryPipeline()); diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 304f84c02eb..894b470ef22 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -37,6 +38,8 @@ public: const StorageMetadataPtr &, SelectQueryInfo & info) const override { + /// TODO: Find a way to support projections for StorageProxy + info.ignore_projections = true; return getNested()->getQueryProcessingStage(context, 
to_stage, getNested()->getInMemoryMetadataPtr(), info); } From 82f31e1abb2ae510b99e8bc6aaf7b969092102f5 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 3 Feb 2022 02:31:45 +0800 Subject: [PATCH 17/39] Fix tests when projection is enabled Avoid using count() in quota related tests count() can subject to many optimization techniques, which is unstable for testing quota usage. --- tests/integration/test_quota/test.py | 8 ++++---- ...01505_trivial_count_with_partition_predicate.reference | 2 +- .../01505_trivial_count_with_partition_predicate.sql | 2 +- .../0_stateless/01710_minmax_count_projection.reference | 4 ++-- .../queries/0_stateless/01710_minmax_count_projection.sql | 2 +- tests/queries/0_stateless/01710_projection_with_joins.sql | 2 +- tests/queries/0_stateless/01739_index_hint.reference | 4 ++-- tests/queries/0_stateless/01739_index_hint.sql | 2 +- tests/queries/0_stateless/01748_partition_id_pruning.sql | 2 ++ 9 files changed, 15 insertions(+), 13 deletions(-) diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index 4149987996b..83ee32bd7dd 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -94,9 +94,9 @@ def test_quota_from_users_xml(): system_quota_usage( [["myQuota", "default", 31556952, 1, 1000, 1, 500, 0, 500, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]]) - instance.query("SELECT COUNT() from test_table") + instance.query("SELECT SUM(x) from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 2, 1000, 2, 500, 0, 500, 0, "\\N", 51, "\\N", 208, "\\N", 50, 1000, 200, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 2, 1000, 2, 500, 0, 500, 0, "\\N", 51, "\\N", 208, "\\N", 100, 1000, 400, "\\N", "\\N"]]) def test_simpliest_quota(): @@ -125,9 +125,9 @@ def test_tracking_quota(): system_quota_usage( [["myQuota", "default", 31556952, 1, "\\N", 1, "\\N", 0, "\\N", 0, "\\N", 50, "\\N", 200, "\\N", 50, "\\N", 200, "\\N", "\\N"]]) - 
instance.query("SELECT COUNT() from test_table") + instance.query("SELECT SUM(x) from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 2, "\\N", 2, "\\N", 0, "\\N", 0, "\\N", 51, "\\N", 208, "\\N", 50, "\\N", 200, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 2, "\\N", 2, "\\N", 0, "\\N", 0, "\\N", 51, "\\N", 208, "\\N", 100, "\\N", 400, "\\N", "\\N"]]) def test_exceed_quota(): diff --git a/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.reference b/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.reference index b8b8fae2830..5abc312652d 100644 --- a/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.reference +++ b/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.reference @@ -5,7 +5,7 @@ 0 1 0 -2 +1 0 4 6 diff --git a/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql b/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql index ecf0b791a49..e4e2e3dd76a 100644 --- a/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql +++ b/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql @@ -31,7 +31,7 @@ select count() from test_tuple where toDate(p) > '2020-09-01'; -- optimized select count() from test_tuple where toDate(p) > '2020-09-01' and i = 1; -- optimized -select count() from test_tuple where i > 1; +select count() from test_tuple where i > 2; -- optimized select count() from test_tuple where i < 1; -- non-optimized diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.reference b/tests/queries/0_stateless/01710_minmax_count_projection.reference index 77649f536f5..b13738a66de 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.reference +++ b/tests/queries/0_stateless/01710_minmax_count_projection.reference @@ -13,7 +13,7 @@ 1 1 1 -\N 2021-10-27 10:00:00 4 -2021-10-24 10:00:00 +\N 2021-10-27 10:00:00 3 +0 2021-10-24 10:00:00 0 diff 
--git a/tests/queries/0_stateless/01710_minmax_count_projection.sql b/tests/queries/0_stateless/01710_minmax_count_projection.sql index 713241ada72..c0f2250cc0f 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.sql +++ b/tests/queries/0_stateless/01710_minmax_count_projection.sql @@ -53,7 +53,7 @@ select count() from d group by toDate(dt); -- fuzz crash SELECT pointInEllipses(min(j), NULL), max(dt), count('0.0000000007') FROM d WHERE toDate(dt) >= '2021-10-25'; -SELECT min(dt) FROM d PREWHERE ceil(j) <= 0; +SELECT min(j) FROM d PREWHERE ceil(j) <= 0; SELECT min(dt) FROM d PREWHERE ((0.9998999834060669 AND 1023) AND 255) <= ceil(j); SELECT count('') AND NULL FROM d PREWHERE ceil(j) <= NULL; diff --git a/tests/queries/0_stateless/01710_projection_with_joins.sql b/tests/queries/0_stateless/01710_projection_with_joins.sql index a9aaf6325d4..a54ba21fd27 100644 --- a/tests/queries/0_stateless/01710_projection_with_joins.sql +++ b/tests/queries/0_stateless/01710_projection_with_joins.sql @@ -17,5 +17,5 @@ drop table mt; drop table if exists j; create table j (id1 Int8, id2 Int8, projection p (select id1, id2 order by id2)) Engine=MergeTree order by id1 settings index_granularity = 1; insert into j select number, number from numbers(10); -select id1 as alias1 from j all inner join (select id2 as alias1 from j where id2 in (1, 2, 3)) as t using (alias1) where id2 in (2, 3, 4) settings allow_experimental_projection_optimization = 1, force_optimize_projection = 1; +select id1 as alias1 from j all inner join (select id2 as alias1 from j where id2 in (1, 2, 3)) as t using (alias1) where id2 in (2, 3, 4) settings allow_experimental_projection_optimization = 1; drop table j; diff --git a/tests/queries/0_stateless/01739_index_hint.reference b/tests/queries/0_stateless/01739_index_hint.reference index 6aa40c5d302..71dfab29154 100644 --- a/tests/queries/0_stateless/01739_index_hint.reference +++ b/tests/queries/0_stateless/01739_index_hint.reference @@ -25,8 +25,8 
@@ drop table tbl; drop table if exists XXXX; create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=128; insert into XXXX select number*60, 0 from numbers(100000); -SELECT count() FROM XXXX WHERE indexHint(t = 42); -128 +SELECT sum(t) FROM XXXX WHERE indexHint(t = 42); +487680 drop table if exists XXXX; create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=8192; insert into XXXX select number*60, 0 from numbers(100000); diff --git a/tests/queries/0_stateless/01739_index_hint.sql b/tests/queries/0_stateless/01739_index_hint.sql index 28395c2dc1d..30dfa43d334 100644 --- a/tests/queries/0_stateless/01739_index_hint.sql +++ b/tests/queries/0_stateless/01739_index_hint.sql @@ -22,7 +22,7 @@ create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings inde insert into XXXX select number*60, 0 from numbers(100000); -SELECT count() FROM XXXX WHERE indexHint(t = 42); +SELECT sum(t) FROM XXXX WHERE indexHint(t = 42); drop table if exists XXXX; diff --git a/tests/queries/0_stateless/01748_partition_id_pruning.sql b/tests/queries/0_stateless/01748_partition_id_pruning.sql index e0d45884c60..9a26dd8daba 100644 --- a/tests/queries/0_stateless/01748_partition_id_pruning.sql +++ b/tests/queries/0_stateless/01748_partition_id_pruning.sql @@ -14,6 +14,8 @@ select * from x where _partition_id in (select partitionId(number + 1) from numb -- trivial count optimization test set max_rows_to_read = 2; -- one row for subquery + subquery itself +-- TODO: Relax the limits because we might build prepared set twice with _minmax_count_projection +set max_rows_to_read = 3; select count() from x where _partition_id in (select partitionId(number + 1) from numbers(1)); drop table x; From a0ab7a01f12d4a7447d3f26fedbda77d9f95492e Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 3 Feb 2022 02:32:19 +0800 Subject: [PATCH 18/39] Adapt minmax_count_projection with ModuleLegacy --- 
src/Storages/ProjectionsDescription.cpp | 7 ++++++- src/Storages/ProjectionsDescription.h | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index e13895e60f1..5c9ae46dd60 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -179,7 +179,7 @@ ProjectionDescription::getProjectionFromAST(const ASTPtr & definition_ast, const ProjectionDescription ProjectionDescription::getMinMaxCountProjection( const ColumnsDescription & columns, - const ASTPtr & partition_columns, + ASTPtr partition_columns, const Names & minmax_columns, const ASTs & primary_key_asts, ContextPtr query_context) @@ -203,7 +203,12 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( select_query->setExpression(ASTProjectionSelectQuery::Expression::SELECT, std::move(select_expression_list)); if (partition_columns && !partition_columns->children.empty()) + { + partition_columns = partition_columns->clone(); + for (const auto & partition_column : partition_columns->children) + KeyDescription::moduloToModuloLegacyRecursive(partition_column); select_query->setExpression(ASTProjectionSelectQuery::Expression::GROUP_BY, partition_columns->clone()); + } result.definition_ast = select_query; result.name = MINMAX_COUNT_PROJECTION_NAME; diff --git a/src/Storages/ProjectionsDescription.h b/src/Storages/ProjectionsDescription.h index 960e94e22f4..3e8d5e1a4f1 100644 --- a/src/Storages/ProjectionsDescription.h +++ b/src/Storages/ProjectionsDescription.h @@ -73,7 +73,7 @@ struct ProjectionDescription static ProjectionDescription getMinMaxCountProjection( const ColumnsDescription & columns, - const ASTPtr & partition_columns, + ASTPtr partition_columns, const Names & minmax_columns, const ASTs & primary_key_asts, ContextPtr query_context); From 3fab7af541eeb32555978f17aa22e47d32d529c8 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 3 Feb 2022 02:33:19 
+0800 Subject: [PATCH 19/39] Bug fix and improvement of minmax_count_projection --- src/Storages/MergeTree/MergeTreeData.cpp | 60 +++++++++++++++++++++++- src/Storages/MergeTree/MergeTreeData.h | 2 + 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index aaf6cf3884e..db650f6e35b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4505,6 +4505,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( const SelectQueryInfo & query_info, const DataPartsVector & parts, DataPartsVector & normal_parts, + const PartitionIdToMaxBlock * max_block_numbers_to_read, ContextPtr query_context) const { if (!metadata_snapshot->minmax_count_projection) @@ -4541,6 +4542,23 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( if (virtual_columns_block.rows() == 0) return {}; + std::optional partition_pruner; + std::optional minmax_idx_condition; + DataTypes minmax_columns_types; + if (metadata_snapshot->hasPartitionKey()) + { + const auto & partition_key = metadata_snapshot->getPartitionKey(); + auto minmax_columns_names = getMinMaxColumnsNames(partition_key); + minmax_columns_types = getMinMaxColumnsTypes(partition_key); + + minmax_idx_condition.emplace( + query_info, + query_context, + minmax_columns_names, + getMinMaxExpr(partition_key, ExpressionActionsSettings::fromContext(query_context))); + partition_pruner.emplace(metadata_snapshot, query_info, query_context, false /* strict */); + } + // Generate valid expressions for filtering VirtualColumnUtils::prepareFilterBlockWithQuery(query_info.query, query_context, virtual_columns_block, expression_ast); if (expression_ast) @@ -4549,6 +4567,8 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( size_t rows = virtual_columns_block.rows(); const ColumnString & part_name_column = typeid_cast(*virtual_columns_block.getByName("_part").column); size_t part_idx = 0; + auto filter_column = 
ColumnUInt8::create(); + auto & filter_column_data = filter_column->getData(); for (size_t row = 0; row < rows; ++row) { while (parts[part_idx]->name != part_name_column.getDataAt(row)) @@ -4559,12 +4579,32 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( if (!part->minmax_idx->initialized) throw Exception("Found a non-empty part with uninitialized minmax_idx. It's a bug", ErrorCodes::LOGICAL_ERROR); + filter_column_data.emplace_back(); + + if (max_block_numbers_to_read) + { + auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id); + if (blocks_iterator == max_block_numbers_to_read->end() || part->info.max_block > blocks_iterator->second) + continue; + } + + if (minmax_idx_condition + && !minmax_idx_condition->checkInHyperrectangle(part->minmax_idx->hyperrectangle, minmax_columns_types).can_be_true) + continue; + + if (partition_pruner) + { + if (partition_pruner->canBePruned(*part)) + continue; + } + if (need_primary_key_max_column && !part->index_granularity.hasFinalMark()) { normal_parts.push_back(part); continue; } + filter_column_data.back() = 1; size_t pos = 0; for (size_t i : metadata_snapshot->minmax_count_projection->partition_value_indices) { @@ -4607,6 +4647,16 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( } block.setColumns(std::move(partition_minmax_count_columns)); + FilterDescription filter(*filter_column); + for (size_t i = 0; i < virtual_columns_block.columns(); ++i) + { + ColumnPtr & column = virtual_columns_block.safeGetByPosition(i).column; + column = column->filter(*filter.data, -1); + } + + if (block.rows() == 0) + return {}; + Block res; for (const auto & name : required_columns) { @@ -4882,9 +4932,15 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg { DataPartsVector normal_parts; query_info.minmax_count_projection_block = getMinMaxCountProjectionBlock( - metadata_snapshot, minmax_conut_projection_candidate->required_columns, query_info, parts, normal_parts, query_context); + 
metadata_snapshot, + minmax_conut_projection_candidate->required_columns, + query_info, + parts, + normal_parts, + max_added_blocks.get(), + query_context); - if (minmax_conut_projection_candidate->prewhere_info) + if (query_info.minmax_count_projection_block && minmax_conut_projection_candidate->prewhere_info) { const auto & prewhere_info = minmax_conut_projection_candidate->prewhere_info; if (prewhere_info->alias_actions) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 649dae52852..93add8d6935 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -43,6 +43,7 @@ class MergeTreePartsMover; class MergeTreeDataMergerMutator; class MutationCommands; class Context; +using PartitionIdToMaxBlock = std::unordered_map; struct JobAndPool; struct ZeroCopyLock; @@ -391,6 +392,7 @@ public: const SelectQueryInfo & query_info, const DataPartsVector & parts, DataPartsVector & normal_parts, + const PartitionIdToMaxBlock * max_block_numbers_to_read, ContextPtr query_context) const; std::optional getQueryProcessingStageWithAggregateProjection( From 1ab773cc9075bad57da2c61039df39997ab6f568 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 3 Feb 2022 20:47:27 +0800 Subject: [PATCH 20/39] Fix aggregation_in_order with normal projection --- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 37 ++++++++---- src/Storages/SelectQueryInfo.h | 1 + ..._projection_aggregation_in_order.reference | 20 +++++++ .../01710_projection_aggregation_in_order.sql | 59 +++++++++++++++++++ 5 files changed, 107 insertions(+), 12 deletions(-) create mode 100644 tests/queries/0_stateless/01710_projection_aggregation_in_order.reference create mode 100644 tests/queries/0_stateless/01710_projection_aggregation_in_order.sql diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 87ccf3dfa1c..f17c64ea71f 100644 --- 
a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1950,7 +1950,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc query_info.projection->order_optimizer = std::make_shared( query, query_info.projection->group_by_elements_actions, - getSortDescriptionFromGroupBy(query), + query_info.projection->group_by_elements_order_descr, query_info.syntax_analyzer_result); } else diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index db650f6e35b..c81b05e9284 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4740,7 +4740,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg keys.insert(desc.name); key_name_pos_map.insert({desc.name, pos++}); } - auto actions_settings = ExpressionActionsSettings::fromSettings(settings); + auto actions_settings = ExpressionActionsSettings::fromSettings(settings, CompileExpressions::yes); // All required columns should be provided by either current projection or previous actions // Let's traverse backward to finish the check. @@ -4876,6 +4876,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg auto actions_dag = analysis_result.before_aggregation->clone(); actions_dag->foldActionsByProjection({key}, sample_block_for_keys); candidate.group_by_elements_actions.emplace_back(std::make_shared(actions_dag, actions_settings)); + candidate.group_by_elements_order_descr.emplace_back(key, 1, 1); } } @@ -4892,18 +4893,32 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg } } - if (projection.type == ProjectionDescription::Type::Normal && (analysis_result.hasWhere() || analysis_result.hasPrewhere())) + if (projection.type == ProjectionDescription::Type::Normal) { - const auto & actions - = analysis_result.before_aggregation ? 
analysis_result.before_aggregation : analysis_result.before_order_by; - NameSet required_columns; - for (const auto & column : actions->getRequiredColumns()) - required_columns.insert(column.name); - - if (rewrite_before_where(candidate, projection, required_columns, sample_block, {})) + if (analysis_result.before_aggregation && analysis_result.optimize_aggregation_in_order) { - candidate.required_columns = {required_columns.begin(), required_columns.end()}; - candidates.push_back(std::move(candidate)); + for (const auto & key : keys) + { + auto actions_dag = analysis_result.before_aggregation->clone(); + actions_dag->foldActionsByProjection({key}, sample_block_for_keys); + candidate.group_by_elements_actions.emplace_back(std::make_shared(actions_dag, actions_settings)); + candidate.group_by_elements_order_descr.emplace_back(key, 1, 1); + } + } + + if (analysis_result.hasWhere() || analysis_result.hasPrewhere()) + { + const auto & actions + = analysis_result.before_aggregation ? analysis_result.before_aggregation : analysis_result.before_order_by; + NameSet required_columns; + for (const auto & column : actions->getRequiredColumns()) + required_columns.insert(column.name); + + if (rewrite_before_where(candidate, projection, required_columns, sample_block, {})) + { + candidate.required_columns = {required_columns.begin(), required_columns.end()}; + candidates.push_back(std::move(candidate)); + } } } }; diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 2486bcbf5c6..f15f2dd2626 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -127,6 +127,7 @@ struct ProjectionCandidate ReadInOrderOptimizerPtr order_optimizer; InputOrderInfoPtr input_order_info; ManyExpressionActions group_by_elements_actions; + SortDescription group_by_elements_order_descr; std::shared_ptr subqueries_for_sets; MergeTreeDataSelectAnalysisResultPtr merge_tree_projection_select_result_ptr; MergeTreeDataSelectAnalysisResultPtr 
merge_tree_normal_select_result_ptr; diff --git a/tests/queries/0_stateless/01710_projection_aggregation_in_order.reference b/tests/queries/0_stateless/01710_projection_aggregation_in_order.reference new file mode 100644 index 00000000000..a57b2e2cb0d --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_aggregation_in_order.reference @@ -0,0 +1,20 @@ +291519000 +276078600 +304558200 +330478200 +317518200 +330478200 +276078600 +343438200 +291519000 +317518200 +291519000 +276078600 +304558200 +330478200 +317518200 +330478200 +276078600 +343438200 +291519000 +317518200 diff --git a/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql b/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql new file mode 100644 index 00000000000..af2a5dc8253 --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql @@ -0,0 +1,59 @@ +DROP TABLE IF EXISTS normal; + +CREATE TABLE normal +( + `key` UInt32, + `ts` DateTime, + `value` UInt32, + PROJECTION aaaa + ( + SELECT + ts, + key, + value + ORDER BY (ts, key) + ) +) +ENGINE = MergeTree +ORDER BY (key, ts); + +INSERT INTO normal SELECT + 1, + toDateTime('2021-12-06 00:00:00') + number, + number +FROM numbers(100000); + +SET allow_experimental_projection_optimization=1, optimize_aggregation_in_order=1, force_optimize_projection = 1; + +WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM normal WHERE ts > '2021-12-06 22:00:00' GROUP BY a ORDER BY v LIMIT 5; +WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM normal WHERE ts > '2021-12-06 22:00:00' GROUP BY toStartOfHour(ts), a ORDER BY v LIMIT 5; + +DROP TABLE IF EXISTS agg; + +CREATE TABLE agg +( + `key` UInt32, + `ts` DateTime, + `value` UInt32, + PROJECTION aaaa + ( + SELECT + ts, + key, + sum(value) + GROUP BY (ts, key) + ) +) +ENGINE = MergeTree +ORDER BY (key, ts); + +INSERT INTO agg SELECT + 1, + toDateTime('2021-12-06 00:00:00') + number, + number +FROM numbers(100000); + +SET 
allow_experimental_projection_optimization=1, optimize_aggregation_in_order=1, force_optimize_projection = 1; + +WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM agg WHERE ts > '2021-12-06 22:00:00' GROUP BY a ORDER BY v LIMIT 5; +WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM agg WHERE ts > '2021-12-06 22:00:00' GROUP BY toStartOfHour(ts), a ORDER BY v LIMIT 5; From 01d58fc9bbe4822c917b246a60b82d5c3374ab7d Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 3 Feb 2022 02:34:03 +0800 Subject: [PATCH 21/39] Enable projection by default --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 48dd637a943..0643400f473 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -479,7 +479,7 @@ class IColumn; M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \ M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \ M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \ - M(Bool, allow_experimental_projection_optimization, false, "Enable projection optimization when processing SELECT queries", 0) \ + M(Bool, allow_experimental_projection_optimization, true, "Enable projection optimization when processing SELECT queries", 0) \ M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \ M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \ M(Bool, insert_null_as_default, true, "Insert DEFAULT values instead of NULL in INSERT SELECT (UNION ALL)", 0) \ From 52aabf98fe7b75213a8757f9572894ed880cf6de Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sun, 6 Feb 2022 16:45:49 +0800 Subject: [PATCH 22/39] Revise and add more comments --- src/Interpreters/ActionsDAG.cpp | 8 ++++-
src/Interpreters/ActionsDAG.h | 31 +++++++++++++++++ src/Storages/MergeTree/MergeTreeData.cpp | 42 ++++++++++-------------- 3 files changed, 56 insertions(+), 25 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index a4560eb1c15..6ed35210251 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -602,8 +602,8 @@ NameSet ActionsDAG::foldActionsByProjection( std::unordered_set visited_nodes; std::unordered_set visited_index_names; std::stack stack; - std::vector missing_input_from_projection_keys; + /// Record all needed index nodes to start folding. for (const auto & node : index) { if (required_columns.find(node->result_name) != required_columns.end() || node->result_name == predicate_column_name) @@ -614,6 +614,9 @@ NameSet ActionsDAG::foldActionsByProjection( } } + /// If some required columns are not in any index node, try searching from all projection key + /// columns. If still missing, return empty set which means current projection fails to match + /// (missing columns). if (add_missing_keys) { for (const auto & column : required_columns) @@ -636,6 +639,7 @@ NameSet ActionsDAG::foldActionsByProjection( } } + /// Traverse the DAG from root to leaf. Substitute any matched node with columns in projection_block_for_keys. while (!stack.empty()) { auto * node = stack.top(); @@ -664,10 +668,12 @@ NameSet ActionsDAG::foldActionsByProjection( } } + /// Clean up unused nodes after folding. std::erase_if(inputs, [&](const Node * node) { return visited_nodes.count(node) == 0; }); std::erase_if(index, [&](const Node * node) { return visited_index_names.count(node->result_name) == 0; }); nodes.remove_if([&](const Node & node) { return visited_nodes.count(&node) == 0; }); + /// Calculate the required columns after folding. 
NameSet next_required_columns; for (const auto & input : inputs) next_required_columns.insert(input->result_name); diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 9a5ad01a252..b07ab08c997 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -163,12 +163,43 @@ public: void removeUnusedActions(const Names & required_names, bool allow_remove_inputs = true, bool allow_constant_folding = true); void removeUnusedActions(const NameSet & required_names, bool allow_remove_inputs = true, bool allow_constant_folding = true); + /// Transform the current DAG in a way that leaf nodes get folded into their parents. It's done + /// because each projection can provide some columns as inputs to substitute certain sub-DAGs + /// (expressions). Consider the following example: + /// CREATE TABLE tbl (dt DateTime, val UInt64, + /// PROJECTION p_hour (SELECT SUM(val) GROUP BY toStartOfHour(dt))); + /// + /// Query: SELECT toStartOfHour(dt), SUM(val) FROM tbl GROUP BY toStartOfHour(dt); + /// + /// We will have an ActionsDAG like this: + /// FUNCTION: toStartOfHour(dt) SUM(val) + /// ^ ^ + /// | | + /// INPUT: dt val + /// + /// Now we traverse the DAG and see if any FUNCTION node can be replaced by projection's INPUT node. + /// The result DAG will be: + /// INPUT: toStartOfHour(dt) SUM(val) + /// + /// We don't need aggregate columns from projection because they are matched after DAG. + /// Currently we use canonical names of each node to find matches. It can be improved after we + /// have a full-featured name binding system. + /// + /// @param required_columns should contain columns which this DAG is required to produce after folding. It is used for result actions. + /// @param projection_block_for_keys contains all key columns of given projection. + /// @param predicate_column_name means we need to produce the predicate column after folding. 
+ /// @param add_missing_keys means whether to add additional missing columns to input nodes from projection key columns directly. + /// @return required columns for this folded DAG. It's expected to be fewer than the original ones if some projection is used. NameSet foldActionsByProjection( const NameSet & required_columns, const Block & projection_block_for_keys, const String & predicate_column_name = {}, bool add_missing_keys = true); + + /// Reorder the index nodes using given position mapping. void reorderAggregationKeysForProjection(const std::unordered_map & key_names_pos_map); + + /// Add aggregate columns to index nodes from projection void addAggregatesViaProjection(const Block & aggregates); bool hasArrayJoin() const; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c81b05e9284..68fa81e1df9 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4776,7 +4776,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg required_columns.erase(column.name); { - // Prewhere_action should not add missing keys. + // prewhere_action should not add missing keys. auto new_prewhere_required_columns = prewhere_actions->foldActionsByProjection( prewhere_required_columns, projection.sample_block_for_keys, candidate.prewhere_info->prewhere_column_name, false); if (new_prewhere_required_columns.empty() && !prewhere_required_columns.empty()) @@ -4788,6 +4788,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg if (candidate.prewhere_info->row_level_filter) { auto row_level_filter_actions = candidate.prewhere_info->row_level_filter->clone(); + // row_level_filter_action should not add missing keys. 
auto new_prewhere_required_columns = row_level_filter_actions->foldActionsByProjection( prewhere_required_columns, projection.sample_block_for_keys, candidate.prewhere_info->row_level_column_name, false); if (new_prewhere_required_columns.empty() && !prewhere_required_columns.empty()) @@ -4799,6 +4800,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg if (candidate.prewhere_info->alias_actions) { auto alias_actions = candidate.prewhere_info->alias_actions->clone(); + // alias_action should not add missing keys. auto new_prewhere_required_columns = alias_actions->foldActionsByProjection(prewhere_required_columns, projection.sample_block_for_keys, {}, false); if (new_prewhere_required_columns.empty() && !prewhere_required_columns.empty()) @@ -4836,6 +4838,18 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg sample_block_for_keys.insertUnique(column); } + // If optimize_aggregation_in_order = true, we need additional information to transform the projection's pipeline. 
+ auto attach_aggregation_in_order_info = [&]() + { + for (const auto & key : keys) + { + auto actions_dag = analysis_result.before_aggregation->clone(); + actions_dag->foldActionsByProjection({key}, sample_block_for_keys); + candidate.group_by_elements_actions.emplace_back(std::make_shared(actions_dag, actions_settings)); + candidate.group_by_elements_order_descr.emplace_back(key, 1, 1); + } + }; + if (projection.type == ProjectionDescription::Type::Aggregate && analysis_result.need_aggregate && can_use_aggregate_projection) { bool match = true; @@ -4845,16 +4859,13 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg { const auto * column = sample_block.findByName(aggregate.column_name); if (column) - { aggregates.insert(*column); - } else { match = false; break; } } - if (!match) return; @@ -4870,15 +4881,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg return; if (analysis_result.optimize_aggregation_in_order) - { - for (const auto & key : keys) - { - auto actions_dag = analysis_result.before_aggregation->clone(); - actions_dag->foldActionsByProjection({key}, sample_block_for_keys); - candidate.group_by_elements_actions.emplace_back(std::make_shared(actions_dag, actions_settings)); - candidate.group_by_elements_order_descr.emplace_back(key, 1, 1); - } - } + attach_aggregation_in_order_info(); // Reorder aggregation keys and attach aggregates candidate.before_aggregation->reorderAggregationKeysForProjection(key_name_pos_map); @@ -4892,19 +4895,10 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg candidates.push_back(std::move(candidate)); } } - - if (projection.type == ProjectionDescription::Type::Normal) + else if (projection.type == ProjectionDescription::Type::Normal) { if (analysis_result.before_aggregation && analysis_result.optimize_aggregation_in_order) - { - for (const auto & key : keys) - { - auto actions_dag = analysis_result.before_aggregation->clone(); - actions_dag->foldActionsByProjection({key}, 
sample_block_for_keys); - candidate.group_by_elements_actions.emplace_back(std::make_shared(actions_dag, actions_settings)); - candidate.group_by_elements_order_descr.emplace_back(key, 1, 1); - } - } + attach_aggregation_in_order_info(); if (analysis_result.hasWhere() || analysis_result.hasPrewhere()) { From a6f0b01e6a4280c7780c00d7b87b8cd69881d5dc Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 7 Feb 2022 00:42:11 +0800 Subject: [PATCH 23/39] Fix order by after aggregation --- src/Interpreters/InterpreterSelectQuery.cpp | 4 +++- ...710_projection_aggregation_in_order.reference | 16 ++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index f17c64ea71f..dc00edad612 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1180,8 +1180,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

input_order_info.reset(); } // Now we must execute: diff --git a/tests/queries/0_stateless/01710_projection_aggregation_in_order.reference b/tests/queries/0_stateless/01710_projection_aggregation_in_order.reference index a57b2e2cb0d..12c613c184d 100644 --- a/tests/queries/0_stateless/01710_projection_aggregation_in_order.reference +++ b/tests/queries/0_stateless/01710_projection_aggregation_in_order.reference @@ -1,20 +1,20 @@ -291519000 276078600 +291519000 304558200 -330478200 317518200 330478200 276078600 -343438200 291519000 -317518200 -291519000 -276078600 304558200 -330478200 317518200 330478200 276078600 -343438200 291519000 +304558200 317518200 +330478200 +276078600 +291519000 +304558200 +317518200 +330478200 From 2debfc922da002500db43683babeeb59394a211e Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 7 Feb 2022 10:47:11 +0800 Subject: [PATCH 24/39] Better projection format and test fixes --- src/Parsers/ASTProjectionDeclaration.cpp | 2 +- src/Parsers/ASTProjectionSelectQuery.cpp | 12 +++++++++++- src/Parsers/ParserProjectionSelectQuery.cpp | 1 + src/Storages/ProjectionsDescription.cpp | 4 +++- .../01710_projection_aggregation_in_order.sql | 6 +++--- 5 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/Parsers/ASTProjectionDeclaration.cpp b/src/Parsers/ASTProjectionDeclaration.cpp index 740a2fe3efd..60050986161 100644 --- a/src/Parsers/ASTProjectionDeclaration.cpp +++ b/src/Parsers/ASTProjectionDeclaration.cpp @@ -20,7 +20,7 @@ void ASTProjectionDeclaration::formatImpl(const FormatSettings & settings, Forma settings.ostr << backQuoteIfNeed(name); std::string indent_str = settings.one_line ? "" : std::string(4u * frame.indent, ' '); std::string nl_or_nothing = settings.one_line ? 
"" : "\n"; - settings.ostr << nl_or_nothing << indent_str << "(" << nl_or_nothing; + settings.ostr << settings.nl_or_ws << indent_str << "(" << nl_or_nothing; FormatStateStacked frame_nested = frame; frame_nested.need_parens = false; ++frame_nested.indent; diff --git a/src/Parsers/ASTProjectionSelectQuery.cpp b/src/Parsers/ASTProjectionSelectQuery.cpp index 7a855eb2be2..8526c7aef26 100644 --- a/src/Parsers/ASTProjectionSelectQuery.cpp +++ b/src/Parsers/ASTProjectionSelectQuery.cpp @@ -72,8 +72,18 @@ void ASTProjectionSelectQuery::formatImpl(const FormatSettings & s, FormatState if (orderBy()) { + /// Let's convert the ASTFunction into ASTExpressionList, which generates consistent format + /// between GROUP BY and ORDER BY projection definition. s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY " << (s.hilite ? hilite_none : ""); - orderBy()->formatImpl(s, state, frame); + ASTPtr order_by; + if (auto * func = orderBy()->as()) + order_by = func->arguments; + else + { + order_by = std::make_shared(); + order_by->children.push_back(orderBy()); + } + s.one_line ? 
order_by->formatImpl(s, state, frame) : order_by->as().formatImplMultiline(s, state, frame); } } diff --git a/src/Parsers/ParserProjectionSelectQuery.cpp b/src/Parsers/ParserProjectionSelectQuery.cpp index 0467f84de2a..b2adb5cf154 100644 --- a/src/Parsers/ParserProjectionSelectQuery.cpp +++ b/src/Parsers/ParserProjectionSelectQuery.cpp @@ -55,6 +55,7 @@ bool ParserProjectionSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & return false; } + // ORDER BY needs to be an ASTFunction so that we can use it as a sorting key if (s_order_by.ignore(pos, expected)) { ASTPtr expr_list; diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 5c9ae46dd60..7c340cda739 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -107,7 +107,9 @@ ProjectionDescription::getProjectionFromAST(const ASTPtr & definition_ast, const auto external_storage_holder = std::make_shared(query_context, columns, ConstraintsDescription{}); StoragePtr storage = external_storage_holder->getTable(); InterpreterSelectQuery select( - result.query_ast, query_context, storage, {}, SelectQueryOptions{QueryProcessingStage::WithMergeableState}.modify().ignoreAlias()); + result.query_ast, query_context, storage, {}, + /// Here we ignore ast optimizations because otherwise aggregation keys may be removed from result header as constants. 
+ SelectQueryOptions{QueryProcessingStage::WithMergeableState}.modify().ignoreAlias().ignoreASTOptimizationsAlias()); result.required_columns = select.getRequiredColumns(); result.sample_block = select.getSampleBlock(); diff --git a/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql b/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql index af2a5dc8253..557bd297436 100644 --- a/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql +++ b/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql @@ -11,7 +11,7 @@ CREATE TABLE normal ts, key, value - ORDER BY (ts, key) + ORDER BY ts, key ) ) ENGINE = MergeTree @@ -23,7 +23,7 @@ INSERT INTO normal SELECT number FROM numbers(100000); -SET allow_experimental_projection_optimization=1, optimize_aggregation_in_order=1, force_optimize_projection = 1; +SET allow_experimental_projection_optimization=1, optimize_aggregation_in_order=1, force_optimize_projection=1; WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM normal WHERE ts > '2021-12-06 22:00:00' GROUP BY a ORDER BY v LIMIT 5; WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM normal WHERE ts > '2021-12-06 22:00:00' GROUP BY toStartOfHour(ts), a ORDER BY v LIMIT 5; @@ -41,7 +41,7 @@ CREATE TABLE agg ts, key, sum(value) - GROUP BY (ts, key) + GROUP BY ts, key ) ) ENGINE = MergeTree From b2ba0c4320044bca6c98aa304450c869843b6c8a Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 7 Feb 2022 03:20:53 +0700 Subject: [PATCH 25/39] Fix inserting to temporary tables via gRPC. 
--- src/Server/GRPCServer.cpp | 34 +------------------- tests/integration/test_grpc_protocol/test.py | 25 ++++++++++---- 2 files changed, 20 insertions(+), 39 deletions(-) diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index f252561d63b..8aa729b8883 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -990,41 +989,10 @@ namespace assert(!pipeline); auto source = query_context->getInputFormat( input_format, *read_buffer, header, query_context->getSettings().max_insert_block_size); + QueryPipelineBuilder builder; builder.init(Pipe(source)); - /// Add default values if necessary. - if (ast) - { - if (insert_query) - { - auto table_id = StorageID::createEmpty(); - - if (insert_query->table_id) - { - table_id = query_context->resolveStorageID(insert_query->table_id, Context::ResolveOrdinary); - } - else - { - StorageID local_table_id(insert_query->getDatabase(), insert_query->getTable()); - table_id = query_context->resolveStorageID(local_table_id, Context::ResolveOrdinary); - } - - if (query_context->getSettingsRef().input_format_defaults_for_omitted_fields && table_id) - { - StoragePtr storage = DatabaseCatalog::instance().getTable(table_id, query_context); - const auto & columns = storage->getInMemoryMetadataPtr()->getColumns(); - if (!columns.empty()) - { - builder.addSimpleTransform([&](const Block & cur_header) - { - return std::make_shared(cur_header, columns, *source, query_context); - }); - } - } - } - } - pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); pipeline_executor = std::make_unique(*pipeline); } diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index 2a91ebcd94b..b6968575883 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -177,13 +177,13 @@ def test_insert_query_delimiter(): assert 
query("SELECT a FROM t ORDER BY a") == "1\n5\n234\n" def test_insert_default_column(): - query("CREATE TABLE t (a UInt8, b Int32 DEFAULT 100, c String DEFAULT 'c') ENGINE = Memory") + query("CREATE TABLE t (a UInt8, b Int32 DEFAULT 100 - a, c String DEFAULT 'c') ENGINE = Memory") query("INSERT INTO t (c, a) VALUES ('x',1),('y',2)") query("INSERT INTO t (a) FORMAT TabSeparated", input_data="3\n4\n") - assert query("SELECT * FROM t ORDER BY a") == "1\t100\tx\n" \ - "2\t100\ty\n" \ - "3\t100\tc\n" \ - "4\t100\tc\n" + assert query("SELECT * FROM t ORDER BY a") == "1\t99\tx\n" \ + "2\t98\ty\n" \ + "3\t97\tc\n" \ + "4\t96\tc\n" def test_insert_splitted_row(): query("CREATE TABLE t (a UInt8) ENGINE = Memory") @@ -257,7 +257,7 @@ def test_progress(): } ]""" -def test_session(): +def test_session_settings(): session_a = "session A" session_b = "session B" query("SET custom_x=1", session_id=session_a) @@ -267,9 +267,22 @@ def test_session(): assert query("SELECT getSetting('custom_x'), getSetting('custom_y')", session_id=session_a) == "1\t2\n" assert query("SELECT getSetting('custom_x'), getSetting('custom_y')", session_id=session_b) == "3\t4\n" +def test_session_temp_tables(): + session_a = "session A" + session_b = "session B" + query("CREATE TEMPORARY TABLE my_temp_table(a Int8)", session_id=session_a) + query("INSERT INTO my_temp_table VALUES (10)", session_id=session_a) + assert query("SELECT * FROM my_temp_table", session_id=session_a) == "10\n" + query("CREATE TEMPORARY TABLE my_temp_table(a Int8)", session_id=session_b) + query("INSERT INTO my_temp_table VALUES (20)", session_id=session_b) + assert query("SELECT * FROM my_temp_table", session_id=session_b) == "20\n" + assert query("SELECT * FROM my_temp_table", session_id=session_a) == "10\n" + def test_no_session(): e = query_and_get_error("SET custom_x=1") assert "There is no session" in e.display_text + e = query_and_get_error("CREATE TEMPORARY TABLE my_temp_table(a Int8)") + assert "There is no session" in 
e.display_text def test_input_function(): query("CREATE TABLE t (a UInt8) ENGINE = Memory") From 542889d627793dd9e1043417b4dc5f74e15b049d Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 7 Feb 2022 13:01:42 +0300 Subject: [PATCH 26/39] Update clickhouse-keeper.md --- docs/en/operations/clickhouse-keeper.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 48eb590aca2..35ec5d858f5 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -108,8 +108,13 @@ Examples of configuration for quorum with three nodes can be found in [integrati ClickHouse Keeper is bundled into the ClickHouse server package, just add configuration of `` and start ClickHouse server as always. If you want to run standalone ClickHouse Keeper you can start it in a similar way with: ```bash -clickhouse keeper --config /etc/your_path_to_config/config.xml --daemon -example: clickhouse keeper --config /etc/clickhouse-server/config.d/keeper_config.xml +clickhouse-keeper --config /etc/your_path_to_config/config.xml +``` + +If you don't have the symlink (`clickhouse-keeper`) you can create it or specify `keeper` as argument: + +```bash +clickhouse keeper --config /etc/your_path_to_config/config.xml ``` ## Four Letter Word Commands {#four-letter-word-commands} From 523d1059ed071c6313356316e56e5641040563ae Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 7 Feb 2022 13:59:03 +0300 Subject: [PATCH 27/39] Update KeyDescription.cpp --- src/Storages/KeyDescription.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index 24b4b13bc21..f100f129cda 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -171,7 +171,7 @@ KeyDescription KeyDescription::parse(const String & str, const ColumnsDescriptio return result; ParserExpression parser; - 
ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); FunctionNameNormalizer().visit(ast.get()); return getKeyFromAST(ast, columns, context); From 4a857d7d18810c7cb4d42d7518f8b91496c0a7ca Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 7 Feb 2022 12:35:32 +0100 Subject: [PATCH 28/39] Use UTC in docker images --- docker/test/base/Dockerfile | 2 +- docker/test/fasttest/Dockerfile | 2 +- docker/test/fuzzer/Dockerfile | 2 +- docker/test/integration/base/Dockerfile | 2 +- docker/test/integration/runner/Dockerfile | 2 +- docker/test/performance-comparison/Dockerfile | 2 +- docker/test/stateless/Dockerfile | 2 +- docker/test/testflows/runner/Dockerfile | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index 6beab2e5bb7..7d7c3e28087 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -73,7 +73,7 @@ ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080' ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' -ENV TZ=Europe/Moscow +ENV TZ=UTC RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone CMD sleep 1 diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 46b74d89e13..a625ab316f0 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -87,7 +87,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ && rm -rf /tmp/clickhouse-odbc-tmp -ENV TZ=Europe/Moscow +ENV TZ=UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV COMMIT_SHA='' diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index eb4b09c173f..659b53bfd7e 100644 --- a/docker/test/fuzzer/Dockerfile +++ 
b/docker/test/fuzzer/Dockerfile @@ -8,7 +8,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ENV LANG=C.UTF-8 -ENV TZ=Europe/Moscow +ENV TZ=UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN apt-get update \ diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index 91b26735fe5..b6f2bdace01 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -60,5 +60,5 @@ clientPort=2181 \n\ maxClientCnxns=80' > /opt/zookeeper/conf/zoo.cfg RUN mkdir /zookeeper && chmod -R 777 /zookeeper -ENV TZ=Europe/Moscow +ENV TZ=UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 1aad2ae6770..a7a9230748f 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -40,7 +40,7 @@ RUN apt-get update \ /tmp/* \ && apt-get clean -ENV TZ=Europe/Moscow +ENV TZ=UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV DOCKER_CHANNEL stable diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index eddaf969f33..5037739be36 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ENV LANG=C.UTF-8 -ENV TZ=Europe/Moscow +ENV TZ=UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN apt-get update \ diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 9b7fde7d542..24ca13e4acc 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -42,7 +42,7 @@ RUN mkdir -p 
/tmp/clickhouse-odbc-tmp \ && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ && rm -rf /tmp/clickhouse-odbc-tmp -ENV TZ=Europe/Moscow +ENV TZ=UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV NUM_TRIES=1 diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index d15f237587b..f789a5e8b62 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -35,7 +35,7 @@ RUN apt-get update \ /tmp/* \ && apt-get clean -ENV TZ=Europe/Moscow +ENV TZ=UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0 dicttoxml kazoo tzlocal==2.1 pytz python-dateutil numpy From 4a1e84306ad37879eb4511857344c94fc8c3213b Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 7 Feb 2022 12:57:58 +0100 Subject: [PATCH 29/39] Update performance comparison OS version --- docker/test/performance-comparison/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 5037739be36..d50bfce1e8f 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -1,5 +1,5 @@ # docker build -t clickhouse/performance-comparison . -FROM ubuntu:18.04 +FROM ubuntu:20.04 # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" From c7ba5204b3fa3d8120a6111730902c7a7aaa9e0c Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 7 Feb 2022 13:17:21 +0100 Subject: [PATCH 30/39] Make testflows a multiarch image --- docker/test/testflows/runner/Dockerfile | 33 ++++++++++++++----------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index f789a5e8b62..69b3affd0e7 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -43,24 +43,27 @@ RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0 ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 20.10.6 -RUN set -eux; \ - \ -# this "case" statement is generated via "update.sh" - \ - if ! wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/x86_64/docker-${DOCKER_VERSION}.tgz"; then \ - echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${x86_64}'"; \ - exit 1; \ - fi; \ - \ - tar --extract \ +# Architecture of the image when BuildKit/buildx is used +ARG TARGETARCH + +# Install MySQL ODBC driver from RHEL rpm +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) rarch=x86_64 ;; \ + arm64) rarch=aarch64 ;; \ + esac \ + && set -eux \ + && if ! 
wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/${rarch}/docker-${DOCKER_VERSION}.tgz"; then \ + echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${rarch}'" \ + && exit 1; \ + fi \ + && tar --extract \ --file docker.tgz \ --strip-components 1 \ --directory /usr/local/bin/ \ - ; \ - rm docker.tgz; \ - \ - dockerd --version; \ - docker --version + && rm docker.tgz \ + && dockerd --version \ + && docker --version COPY modprobe.sh /usr/local/bin/modprobe COPY dockerd-entrypoint.sh /usr/local/bin/ From 1995894eef5e7858fca60867ac704aa7399e4bfb Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 8 Feb 2022 02:32:23 +0800 Subject: [PATCH 31/39] Fix tests --- .../01705_normalize_create_alter_function_names.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference index 00c77f1500c..b6f5fe99ca1 100644 --- a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference +++ b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference @@ -1,2 +1,2 @@ CREATE TABLE default.x\n(\n `i` Int32,\n INDEX mm rand() TYPE minmax GRANULARITY 1,\n INDEX nn rand() TYPE minmax GRANULARITY 1,\n PROJECTION p\n (\n SELECT max(i)\n ),\n PROJECTION p2\n (\n SELECT min(i)\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/x\', \'r\')\nORDER BY i\nSETTINGS index_granularity = 8192 -metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 8192\nmode: 0\nsign column: \nprimary key: i\ndata format version: 1\npartition key: \nindices: mm rand() TYPE minmax GRANULARITY 1, nn rand() TYPE minmax GRANULARITY 1\nprojections: p(SELECT max(i)), p2(SELECT min(i))\ngranularity bytes: 10485760\n +metadata format version: 1\ndate column: \nsampling expression: \nindex 
granularity: 8192\nmode: 0\nsign column: \nprimary key: i\ndata format version: 1\npartition key: \nindices: mm rand() TYPE minmax GRANULARITY 1, nn rand() TYPE minmax GRANULARITY 1\nprojections: p (SELECT max(i)), p2 (SELECT min(i))\ngranularity bytes: 10485760\n From 1d13c68c9c7e9c3a6e500c58fb535cfc21f6fab3 Mon Sep 17 00:00:00 2001 From: cmsxbc Date: Tue, 8 Feb 2022 08:58:01 +0800 Subject: [PATCH 32/39] use `max` instead of `not any` to check all rows --- tests/queries/0_stateless/02161_addressToLineWithInlines.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql index 29be9ae85f6..baddea30ae3 100644 --- a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql @@ -20,6 +20,6 @@ WITH SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment='02161_test_case' ORDER BY event_time DESC LIMIT 1 ) ) -SELECT 'has inlines:', or(max(length(lineWithInlines)) > 1, not any(locate(lineWithInlines[1], ':') != 0)) FROM lineWithInlines SETTINGS short_circuit_function_evaluation='enable'; +SELECT 'has inlines:', or(max(length(lineWithInlines)) > 1, max(locate(lineWithInlines[1], ':')) = 0) FROM lineWithInlines SETTINGS short_circuit_function_evaluation='enable'; -- `max(length(lineWithInlines)) > 1` check there is any inlines. --- `not any(locate(lineWithInlines[1], ':') != 0)` check whether none could get a symbol. +-- `max(locate(lineWithInlines[1], ':')) = 0` check whether none could get a symbol. 
From 6b3adbb0deaf1aea62f7d7cb4aa24debbb2d0443 Mon Sep 17 00:00:00 2001 From: Rajkumar Date: Mon, 7 Feb 2022 19:50:34 -0800 Subject: [PATCH 33/39] Method called on already moved --- src/Processors/Formats/Impl/TSKVRowInputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 8a56c2ed5c7..1cd18087f56 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -20,7 +20,7 @@ namespace ErrorCodes TSKVRowInputFormat::TSKVRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_) - : IRowInputFormat(std::move(header_), in_, std::move(params_)), format_settings(format_settings_), name_map(header_.columns()) + : IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), name_map(header_.columns()) { const auto & sample_block = getPort().getHeader(); size_t num_columns = sample_block.columns(); From e118c89bb26869d973c3ebc93b5d9539dbf18b40 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 8 Feb 2022 13:40:03 +0300 Subject: [PATCH 34/39] Fix segfault in schema inference from url --- src/Storages/StorageURL.cpp | 16 ++++++++++++++-- .../configs/named_collections.xml | 5 +++++ tests/integration/test_storage_s3/test.py | 13 +++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 32ab126faa9..dd2736613b3 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -82,16 +82,28 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( const std::optional & format_settings, ContextPtr context) { + auto parsed_uri = Poco::URI(uri); + Poco::Net::HTTPBasicCredentials credentials; + std::string user_info = parsed_uri.getUserInfo(); + if (!user_info.empty()) + { + std::size_t n = user_info.find(':'); + if (n != 
std::string::npos) + { + credentials.setUsername(user_info.substr(0, n)); + credentials.setPassword(user_info.substr(n + 1)); + } + } + auto read_buffer_creator = [&]() { - auto parsed_uri = Poco::URI(uri); return wrapReadBufferWithCompressionMethod( std::make_unique( parsed_uri, Poco::Net::HTTPRequest::HTTP_GET, nullptr, ConnectionTimeouts::getHTTPTimeouts(context), - Poco::Net::HTTPBasicCredentials{}, + credentials, context->getSettingsRef().max_http_get_redirects, DBMS_DEFAULT_BUFFER_SIZE, context->getReadSettings(), diff --git a/tests/integration/test_storage_s3/configs/named_collections.xml b/tests/integration/test_storage_s3/configs/named_collections.xml index f22440d17c9..fcc8bcac555 100644 --- a/tests/integration/test_storage_s3/configs/named_collections.xml +++ b/tests/integration/test_storage_s3/configs/named_collections.xml @@ -30,5 +30,10 @@ minio minio123 + + http://minio1:9001/root/test.parquet + minio + minio123 + diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 2ed5ca51054..4366a1f034e 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -981,3 +981,16 @@ def test_format_detection(started_cluster): result = instance.query(f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow')") assert(int(result) == 1) + + instance.query(f"create table parquet_table_s3 (x UInt64) engine=S3(s3_parquet2)") + instance.query(f"insert into parquet_table_s3 select 1") + result = instance.query(f"select * from s3(s3_parquet2)") + assert(int(result) == 1) + + result = instance.query(f"select * from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.parquet')") + assert(int(result) == 1) + + result = instance.query(f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.parquet')") + assert(int(result) == 1) + + From 
428d6f1581d707b01fda410fa846ed3d66a7d727 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 8 Feb 2022 17:19:37 +0300 Subject: [PATCH 35/39] Update Settings.h --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0643400f473..48dd637a943 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -479,7 +479,7 @@ class IColumn; M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \ M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \ M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \ - M(Bool, allow_experimental_projection_optimization, true, "Enable projection optimization when processing SELECT queries", 0) \ + M(Bool, allow_experimental_projection_optimization, false, "Enable projection optimization when processing SELECT queries", 0) \ M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \ M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \ M(Bool, insert_null_as_default, true, "Insert DEFAULT values instead of NULL in INSERT SELECT (UNION ALL)", 0) \ From 2d02eab75045b6fc449188b375a0e62110896c1a Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 8 Feb 2022 15:34:01 +0100 Subject: [PATCH 36/39] Revert "Merge pull request #34373 from ClickHouse/docker-tz" This reverts commit efd8044ab7cd35feb4e0b1bccc63f46d42e34c82, reversing changes made to 4bb69bcb15374b696080c5fd2fe1090dc0bec2a2. 
--- docker/test/base/Dockerfile | 2 +- docker/test/fasttest/Dockerfile | 2 +- docker/test/fuzzer/Dockerfile | 2 +- docker/test/integration/base/Dockerfile | 2 +- docker/test/integration/runner/Dockerfile | 2 +- docker/test/performance-comparison/Dockerfile | 4 +-- docker/test/stateless/Dockerfile | 2 +- docker/test/testflows/runner/Dockerfile | 35 +++++++++---------- 8 files changed, 24 insertions(+), 27 deletions(-) diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index 7d7c3e28087..6beab2e5bb7 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -73,7 +73,7 @@ ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080' ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' -ENV TZ=UTC +ENV TZ=Europe/Moscow RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone CMD sleep 1 diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index a625ab316f0..46b74d89e13 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -87,7 +87,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ && rm -rf /tmp/clickhouse-odbc-tmp -ENV TZ=UTC +ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV COMMIT_SHA='' diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index 659b53bfd7e..eb4b09c173f 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -8,7 +8,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ENV LANG=C.UTF-8 -ENV TZ=UTC +ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN apt-get update \ diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile 
index b6f2bdace01..91b26735fe5 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -60,5 +60,5 @@ clientPort=2181 \n\ maxClientCnxns=80' > /opt/zookeeper/conf/zoo.cfg RUN mkdir /zookeeper && chmod -R 777 /zookeeper -ENV TZ=UTC +ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index a7a9230748f..1aad2ae6770 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -40,7 +40,7 @@ RUN apt-get update \ /tmp/* \ && apt-get clean -ENV TZ=UTC +ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV DOCKER_CHANNEL stable diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index d50bfce1e8f..eddaf969f33 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -1,12 +1,12 @@ # docker build -t clickhouse/performance-comparison . 
-FROM ubuntu:20.04 +FROM ubuntu:18.04 # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ENV LANG=C.UTF-8 -ENV TZ=UTC +ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN apt-get update \ diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 24ca13e4acc..9b7fde7d542 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -42,7 +42,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ && rm -rf /tmp/clickhouse-odbc-tmp -ENV TZ=UTC +ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV NUM_TRIES=1 diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index 69b3affd0e7..d15f237587b 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -35,7 +35,7 @@ RUN apt-get update \ /tmp/* \ && apt-get clean -ENV TZ=UTC +ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0 dicttoxml kazoo tzlocal==2.1 pytz python-dateutil numpy @@ -43,27 +43,24 @@ RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0 ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 20.10.6 -# Architecture of the image when BuildKit/buildx is used -ARG TARGETARCH - -# Install MySQL ODBC driver from RHEL rpm -RUN arch=${TARGETARCH:-amd64} \ - && case $arch in \ - amd64) rarch=x86_64 ;; \ - arm64) rarch=aarch64 ;; \ - esac \ - && set -eux \ - && if ! 
wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/${rarch}/docker-${DOCKER_VERSION}.tgz"; then \ - echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${rarch}'" \ - && exit 1; \ - fi \ - && tar --extract \ +RUN set -eux; \ + \ +# this "case" statement is generated via "update.sh" + \ + if ! wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/x86_64/docker-${DOCKER_VERSION}.tgz"; then \ + echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${x86_64}'"; \ + exit 1; \ + fi; \ + \ + tar --extract \ --file docker.tgz \ --strip-components 1 \ --directory /usr/local/bin/ \ - && rm docker.tgz \ - && dockerd --version \ - && docker --version + ; \ + rm docker.tgz; \ + \ + dockerd --version; \ + docker --version COPY modprobe.sh /usr/local/bin/modprobe COPY dockerd-entrypoint.sh /usr/local/bin/ From 9f2628b8f2af39b6bbfc61f5490e1911c8cba234 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 8 Feb 2022 16:41:55 +0100 Subject: [PATCH 37/39] Fix --- .../PostgreSQL/StorageMaterializedPostgreSQL.cpp | 10 ++++++++-- src/Storages/ReadFinalForExternalReplicaStorage.cpp | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index fe81b322bdb..c72dec824f0 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -277,10 +277,16 @@ Pipe StorageMaterializedPostgreSQL::read( size_t max_block_size, unsigned num_streams) { - auto materialized_table_lock = lockForShare(String(), context_->getSettingsRef().lock_acquire_timeout); auto nested_table = getNested(); - return readFinalFromNestedStorage(nested_table, column_names, metadata_snapshot, + + auto pipe = readFinalFromNestedStorage(nested_table, column_names, metadata_snapshot, query_info, 
context_, processed_stage, max_block_size, num_streams); + + auto lock = lockForShare(context_->getCurrentQueryId(), context_->getSettingsRef().lock_acquire_timeout); + pipe.addTableLock(lock); + pipe.addStorageHolder(shared_from_this()); + + return pipe; } diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.cpp b/src/Storages/ReadFinalForExternalReplicaStorage.cpp index 36a40beca36..58b98aaa4c6 100644 --- a/src/Storages/ReadFinalForExternalReplicaStorage.cpp +++ b/src/Storages/ReadFinalForExternalReplicaStorage.cpp @@ -57,6 +57,7 @@ Pipe readFinalFromNestedStorage( Pipe pipe = nested_storage->read(require_columns_name, nested_metadata, query_info, context, processed_stage, max_block_size, num_streams); pipe.addTableLock(lock); + pipe.addStorageHolder(nested_storage); if (!expressions->children.empty() && !pipe.empty()) { From b8e350054c17f861166be0bbdf9841ee891c621e Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 8 Feb 2022 21:21:32 +0000 Subject: [PATCH 38/39] clang-tidy move fix build --- src/Processors/ForkProcessor.cpp | 4 ++-- src/Processors/Formats/Impl/TSKVRowInputFormat.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/ForkProcessor.cpp b/src/Processors/ForkProcessor.cpp index 9b17f8ad5ca..f4e5a5be5f2 100644 --- a/src/Processors/ForkProcessor.cpp +++ b/src/Processors/ForkProcessor.cpp @@ -63,9 +63,9 @@ ForkProcessor::Status ForkProcessor::prepare() { ++num_processed_outputs; if (num_processed_outputs == num_active_outputs) - output.push(std::move(data)); // NOLINT Can push because no full or unneeded outputs. + output.push(std::move(data)); /// NOLINT Can push because no full or unneeded outputs. 
else - output.push(data.clone()); + output.push(data.clone()); /// NOLINT } } diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 8a56c2ed5c7..1cd18087f56 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -20,7 +20,7 @@ namespace ErrorCodes TSKVRowInputFormat::TSKVRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_) - : IRowInputFormat(std::move(header_), in_, std::move(params_)), format_settings(format_settings_), name_map(header_.columns()) + : IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), name_map(header_.columns()) { const auto & sample_block = getPort().getHeader(); size_t num_columns = sample_block.columns(); From 00330461d1eefc541cff813fa5b8981cd4585b27 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 9 Feb 2022 03:56:57 +0300 Subject: [PATCH 39/39] Update int-uint.md --- docs/en/sql-reference/data-types/int-uint.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/data-types/int-uint.md b/docs/en/sql-reference/data-types/int-uint.md index 588b5a2d7d6..4cc590d9fa5 100644 --- a/docs/en/sql-reference/data-types/int-uint.md +++ b/docs/en/sql-reference/data-types/int-uint.md @@ -1,9 +1,9 @@ --- toc_priority: 40 -toc_title: UInt8, UInt16, UInt32, UInt64, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 +toc_title: UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 --- -# UInt8, UInt16, UInt32, UInt64, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 {#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256} +# UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 Fixed-length integers, with or without a sign.