diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 843cb16f572..47e33806930 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -392,5 +392,43 @@ Result: └─────────────────────────────┘ ``` +## mapExtractKeyLike {#mapExtractKeyLike} + +**Syntax** + +```sql +mapExtractKeyLike(map, pattern) +``` + +**Parameters** + +- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `pattern` - String pattern to match. + +**Returned value** + +- A map contained elements the key of which matchs the specified pattern. If there are no elements matched the pattern, it will return an empty map. + +**Example** + +Query: + +```sql +CREATE TABLE test (a Map(String,String)) ENGINE = Memory; + +INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'}); + +SELECT mapExtractKeyLike(a, 'a%') FROM test; +``` + +Result: + +```text +┌─mapExtractKeyLike(a, 'a%')─┐ +│ {'abc':'abc'} │ +│ {} │ +└────────────────────────────┘ +``` + [Original article](https://clickhouse.com/docs/en/sql-reference/functions/tuple-map-functions/) diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index edb0c28c980..03a9da404c2 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -382,6 +382,142 @@ public: bool useDefaultImplementationForConstants() const override { return true; } }; +class FunctionExtractKeyLike : public IFunction +{ +public: + static constexpr auto name = "mapExtractKeyLike"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*info*/) const override { return true; } + + size_t getNumberOfArguments() const override { return 2; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 2) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 2", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + + const DataTypeMap * map_type = checkAndGetDataType(arguments[0].type.get()); + + if (!map_type) + throw Exception{"First argument for function " + getName() + " must be a map", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + + auto key_type = map_type->getKeyType(); + + WhichDataType which(key_type); + + if (!which.isStringOrFixedString()) + throw Exception{"Function " + getName() + "only support the map with String or FixedString key", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + if (!isStringOrFixedString(arguments[1].type)) + throw Exception{"Second argument passed to function " + getName() + " must be String or FixedString", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + return std::make_shared(map_type->getKeyType(), map_type->getValueType()); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + bool is_const = isColumnConst(*arguments[0].column); + const ColumnMap * col_map = typeid_cast(arguments[0].column.get()); + + //It may not be necessary to check this condition, cause it will be checked in getReturnTypeImpl function + if (!col_map) + return nullptr; + + const DataTypeMap * map_type = checkAndGetDataType(arguments[0].type.get()); + auto key_type = map_type->getKeyType(); + auto value_type = map_type->getValueType(); + + const auto & nested_column = col_map->getNestedColumn(); + const auto & keys_column = col_map->getNestedData().getColumn(0); + const auto & values_column = col_map->getNestedData().getColumn(1); + const ColumnString * keys_string_column = checkAndGetColumn(keys_column); + const ColumnFixedString * keys_fixed_string_column = checkAndGetColumn(keys_column); + + FunctionLike func_like; + + //create result data + MutableColumnPtr keys_data = key_type->createColumn(); + MutableColumnPtr values_data = value_type->createColumn(); + MutableColumnPtr offsets = DataTypeNumber().createColumn(); + + IColumn::Offset current_offset = 0; + + for (size_t row = 0; row < input_rows_count; row++) + { + size_t element_start_row = row != 0 ? nested_column.getOffsets()[row-1] : 0; + size_t element_size = nested_column.getOffsets()[row]- element_start_row; + + ColumnsWithTypeAndName new_arguments; + ColumnPtr sub_map_column; + DataTypePtr data_type; + + if (keys_string_column) + { + sub_map_column = keys_string_column->cut(element_start_row, element_size); + data_type = std::make_shared(); + } + else + { + sub_map_column = keys_fixed_string_column->cut(element_start_row, element_size); + data_type =std::make_shared(checkAndGetColumn(sub_map_column.get())->getN()); + } + + size_t col_key_size = sub_map_column->size(); + auto column = is_const? ColumnConst::create(std::move(sub_map_column), std::move(col_key_size)) : std::move(sub_map_column); + + new_arguments = { + { + column, + data_type, + "" + }, + arguments[1] + }; + + auto res = func_like.executeImpl(new_arguments, result_type, input_rows_count); + const auto & container = checkAndGetColumn(res.get())->getData(); + + for (size_t row_num = 0; row_num < element_size; row_num++) + { + if (container[row_num] == 1) + { + auto key_ref = keys_string_column ? + keys_string_column->getDataAt(element_start_row + row_num) : + keys_fixed_string_column->getDataAt(element_start_row + row_num); + auto value_ref = values_column.getDataAt(element_start_row + row_num); + + keys_data->insertData(key_ref.data, key_ref.size); + values_data->insertData(value_ref.data, value_ref.size); + current_offset += 1; + } + } + + offsets->insert(current_offset); + } + + auto result_nested_column = ColumnArray::create( + ColumnTuple::create(Columns{std::move(keys_data), std::move(values_data)}), + std::move(offsets)); + + return ColumnMap::create(result_nested_column); + } +}; + } void registerFunctionsMap(FunctionFactory & factory) @@ -391,6 +527,7 @@ void registerFunctionsMap(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 5254b140679..16259e8057e 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -252,9 +252,6 @@ namespace detail impl->position() = position(); } - if (!working_buffer.empty()) - impl->position() = position(); - if (!impl->next()) return false; diff --git a/src/Storages/FileLog/FileLogSettings.h b/src/Storages/FileLog/FileLogSettings.h index d14120d0ba0..865727bd864 100644 --- a/src/Storages/FileLog/FileLogSettings.h +++ b/src/Storages/FileLog/FileLogSettings.h @@ -14,7 +14,7 @@ class ASTStorage; M(Milliseconds, poll_timeout_ms, 0, "Timeout for single poll from StorageFileLog.", 0) \ M(UInt64, poll_max_batch_size, 0, "Maximum amount of messages to be polled in a single StorageFileLog poll.", 0) \ M(UInt64, max_block_size, 0, "Number of row collected by poll(s) for flushing data from StorageFileLog.", 0) \ - M(UInt64, max_threads, 8, "Number of max threads to parse files, default is 8", 0) \ + M(UInt64, max_threads, 0, "Number of max threads to parse files, default is 0, which means the number will be max(1, physical_cpu_cores / 4)", 0) \ M(Milliseconds, poll_directory_watch_events_backoff_init, 500, "The initial sleep value for watch directory thread.", 0) \ M(Milliseconds, poll_directory_watch_events_backoff_max, 32000, "The max sleep value for watch directory thread.", 0) \ M(UInt64, poll_directory_watch_events_backoff_factor, 2, "The speed of backoff, exponential by default", 0) diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index d9b08762d28..1ba16fc821c 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -750,7 +750,12 @@ void registerStorageFileLog(StorageFactory & factory) auto physical_cpu_cores = getNumberOfPhysicalCPUCores(); auto num_threads = filelog_settings->max_threads.value; - if (num_threads > physical_cpu_cores) + if (!num_threads) /// Default + { + num_threads = std::max(unsigned(1), physical_cpu_cores / 4); + filelog_settings->set("max_threads", num_threads); + } + else if (num_threads > physical_cpu_cores) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Number of threads to parse files can not be bigger than {}", physical_cpu_cores); } diff --git a/tests/integration/test_disk_over_web_server/configs/async_read.xml b/tests/integration/test_disk_over_web_server/configs/async_read.xml new file mode 100644 index 00000000000..4449d83779a --- /dev/null +++ b/tests/integration/test_disk_over_web_server/configs/async_read.xml @@ -0,0 +1,7 @@ + + + + read_threadpool + + + diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index b8629fdbe9e..55f760f514f 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -11,6 +11,7 @@ def cluster(): cluster.add_instance("node1", main_configs=["configs/storage_conf.xml"], with_nginx=True) cluster.add_instance("node2", main_configs=["configs/storage_conf_web.xml"], with_nginx=True) cluster.add_instance("node3", main_configs=["configs/storage_conf_web.xml"], with_nginx=True) + cluster.add_instance("node_async_read", main_configs=["configs/storage_conf_web.xml"], user_configs=["configs/async_read.xml"], with_nginx=True) cluster.start() node1 = cluster.instances["node1"] @@ -37,9 +38,10 @@ def cluster(): cluster.shutdown() -def test_usage(cluster): +@pytest.mark.parametrize("node_name", ["node2", "node_async_read"]) +def test_usage(cluster, node_name): node1 = cluster.instances["node1"] - node2 = cluster.instances["node2"] + node2 = cluster.instances[node_name] global uuids assert(len(uuids) == 3) for i in range(3): @@ -49,6 +51,8 @@ def test_usage(cluster): SETTINGS storage_policy = 'web'; """.format(i, uuids[i], i, i)) + result = node2.query("SELECT * FROM test{} settings max_threads=20".format(i)) + result = node2.query("SELECT count() FROM test{}".format(i)) assert(int(result) == 500000 * (i+1)) @@ -82,4 +86,3 @@ def test_incorrect_usage(cluster): assert("Table is read-only" in result) node2.query("DROP TABLE test0") - diff --git a/tests/queries/0_stateless/02111_function_mapExtractKeyLike.reference b/tests/queries/0_stateless/02111_function_mapExtractKeyLike.reference new file mode 100644 index 00000000000..45edbc24c75 --- /dev/null +++ b/tests/queries/0_stateless/02111_function_mapExtractKeyLike.reference @@ -0,0 +1,23 @@ +The data of table: +1 {'P1-K1':'1-V1','P2-K2':'1-V2'} +2 {'P1-K1':'2-V1','P2-K2':'2-V2'} +3 {'P1-K1':'3-V1','P2-K2':'3-V2'} +4 {'P1-K1':'4-V1','P2-K2':'4-V2'} +5 {'5-K1':'5-V1','5-K2':'5-V2'} +6 {'P3-K1':'6-V1','P4-K2':'6-V2'} + +The results of query: SELECT id, mapExtractKeyLike(map, \'P1%\') FROM map_extractKeyLike_test ORDER BY id; +1 {'P1-K1':'1-V1'} +2 {'P1-K1':'2-V1'} +3 {'P1-K1':'3-V1'} +4 {'P1-K1':'4-V1'} +5 {} +6 {} + +The results of query: SELECT id, mapExtractKeyLike(map, \'5-K1\') FROM map_extractKeyLike_test ORDER BY id; +1 {} +2 {} +3 {} +4 {} +5 {'5-K1':'5-V1'} +6 {} diff --git a/tests/queries/0_stateless/02111_function_mapExtractKeyLike.sql b/tests/queries/0_stateless/02111_function_mapExtractKeyLike.sql new file mode 100644 index 00000000000..31f53642b74 --- /dev/null +++ b/tests/queries/0_stateless/02111_function_mapExtractKeyLike.sql @@ -0,0 +1,22 @@ +DROP TABLE IF EXISTS map_extractKeyLike_test; + +CREATE TABLE map_extractKeyLike_test (id UInt32, map Map(String, String)) Engine=MergeTree() ORDER BY id settings index_granularity=2; + +INSERT INTO map_extractKeyLike_test VALUES (1, {'P1-K1':'1-V1','P2-K2':'1-V2'}),(2,{'P1-K1':'2-V1','P2-K2':'2-V2'}); +INSERT INTO map_extractKeyLike_test VALUES (3, {'P1-K1':'3-V1','P2-K2':'3-V2'}),(4,{'P1-K1':'4-V1','P2-K2':'4-V2'}); +INSERT INTO map_extractKeyLike_test VALUES (5, {'5-K1':'5-V1','5-K2':'5-V2'}),(6, {'P3-K1':'6-V1','P4-K2':'6-V2'}); + +SELECT 'The data of table:'; +SELECT * FROM map_extractKeyLike_test ORDER BY id; + +SELECT ''; + +SELECT 'The results of query: SELECT id, mapExtractKeyLike(map, \'P1%\') FROM map_extractKeyLike_test ORDER BY id;'; +SELECT id, mapExtractKeyLike(map, 'P1%') FROM map_extractKeyLike_test ORDER BY id; + +SELECT ''; + +SELECT 'The results of query: SELECT id, mapExtractKeyLike(map, \'5-K1\') FROM map_extractKeyLike_test ORDER BY id;'; +SELECT id, mapExtractKeyLike(map, '5-K1') FROM map_extractKeyLike_test ORDER BY id; + +DROP TABLE map_extractKeyLike_test;