Revert "remove partitionID function, update tests and remove from documentation"

This reverts commit c3b72386a2.
2024-09-20 00:30:49 +00:00 · 2024-07-13 20:06:37 +00:00 · 2024-07-13 20:06:37 +00:00 · eb9a629868
commit eb9a629868
parent cef13cfa72
4 changed files with 135 additions and 4 deletions
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@ -2984,6 +2984,66 @@ Result:
 └─────────┘
 ```

+## partitionId
+
+Returns computed [partition](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) IDs of its arguments.
+
+:::note
+This function is slow and should not be called for a large number of rows.
+:::
+
+**Syntax**
+
+```sql
+partitionId(x[, y, ...]);
+```
+
+**Arguments**
+
+- `x` — Column for which to return the partition ID.
+- `y, ...` — Remaining N columns for which to return the partition ID (optional).
+
+**Return Type**
+
+- Partition ID that the row belongs to. [String](../data-types/string.md).
+
+**Example**
+
+Query:
+
+```sql
+DROP TABLE IF EXISTS mt;
+CREATE TABLE mt
+(
+  `i` int,
+  `j` int
+)
+ENGINE = MergeTree
+PARTITION BY i
+ORDER BY j
+SETTINGS index_granularity = 1;
+
+INSERT INTO mt VALUES (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6);
+
+SELECT * FROM mt WHERE _partition_id = partitionId(1);
+SELECT * FROM mt WHERE _partition_id = partitionId(2);
+```
+Result:
+
+```response
+   ┌─i─┬─j─┐
+1. │ 1 │ 1 │
+2. │ 1 │ 2 │
+3. │ 1 │ 3 │
+   └───┴───┘
+   ┌─i─┬─j─┐
+1. │ 2 │ 4 │
+2. │ 2 │ 5 │
+3. │ 2 │ 6 │
+   └───┴───┘
+```
+
+
 ## shardNum

 Returns the index of a shard which processes a part of data in a distributed query. Indices are started from `1`.
--- a/src/Functions/partitionId.cpp
+++ b/src/Functions/partitionId.cpp
@ -0,0 +1,71 @@
+#include <memory>
+#include <Columns/ColumnString.h>
+#include <Core/Block.h>
+#include <DataTypes/DataTypeString.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/IFunction.h>
+#include <Storages/MergeTree/MergeTreePartition.h>
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
+
+/** partitionId(x, y, ...) returns, for every input row, the ID string that MergeTreePartition::getID() produces for that row's argument values.
+  * The function is slow (a Row and a MergeTreePartition are built for each row) and should not be called for a large number of rows.
+  */
+class FunctionPartitionId : public IFunction
+{
+public:
+    static constexpr auto name = "partitionId";
+
+    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionPartitionId>(); }
+
+    String getName() const override { return name; }
+
+    bool isVariadic() const override { return true; } // Any number of arguments; the "at least one" check is in getReturnTypeImpl.
+
+    size_t getNumberOfArguments() const override { return 0; } // NOTE(review): presumably unused for variadic functions - confirm against IFunction.
+
+    bool isInjective(const ColumnsWithTypeAndName & /*sample_columns*/) const override { return true; } // NOTE(review): declared injective regardless of argument types - confirm this holds for every ID encoding.
+
+    bool useDefaultImplementationForNulls() const override { return true; }
+    bool useDefaultImplementationForConstants() const override { return true; }
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        if (arguments.empty()) // Argument types themselves are not validated here, only their count.
+            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName());
+
+        return std::make_shared<DataTypeString>(); // The result type is always String.
+    }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
+    {
+        Block sample_block(arguments); // Block made of the argument columns; handed to getID() for every row below.
+        size_t size = arguments.size();
+
+        auto result_column = ColumnString::create();
+        for (size_t j = 0; j < input_rows_count; ++j)
+        {
+            Row row(size); // One field per argument for the current row j.
+            for (size_t i = 0; i < size; ++i)
+                arguments[i].column->get(j, row[i]); // Read the value of argument i at row j into row[i].
+            MergeTreePartition partition(std::move(row));
+            result_column->insert(partition.getID(sample_block)); // Append the ID computed for this row.
+        }
+        return result_column;
+    }
+};
+
+REGISTER_FUNCTION(PartitionId)
+{
+    factory.registerFunction<FunctionPartitionId>(); // Registers FunctionPartitionId with the function factory.
+}
+
+}
--- a/tests/integration/test_lost_part/test.py
+++ b/tests/integration/test_lost_part/test.py
@ -266,7 +266,7 @@ def test_lost_last_part(start_cluster):
            "ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"}
        )

-        partition_id = node1.query("select _partition_id").strip()
+        partition_id = node1.query("select partitionId('x')").strip()
        remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0")

        # other way to detect broken parts
--- a/tests/queries/0_stateless/01748_partition_id_pruning.sql
+++ b/tests/queries/0_stateless/01748_partition_id_pruning.sql
@ -8,17 +8,17 @@ insert into x values (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6);

 set max_rows_to_read = 3;

-select * from x where _partition_id = '1';
+select * from x where _partition_id = partitionId(1);

 set max_rows_to_read = 5; -- one row for subquery + subquery

-select * from x where _partition_id in (select number + 1 from numbers(1));
+select * from x where _partition_id in (select partitionId(number + 1) from numbers(1));

 -- trivial count optimization test
 set max_rows_to_read = 2; -- one row for subquery + subquery itself
 -- TODO: Relax the limits because we might build prepared set twice with _minmax_count_projection
 set max_rows_to_read = 3;
-select count() from x where _partition_id in (select number + 1 from numbers(1));
+select count() from x where _partition_id in (select partitionId(number + 1) from numbers(1));

 drop table x;