Fixing window functions distributed tests by moving to using deterministic sharding key.

This commit is contained in:
Vitaliy Zakaznikov 2021-05-08 15:06:31 -04:00
parent 7c948139fc
commit b303ebc7e3
3 changed files with 24 additions and 8 deletions

View File

@ -202,7 +202,13 @@ All the updates are tracked using the [Revision History].
## Introduction ## Introduction
This software requirements specification covers requirements for `Map(key, value)` data type in [ClickHouse]. This software requirements specification covers requirements for supporting window functions in [ClickHouse].
Similar functionality exists in [MySQL] and [PostgreSQL]. [PostgreSQL] defines a window function as follows:
> A window function performs a calculation across a set of table rows that are somehow related to the current row.
> This is comparable to the type of calculation that can be done with an aggregate function.
> But unlike regular aggregate functions, use of a window function does not cause rows to
> become grouped into a single output row — the rows retain their separate identities.
## Requirements ## Requirements
@ -2290,3 +2296,5 @@ version: 1.0
[Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/window_functions/requirements/requirements.md [Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/window_functions/requirements/requirements.md
[Git]: https://git-scm.com/ [Git]: https://git-scm.com/
[GitHub]: https://github.com [GitHub]: https://github.com
[PostgreSQL]: https://www.postgresql.org/docs/9.2/tutorial-window.html
[MySQL]: https://dev.mysql.com/doc/refman/8.0/en/window-functions.html

View File

@ -1,6 +1,6 @@
# These requirements were auto generated # These requirements were auto generated
# from software requirements specification (SRS) # from software requirements specification (SRS)
# document by TestFlows v1.6.210312.1172513. # document by TestFlows v1.6.210505.1133630.
# Do not edit by hand but re-generate instead # Do not edit by hand but re-generate instead
# using 'tfs requirements generate' command. # using 'tfs requirements generate' command.
from testflows.core import Specification from testflows.core import Specification
@ -3796,7 +3796,13 @@ All the updates are tracked using the [Revision History].
## Introduction ## Introduction
This software requirements specification covers requirements for `Map(key, value)` data type in [ClickHouse]. This software requirements specification covers requirements for supporting window functions in [ClickHouse].
Similar functionality exists in [MySQL] and [PostgreSQL]. [PostgreSQL] defines a window function as follows:
> A window function performs a calculation across a set of table rows that are somehow related to the current row.
> This is comparable to the type of calculation that can be done with an aggregate function.
> But unlike regular aggregate functions, use of a window function does not cause rows to
> become grouped into a single output row — the rows retain their separate identities.
## Requirements ## Requirements
@ -5884,4 +5890,6 @@ version: 1.0
[Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/window_functions/requirements/requirements.md [Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/window_functions/requirements/requirements.md
[Git]: https://git-scm.com/ [Git]: https://git-scm.com/
[GitHub]: https://github.com [GitHub]: https://github.com
[PostgreSQL]: https://www.postgresql.org/docs/9.2/tutorial-window.html
[MySQL]: https://dev.mysql.com/doc/refman/8.0/en/window-functions.html
''') ''')

View File

@ -100,7 +100,7 @@ def t1_table(self, name="t1", distributed=False):
create_table(name=name + "_source", statement=sql, on_cluster="sharded_cluster") create_table(name=name + "_source", statement=sql, on_cluster="sharded_cluster")
with And("a distributed table"): with And("a distributed table"):
sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, rand())" sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, f1 % toUInt8(getMacro('shard')))"
table = create_table(name=name, statement=sql) table = create_table(name=name, statement=sql)
with And("populating table with data"): with And("populating table with data"):
@ -155,7 +155,7 @@ def datetimes_table(self, name="datetimes", distributed=False):
create_table(name=name + "_source", statement=sql, on_cluster="sharded_cluster") create_table(name=name + "_source", statement=sql, on_cluster="sharded_cluster")
with And("a distributed table"): with And("a distributed table"):
sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, rand())" sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, id % toUInt8(getMacro('shard')))"
table = create_table(name=name, statement=sql) table = create_table(name=name, statement=sql)
with And("populating table with data"): with And("populating table with data"):
@ -213,7 +213,7 @@ def numerics_table(self, name="numerics", distributed=False):
create_table(name=name + "_source", statement=sql, on_cluster="sharded_cluster") create_table(name=name + "_source", statement=sql, on_cluster="sharded_cluster")
with And("a distributed table"): with And("a distributed table"):
sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, rand())" sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, id % toUInt8(getMacro('shard')))"
table = create_table(name=name, statement=sql) table = create_table(name=name, statement=sql)
with And("populating table with data"): with And("populating table with data"):
@ -282,7 +282,7 @@ def tenk1_table(self, name="tenk1", distributed=False):
create_table(name=name + '_source', statement=sql, on_cluster="sharded_cluster") create_table(name=name + '_source', statement=sql, on_cluster="sharded_cluster")
with And("a distributed table"): with And("a distributed table"):
sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, rand())" sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, unique1 % toUInt8(getMacro('shard')))"
table = create_table(name=name, statement=sql) table = create_table(name=name, statement=sql)
with And("populating table with data"): with And("populating table with data"):
@ -353,7 +353,7 @@ def empsalary_table(self, name="empsalary", distributed=False):
create_table(name=name + "_source", statement=sql, on_cluster="sharded_cluster") create_table(name=name + "_source", statement=sql, on_cluster="sharded_cluster")
with And("a distributed table"): with And("a distributed table"):
sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, rand())" sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, empno % toUInt8(getMacro('shard')))"
table = create_table(name=name, statement=sql) table = create_table(name=name, statement=sql)
with And("populating distributed table with data"): with And("populating distributed table with data"):