Fixing window functions distributed tests by moving to using deterministic sharding key.

This commit is contained in:
Vitaliy Zakaznikov 2021-05-08 15:06:31 -04:00
parent 7c948139fc
commit b303ebc7e3
3 changed files with 24 additions and 8 deletions

View File

@ -202,7 +202,13 @@ All the updates are tracked using the [Revision History].
## Introduction ## Introduction
This software requirements specification covers requirements for `Map(key, value)` data type in [ClickHouse]. This software requirements specification covers requirements for supporting window functions in [ClickHouse].
Similar functionality exists in [MySQL] and [PostgreSQL]. [PostgreSQL] defines a window function as follows:
> A window function performs a calculation across a set of table rows that are somehow related to the current row.
> This is comparable to the type of calculation that can be done with an aggregate function.
> But unlike regular aggregate functions, use of a window function does not cause rows to
> become grouped into a single output row — the rows retain their separate identities.
## Requirements ## Requirements
@ -2290,3 +2296,5 @@ version: 1.0
[Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/window_functions/requirements/requirements.md [Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/window_functions/requirements/requirements.md
[Git]: https://git-scm.com/ [Git]: https://git-scm.com/
[GitHub]: https://github.com [GitHub]: https://github.com
[PostgreSQL]: https://www.postgresql.org/docs/9.2/tutorial-window.html
[MySQL]: https://dev.mysql.com/doc/refman/8.0/en/window-functions.html

View File

@ -1,6 +1,6 @@
# These requirements were auto generated # These requirements were auto generated
# from software requirements specification (SRS) # from software requirements specification (SRS)
# document by TestFlows v1.6.210312.1172513. # document by TestFlows v1.6.210505.1133630.
# Do not edit by hand but re-generate instead # Do not edit by hand but re-generate instead
# using 'tfs requirements generate' command. # using 'tfs requirements generate' command.
from testflows.core import Specification from testflows.core import Specification
@ -3796,7 +3796,13 @@ All the updates are tracked using the [Revision History].
## Introduction ## Introduction
This software requirements specification covers requirements for `Map(key, value)` data type in [ClickHouse]. This software requirements specification covers requirements for supporting window functions in [ClickHouse].
Similar functionality exists in [MySQL] and [PostgreSQL]. [PostgreSQL] defines a window function as follows:
> A window function performs a calculation across a set of table rows that are somehow related to the current row.
> This is comparable to the type of calculation that can be done with an aggregate function.
> But unlike regular aggregate functions, use of a window function does not cause rows to
> become grouped into a single output row — the rows retain their separate identities.
## Requirements ## Requirements
@ -5884,4 +5890,6 @@ version: 1.0
[Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/window_functions/requirements/requirements.md [Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/window_functions/requirements/requirements.md
[Git]: https://git-scm.com/ [Git]: https://git-scm.com/
[GitHub]: https://github.com [GitHub]: https://github.com
[PostgreSQL]: https://www.postgresql.org/docs/9.2/tutorial-window.html
[MySQL]: https://dev.mysql.com/doc/refman/8.0/en/window-functions.html
''') ''')

View File

@ -100,7 +100,7 @@ def t1_table(self, name="t1", distributed=False):
create_table(name=name + "_source", statement=sql, on_cluster="sharded_cluster") create_table(name=name + "_source", statement=sql, on_cluster="sharded_cluster")
with And("a distributed table"): with And("a distributed table"):
sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, rand())" sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, f1 % toUInt8(getMacro('shard')))"
table = create_table(name=name, statement=sql) table = create_table(name=name, statement=sql)
with And("populating table with data"): with And("populating table with data"):
@ -155,7 +155,7 @@ def datetimes_table(self, name="datetimes", distributed=False):
create_table(name=name + "_source", statement=sql, on_cluster="sharded_cluster") create_table(name=name + "_source", statement=sql, on_cluster="sharded_cluster")
with And("a distributed table"): with And("a distributed table"):
sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, rand())" sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, id % toUInt8(getMacro('shard')))"
table = create_table(name=name, statement=sql) table = create_table(name=name, statement=sql)
with And("populating table with data"): with And("populating table with data"):
@ -213,7 +213,7 @@ def numerics_table(self, name="numerics", distributed=False):
create_table(name=name + "_source", statement=sql, on_cluster="sharded_cluster") create_table(name=name + "_source", statement=sql, on_cluster="sharded_cluster")
with And("a distributed table"): with And("a distributed table"):
sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, rand())" sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, id % toUInt8(getMacro('shard')))"
table = create_table(name=name, statement=sql) table = create_table(name=name, statement=sql)
with And("populating table with data"): with And("populating table with data"):
@ -282,7 +282,7 @@ def tenk1_table(self, name="tenk1", distributed=False):
create_table(name=name + '_source', statement=sql, on_cluster="sharded_cluster") create_table(name=name + '_source', statement=sql, on_cluster="sharded_cluster")
with And("a distributed table"): with And("a distributed table"):
sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, rand())" sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, unique1 % toUInt8(getMacro('shard')))"
table = create_table(name=name, statement=sql) table = create_table(name=name, statement=sql)
with And("populating table with data"): with And("populating table with data"):
@ -353,7 +353,7 @@ def empsalary_table(self, name="empsalary", distributed=False):
create_table(name=name + "_source", statement=sql, on_cluster="sharded_cluster") create_table(name=name + "_source", statement=sql, on_cluster="sharded_cluster")
with And("a distributed table"): with And("a distributed table"):
sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, rand())" sql = "CREATE TABLE {name} AS " + name + '_source' + " ENGINE = Distributed(sharded_cluster, default, " + f"{name + '_source'}, empno % toUInt8(getMacro('shard')))"
table = create_table(name=name, statement=sql) table = create_table(name=name, statement=sql)
with And("populating distributed table with data"): with And("populating distributed table with data"):