mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 01:22:04 +00:00
Merge branch 'master' into add-a-way-to-force-read-through-filesystem-cache-for-merges
This commit is contained in:
commit
9d76257464
@ -433,3 +433,292 @@ Result:
|
||||
│ [0,1,2,3,4,5,6,7] │
|
||||
└───────────────────┘
|
||||
```
|
||||
|
||||
## mortonEncode
|
||||
|
||||
Calculates the Morton encoding (ZCurve) for a list of unsigned integers.
|
||||
|
||||
The function has two modes of operation:
|
||||
- Simple
|
||||
- Expanded
|
||||
|
||||
### Simple mode
|
||||
|
||||
Accepts up to 8 unsigned integers as arguments and produces a UInt64 code.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
mortonEncode(args)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A UInt64 code
|
||||
|
||||
Type: [UInt64](../../sql-reference/data-types/int-uint.md)
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT mortonEncode(1, 2, 3);
|
||||
```
|
||||
Result:
|
||||
|
||||
```response
|
||||
53
|
||||
```
|
||||
|
||||
### Expanded mode
|
||||
|
||||
Accepts a range mask ([tuple](../../sql-reference/data-types/tuple.md)) as a first argument and up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) as other arguments.
|
||||
|
||||
Each number in the mask configures the amount of range expansion:<br/>
|
||||
1 - no expansion<br/>
|
||||
2 - 2x expansion<br/>
|
||||
3 - 3x expansion<br/>
|
||||
...<br/>
|
||||
Up to 8x expansion.<br/>
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
mortonEncode(range_mask, args)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
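- `range_mask`: [tuple](../../sql-reference/data-types/tuple.md) of integers, each between 1 and 8, with one element per argument in `args`.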
|
||||
- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type.
|
||||
|
||||
Note: when using columns for `args` the provided `range_mask` tuple should still be a constant.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A UInt64 code
|
||||
|
||||
Type: [UInt64](../../sql-reference/data-types/int-uint.md)
|
||||
|
||||
|
||||
**Example**
|
||||
|
||||
Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality).
|
||||
For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF).
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT mortonEncode((1,2), 1024, 16);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
1572864
|
||||
```
|
||||
|
||||
Note: tuple size must be equal to the number of the other arguments.
|
||||
|
||||
**Example**
|
||||
|
||||
Morton encoding for one argument is always the argument itself:
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT mortonEncode(1);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
1
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
It is also possible to expand a single argument:
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT mortonEncode(tuple(2), 128);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
32768
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
You can also use column names in the function.
|
||||
|
||||
Query:
|
||||
|
||||
First create the table and insert some data.
|
||||
|
||||
```sql
|
||||
create table morton_numbers(
|
||||
n1 UInt32,
|
||||
n2 UInt32,
|
||||
n3 UInt16,
|
||||
n4 UInt16,
|
||||
n5 UInt8,
|
||||
n6 UInt8,
|
||||
n7 UInt8,
|
||||
n8 UInt8
|
||||
)
|
||||
Engine=MergeTree()
|
||||
ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into morton_numbers (*) values(1,2,3,4,5,6,7,8);
|
||||
```
|
||||
Use column names instead of constants as function arguments to `mortonEncode`
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8) FROM morton_numbers;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
2155374165
|
||||
```
|
||||
|
||||
**Implementation details**
|
||||
|
||||
Please note that you can fit only so many bits of information into Morton code as [UInt64](../../sql-reference/data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each, three arguments a range of max 2^21 (64/3) each and so on. All overflow will be clamped to zero.
|
||||
|
||||
## mortonDecode
|
||||
|
||||
Decodes a Morton encoding (ZCurve) into the corresponding unsigned integer tuple.
|
||||
|
||||
As with the `mortonEncode` function, this function has two modes of operation:
|
||||
- Simple
|
||||
- Expanded
|
||||
|
||||
### Simple mode
|
||||
|
||||
Accepts a resulting tuple size as the first argument and the code as the second argument.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
mortonDecode(tuple_size, code)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
- `tuple_size`: integer value no more than 8.
|
||||
- `code`: [UInt64](../../sql-reference/data-types/int-uint.md) code.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- [tuple](../../sql-reference/data-types/tuple.md) of the specified size.
|
||||
|
||||
Type: [Tuple](../../sql-reference/data-types/tuple.md) of [UInt64](../../sql-reference/data-types/int-uint.md)
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT mortonDecode(3, 53);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
["1","2","3"]
|
||||
```
|
||||
|
||||
### Expanded mode
|
||||
|
||||
Accepts a range mask (tuple) as a first argument and the code as the second argument.
|
||||
Each number in the mask configures the amount of range shrink:<br/>
|
||||
1 - no shrink<br/>
|
||||
2 - 2x shrink<br/>
|
||||
3 - 3x shrink<br/>
|
||||
...<br/>
|
||||
Up to 8x shrink.<br/>
|
||||
|
||||
Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality).
|
||||
For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF).
|
||||
As with the encode function, this is limited to 8 numbers at most.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT mortonDecode(1, 1);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
["1"]
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
It is also possible to shrink one argument:
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT mortonDecode(tuple(2), 32768);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
["128"]
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
You can also use column names in the function.
|
||||
|
||||
First create the table and insert some data.
|
||||
|
||||
Query:
|
||||
```sql
|
||||
create table morton_numbers(
|
||||
n1 UInt32,
|
||||
n2 UInt32,
|
||||
n3 UInt16,
|
||||
n4 UInt16,
|
||||
n5 UInt8,
|
||||
n6 UInt8,
|
||||
n7 UInt8,
|
||||
n8 UInt8
|
||||
)
|
||||
Engine=MergeTree()
|
||||
ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into morton_numbers (*) values(1,2,3,4,5,6,7,8);
|
||||
```
|
||||
Use column names instead of constants as function arguments to `mortonDecode`
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
select untuple(mortonDecode(8, mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8))) from morton_numbers;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
1 2 3 4 5 6 7 8
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -11,79 +11,173 @@ elimination](../../sql-reference/functions/index.md#common-subexpression-elimina
|
||||
function return different random values.
|
||||
|
||||
Related content
|
||||
|
||||
- Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse)
|
||||
|
||||
:::note
|
||||
The random numbers are generated by non-cryptographic algorithms.
|
||||
:::
|
||||
|
||||
## rand, rand32
|
||||
## rand
|
||||
|
||||
Returns a random UInt32 number, evenly distributed across the range of all possible UInt32 numbers.
|
||||
Returns a random UInt32 number with uniform distribution.
|
||||
|
||||
Uses a linear congruential generator.
|
||||
Uses a linear congruential generator with an initial state obtained from the system, which means that while it appears random, it's not truly random and can be predictable if the initial state is known. For scenarios where true randomness is crucial, consider using alternative methods like system-level calls or integrating with external libraries.
|
||||
|
||||
### Syntax
|
||||
|
||||
```sql
|
||||
rand()
|
||||
```
|
||||
|
||||
Alias: `rand32`
|
||||
|
||||
### Arguments
|
||||
|
||||
None.
|
||||
|
||||
### Returned value
|
||||
|
||||
Returns a number of type UInt32.
|
||||
|
||||
### Example
|
||||
|
||||
```sql
|
||||
SELECT rand();
|
||||
```
|
||||
|
||||
```response
|
||||
1569354847 -- Note: The actual output will be a random number, not the specific number shown in the example.
|
||||
```
|
||||
|
||||
## rand64
|
||||
|
||||
Returns a random UInt64 number, evenly distributed across the range of all possible UInt64 numbers.
|
||||
Returns a random UInt64 number with uniform distribution.
|
||||
|
||||
Uses a linear congruential generator.
|
||||
### Syntax
|
||||
|
||||
```sql
|
||||
rand64()
|
||||
```
|
||||
|
||||
### Arguments
|
||||
|
||||
None.
|
||||
|
||||
### Returned value
|
||||
|
||||
Returns a number of type UInt64 with uniform distribution.
|
||||
|
||||
Uses a linear congruential generator with an initial state obtained from the system, which means that while it appears random, it's not truly random and can be predictable if the initial state is known. For scenarios where true randomness is crucial, consider using alternative methods like system-level calls or integrating with external libraries.
|
||||
|
||||
### Example
|
||||
|
||||
```sql
|
||||
SELECT rand64();
|
||||
```
|
||||
|
||||
```response
|
||||
15030268859237645412 -- Note: The actual output will be a random number, not the specific number shown in the example.
|
||||
```
|
||||
|
||||
## randCanonical
|
||||
|
||||
Returns a random Float64 value, evenly distributed in interval [0, 1).
|
||||
Returns a random Float64 number.
|
||||
|
||||
### Syntax
|
||||
|
||||
```sql
|
||||
randCanonical()
|
||||
```
|
||||
|
||||
### Arguments
|
||||
|
||||
None.
|
||||
|
||||
### Returned value
|
||||
|
||||
Returns a Float64 value between 0 (inclusive) and 1 (exclusive).
|
||||
|
||||
### Example
|
||||
|
||||
```sql
|
||||
SELECT randCanonical();
|
||||
```
|
||||
|
||||
```response
|
||||
0.3452178901234567 -- Note: The actual output will be a random Float64 number between 0 and 1, not the specific number shown in the example.
|
||||
```
|
||||
|
||||
## randConstant
|
||||
|
||||
Like `rand` but produces a constant column with a random value.
|
||||
Generates a single constant column filled with a random value. Unlike `rand`, this function ensures the same random value appears in every row of the generated column, making it useful for scenarios requiring a consistent random seed across rows in a single query.
|
||||
|
||||
**Example**
|
||||
### Syntax
|
||||
|
||||
``` sql
|
||||
SELECT rand(), rand(1), rand(number), randConstant(), randConstant(1), randConstant(number)
|
||||
FROM numbers(3)
|
||||
```sql
|
||||
randConstant([x]);
|
||||
```
|
||||
|
||||
Result:
|
||||
### Arguments
|
||||
|
||||
``` result
|
||||
┌─────rand()─┬────rand(1)─┬─rand(number)─┬─randConstant()─┬─randConstant(1)─┬─randConstant(number)─┐
|
||||
│ 3047369878 │ 4132449925 │ 4044508545 │ 2740811946 │ 4229401477 │ 1924032898 │
|
||||
│ 2938880146 │ 1267722397 │ 4154983056 │ 2740811946 │ 4229401477 │ 1924032898 │
|
||||
│ 956619638 │ 4238287282 │ 1104342490 │ 2740811946 │ 4229401477 │ 1924032898 │
|
||||
└────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘
|
||||
- **[x] (Optional):** An optional expression that influences the generated random value. Even if provided, the resulting value will still be constant within the same query execution. Different queries using the same expression will likely generate different constant values.
|
||||
|
||||
### Returned value
|
||||
|
||||
Returns a column of type UInt32 containing the same random value in each row.
|
||||
|
||||
### Implementation details
|
||||
|
||||
The actual output will be different for each query execution, even with the same optional expression. The optional parameter may not significantly change the generated value compared to using `randConstant` alone.
|
||||
|
||||
### Examples
|
||||
|
||||
```sql
|
||||
SELECT randConstant() AS random_value;
|
||||
```
|
||||
|
||||
```response
|
||||
| random_value |
|
||||
|--------------|
|
||||
| 1234567890 |
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT randConstant(10) AS random_value;
|
||||
```
|
||||
|
||||
```response
|
||||
| random_value |
|
||||
|--------------|
|
||||
| 3876543210 |
|
||||
```
|
||||
|
||||
## randUniform
|
||||
|
||||
Returns a random Float64 drawn uniformly from interval [`min`, `max`) ([continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution)).
|
||||
Returns a random Float64 drawn uniformly from interval [`min`, `max`].
|
||||
|
||||
**Syntax**
|
||||
### Syntax
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
randUniform(min, max)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
### Arguments
|
||||
|
||||
- `min` - `Float64` - left boundary of the range,
|
||||
- `max` - `Float64` - right boundary of the range.
|
||||
|
||||
**Returned value**
|
||||
### Returned value
|
||||
|
||||
- Random number.
|
||||
A random number of type [Float64](/docs/en/sql-reference/data-types/float.md).
|
||||
|
||||
Type: [Float64](/docs/en/sql-reference/data-types/float.md).
|
||||
### Example
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT randUniform(5.5, 10) FROM numbers(5)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` result
|
||||
```response
|
||||
┌─randUniform(5.5, 10)─┐
|
||||
│ 8.094978491443102 │
|
||||
│ 7.3181248914450885 │
|
||||
@ -99,7 +193,7 @@ Returns a random Float64 drawn from a [normal distribution](https://en.wikipedia
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
randNormal(mean, variance)
|
||||
```
|
||||
|
||||
@ -116,13 +210,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT randNormal(10, 2) FROM numbers(5)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` result
|
||||
```result
|
||||
┌──randNormal(10, 2)─┐
|
||||
│ 13.389228911709653 │
|
||||
│ 8.622949707401295 │
|
||||
@ -138,7 +232,7 @@ Returns a random Float64 drawn from a [log-normal distribution](https://en.wikip
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
randLogNormal(mean, variance)
|
||||
```
|
||||
|
||||
@ -155,13 +249,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT randLogNormal(100, 5) FROM numbers(5)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` result
|
||||
```result
|
||||
┌─randLogNormal(100, 5)─┐
|
||||
│ 1.295699673937363e48 │
|
||||
│ 9.719869109186684e39 │
|
||||
@ -177,7 +271,7 @@ Returns a random UInt64 drawn from a [binomial distribution](https://en.wikipedi
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
randBinomial(experiments, probability)
|
||||
```
|
||||
|
||||
@ -194,13 +288,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT randBinomial(100, .75) FROM numbers(5)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` result
|
||||
```result
|
||||
┌─randBinomial(100, 0.75)─┐
|
||||
│ 74 │
|
||||
│ 78 │
|
||||
@ -216,7 +310,7 @@ Returns a random UInt64 drawn from a [negative binomial distribution](https://en
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
randNegativeBinomial(experiments, probability)
|
||||
```
|
||||
|
||||
@ -233,13 +327,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT randNegativeBinomial(100, .75) FROM numbers(5)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` result
|
||||
```result
|
||||
┌─randNegativeBinomial(100, 0.75)─┐
|
||||
│ 33 │
|
||||
│ 32 │
|
||||
@ -255,7 +349,7 @@ Returns a random UInt64 drawn from a [Poisson distribution](https://en.wikipedia
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
randPoisson(n)
|
||||
```
|
||||
|
||||
@ -271,13 +365,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT randPoisson(10) FROM numbers(5)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` result
|
||||
```result
|
||||
┌─randPoisson(10)─┐
|
||||
│ 8 │
|
||||
│ 8 │
|
||||
@ -293,7 +387,7 @@ Returns a random UInt64 drawn from a [Bernoulli distribution](https://en.wikiped
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
randBernoulli(probability)
|
||||
```
|
||||
|
||||
@ -309,13 +403,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT randBernoulli(.75) FROM numbers(5)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` result
|
||||
```result
|
||||
┌─randBernoulli(0.75)─┐
|
||||
│ 1 │
|
||||
│ 1 │
|
||||
@ -331,7 +425,7 @@ Returns a random Float64 drawn from a [exponential distribution](https://en.wiki
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
randExponential(lambda)
|
||||
```
|
||||
|
||||
@ -347,13 +441,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT randExponential(1/10) FROM numbers(5)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` result
|
||||
```result
|
||||
┌─randExponential(divide(1, 10))─┐
|
||||
│ 44.71628934340778 │
|
||||
│ 4.211013337903262 │
|
||||
@ -369,7 +463,7 @@ Returns a random Float64 drawn from a [Chi-square distribution](https://en.wikip
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
randChiSquared(degree_of_freedom)
|
||||
```
|
||||
|
||||
@ -385,13 +479,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT randChiSquared(10) FROM numbers(5)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` result
|
||||
```result
|
||||
┌─randChiSquared(10)─┐
|
||||
│ 10.015463656521543 │
|
||||
│ 9.621799919882768 │
|
||||
@ -407,7 +501,7 @@ Returns a random Float64 drawn from a [Student's t-distribution](https://en.wiki
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
randStudentT(degree_of_freedom)
|
||||
```
|
||||
|
||||
@ -423,13 +517,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT randStudentT(10) FROM numbers(5)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` result
|
||||
```result
|
||||
┌─────randStudentT(10)─┐
|
||||
│ 1.2217309938538725 │
|
||||
│ 1.7941971681200541 │
|
||||
@ -445,7 +539,7 @@ Returns a random Float64 drawn from a [F-distribution](https://en.wikipedia.org/
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
randFisherF(d1, d2)
|
||||
```
|
||||
|
||||
@ -462,13 +556,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT randFisherF(10, 3) FROM numbers(5)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` result
|
||||
```result
|
||||
┌──randFisherF(10, 3)─┐
|
||||
│ 7.286287504216609 │
|
||||
│ 0.26590779413050386 │
|
||||
@ -484,7 +578,7 @@ Generates a string of the specified length filled with random bytes (including z
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
randomString(length)
|
||||
```
|
||||
|
||||
@ -502,13 +596,13 @@ Type: [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT randomString(30) AS str, length(str) AS len FROM numbers(2) FORMAT Vertical;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
```text
|
||||
Row 1:
|
||||
──────
|
||||
str: 3 G : pT ?w тi k aV f6
|
||||
@ -526,7 +620,7 @@ Generates a binary string of the specified length filled with random bytes (incl
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
randomFixedString(length);
|
||||
```
|
||||
|
||||
@ -563,7 +657,7 @@ If you pass `length < 0`, the behavior of the function is undefined.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
randomPrintableASCII(length)
|
||||
```
|
||||
|
||||
@ -579,11 +673,11 @@ Type: [String](../../sql-reference/data-types/string.md)
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT number, randomPrintableASCII(30) as str, length(str) FROM system.numbers LIMIT 3
|
||||
```
|
||||
|
||||
``` text
|
||||
```text
|
||||
┌─number─┬─str────────────────────────────┬─length(randomPrintableASCII(30))─┐
|
||||
│ 0 │ SuiCOSTvC0csfABSw=UcSzp2.`rv8x │ 30 │
|
||||
│ 1 │ 1Ag NlJ &RCN:*>HVPG;PE-nO"SUFD │ 30 │
|
||||
@ -597,7 +691,7 @@ Generates a random string of a specified length. Result string contains valid UT
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
randomStringUTF8(length);
|
||||
```
|
||||
|
||||
@ -635,11 +729,12 @@ Flips the bits of String or FixedString `s`, each with probability `prob`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
fuzzBits(s, prob)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `s` - `String` or `FixedString`,
|
||||
- `prob` - constant `Float32/64` between 0.0 and 1.0.
|
||||
|
||||
@ -649,14 +744,14 @@ Fuzzed string with same type as `s`.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT fuzzBits(materialize('abacaba'), 0.1)
|
||||
FROM numbers(3)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` result
|
||||
```result
|
||||
┌─fuzzBits(materialize('abacaba'), 0.1)─┐
|
||||
│ abaaaja │
|
||||
│ a*cjab+ │
|
||||
|
@ -968,7 +968,7 @@ Converts a numeric value to String with the number of fractional digits in the o
|
||||
toDecimalString(number, scale)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md),
|
||||
- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
|
||||
@ -1261,7 +1261,7 @@ Converts input value `x` to the specified data type `T`. Always returns [Nullabl
|
||||
accurateCastOrNull(x, T)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `x` — Input value.
|
||||
- `T` — The name of the returned data type.
|
||||
@ -1314,7 +1314,7 @@ Converts input value `x` to the specified data type `T`. Returns default type va
|
||||
accurateCastOrDefault(x, T)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `x` — Input value.
|
||||
- `T` — The name of the returned data type.
|
||||
@ -1675,7 +1675,7 @@ Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also pa
|
||||
parseDateTime64BestEffort(time_string [, precision [, time_zone]])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `time_string` — String containing a date or date with time to convert. [String](/docs/en/sql-reference/data-types/string.md).
|
||||
- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
|
||||
@ -1990,7 +1990,7 @@ Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wi
|
||||
snowflakeToDateTime(value[, time_zone])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md).
|
||||
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md).
|
||||
@ -2026,7 +2026,7 @@ Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wi
|
||||
snowflakeToDateTime64(value[, time_zone])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md).
|
||||
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md).
|
||||
@ -2062,7 +2062,7 @@ Converts a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value to th
|
||||
dateTimeToSnowflake(value)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `value` — Date with time. [DateTime](/docs/en/sql-reference/data-types/datetime.md).
|
||||
|
||||
@ -2096,7 +2096,7 @@ Convert a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) to the f
|
||||
dateTime64ToSnowflake(value)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `value` — Date with time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md).
|
||||
|
||||
|
@ -155,7 +155,7 @@ Configuration example:
|
||||
cutToFirstSignificantSubdomain(URL, TLD)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `URL` — URL. [String](../../sql-reference/data-types/string.md).
|
||||
- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md).
|
||||
@ -209,7 +209,7 @@ Configuration example:
|
||||
cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `URL` — URL. [String](../../sql-reference/data-types/string.md).
|
||||
- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md).
|
||||
@ -263,7 +263,7 @@ Configuration example:
|
||||
firstSignificantSubdomainCustom(URL, TLD)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `URL` — URL. [String](../../sql-reference/data-types/string.md).
|
||||
- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md).
|
||||
|
@ -1756,7 +1756,8 @@ def main() -> int:
|
||||
result["build"] = build_digest
|
||||
result["docs"] = docs_digest
|
||||
result["ci_flags"] = ci_flags
|
||||
result["stages_data"] = _generate_ci_stage_config(jobs_data)
|
||||
if not args.skip_jobs:
|
||||
result["stages_data"] = _generate_ci_stage_config(jobs_data)
|
||||
result["jobs_data"] = jobs_data
|
||||
result["docker_data"] = docker_data
|
||||
### CONFIGURE action: end
|
||||
|
@ -288,6 +288,9 @@ class JobReport:
|
||||
# if False no GH commit status will be created by CI
|
||||
need_commit_status: bool = True
|
||||
|
||||
def __post_init__(self):
|
||||
assert self.status in (SUCCESS, ERROR, FAILURE, PENDING)
|
||||
|
||||
@classmethod
|
||||
def exist(cls) -> bool:
|
||||
return JOB_REPORT_FILE.is_file()
|
||||
|
@ -1,16 +1,17 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
import csv
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
|
||||
from docker_images_helper import get_docker_image, pull_image
|
||||
from env_helper import REPO_COPY, TEMP_PATH
|
||||
from env_helper import CI, REPO_COPY, TEMP_PATH
|
||||
from git_helper import GIT_PREFIX, git_runner
|
||||
from pr_info import PRInfo
|
||||
from report import ERROR, FAILURE, SUCCESS, JobReport, TestResults, read_test_results
|
||||
@ -120,43 +121,70 @@ def checkout_last_ref(pr_info: PRInfo) -> None:
|
||||
def main():
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logging.getLogger("git_helper").setLevel(logging.DEBUG)
|
||||
# args = parse_args()
|
||||
args = parse_args()
|
||||
|
||||
stopwatch = Stopwatch()
|
||||
|
||||
repo_path = Path(REPO_COPY)
|
||||
temp_path = Path(TEMP_PATH)
|
||||
if temp_path.is_dir():
|
||||
shutil.rmtree(temp_path)
|
||||
temp_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# pr_info = PRInfo()
|
||||
pr_info = PRInfo()
|
||||
|
||||
IMAGE_NAME = "clickhouse/style-test"
|
||||
image = pull_image(get_docker_image(IMAGE_NAME))
|
||||
cmd_1 = (
|
||||
cmd_cpp = (
|
||||
f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE "
|
||||
f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output "
|
||||
f"--entrypoint= -w/ClickHouse/utils/check-style "
|
||||
f"{image} ./check_cpp_docs.sh"
|
||||
f"{image} ./check_cpp.sh"
|
||||
)
|
||||
cmd_2 = (
|
||||
|
||||
cmd_py = (
|
||||
f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE "
|
||||
f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output "
|
||||
f"--entrypoint= -w/ClickHouse/utils/check-style "
|
||||
f"{image} ./check_py.sh"
|
||||
)
|
||||
logging.info("Is going to run the command: %s", cmd_1)
|
||||
logging.info("Is going to run the command: %s", cmd_2)
|
||||
|
||||
cmd_docs = (
|
||||
f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE "
|
||||
f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output "
|
||||
f"--entrypoint= -w/ClickHouse/utils/check-style "
|
||||
f"{image} ./check_docs.sh"
|
||||
)
|
||||
|
||||
with ProcessPoolExecutor(max_workers=2) as executor:
|
||||
# Submit commands for execution in parallel
|
||||
future1 = executor.submit(subprocess.run, cmd_1, shell=True)
|
||||
future2 = executor.submit(subprocess.run, cmd_2, shell=True)
|
||||
# Wait for both commands to complete
|
||||
_ = future1.result()
|
||||
_ = future2.result()
|
||||
logging.info("Run docs files check: %s", cmd_docs)
|
||||
future = executor.submit(subprocess.run, cmd_docs, shell=True)
|
||||
# Parallelization does not make it faster - run sequentially
|
||||
_ = future.result()
|
||||
|
||||
# if args.push:
|
||||
# checkout_head(pr_info)
|
||||
run_cppcheck = True
|
||||
run_pycheck = True
|
||||
if CI and pr_info.number > 0:
|
||||
pr_info.fetch_changed_files()
|
||||
if not any(file.endswith(".py") for file in pr_info.changed_files):
|
||||
run_pycheck = False
|
||||
if all(file.endswith(".py") for file in pr_info.changed_files):
|
||||
run_cppcheck = False
|
||||
|
||||
if run_cppcheck:
|
||||
logging.info("Run source files check: %s", cmd_cpp)
|
||||
future1 = executor.submit(subprocess.run, cmd_cpp, shell=True)
|
||||
_ = future1.result()
|
||||
|
||||
if run_pycheck:
|
||||
if args.push:
|
||||
checkout_head(pr_info)
|
||||
logging.info("Run py files check: %s", cmd_py)
|
||||
future2 = executor.submit(subprocess.run, cmd_py, shell=True)
|
||||
_ = future2.result()
|
||||
if args.push:
|
||||
commit_push_staged(pr_info)
|
||||
checkout_last_ref(pr_info)
|
||||
|
||||
subprocess.check_call(
|
||||
f"python3 ../../utils/check-style/process_style_check_result.py --in-results-dir {temp_path} "
|
||||
@ -165,10 +193,6 @@ def main():
|
||||
shell=True,
|
||||
)
|
||||
|
||||
# if args.push:
|
||||
# commit_push_staged(pr_info)
|
||||
# checkout_last_ref(pr_info)
|
||||
|
||||
state, description, test_results, additional_files = process_result(temp_path)
|
||||
|
||||
JobReport(
|
||||
|
@ -260,6 +260,7 @@ ExactEdgeLengthRads
|
||||
ExecutablePool
|
||||
ExtType
|
||||
ExternalDistributed
|
||||
FFFFFFFF
|
||||
FFFD
|
||||
FIPS
|
||||
FOSDEM
|
||||
@ -546,6 +547,8 @@ MinIO
|
||||
MinMax
|
||||
MindsDB
|
||||
Mongodb
|
||||
mortonDecode
|
||||
mortonEncode
|
||||
MsgPack
|
||||
MultiPolygon
|
||||
Multiline
|
||||
@ -2743,6 +2746,7 @@ xz
|
||||
yaml
|
||||
yandex
|
||||
youtube
|
||||
ZCurve
|
||||
zLib
|
||||
zLinux
|
||||
zabbix
|
||||
|
@ -4,31 +4,35 @@
|
||||
|
||||
cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv
|
||||
|
||||
start_total=`date +%s`
|
||||
|
||||
# FIXME: 30 sec to wait
|
||||
# echo "Check duplicates" | ts
|
||||
# ./check-duplicate-includes.sh |& tee /test_output/duplicate_includes_output.txt
|
||||
|
||||
echo "Check style" | ts
|
||||
start=`date +%s`
|
||||
./check-style -n |& tee /test_output/style_output.txt
|
||||
echo "Check typos" | ts
|
||||
./check-typos |& tee /test_output/typos_output.txt
|
||||
echo "Check docs spelling" | ts
|
||||
./check-doc-aspell |& tee /test_output/docs_spelling_output.txt
|
||||
echo "Check whitespaces" | ts
|
||||
runtime=$((`date +%s`-start))
|
||||
echo "Check style. Done. $runtime seconds."
|
||||
|
||||
start=`date +%s`
|
||||
./check-whitespaces -n |& tee /test_output/whitespaces_output.txt
|
||||
echo "Check workflows" | ts
|
||||
runtime=$((`date +%s`-start))
|
||||
echo "Check whitespaces. Done. $runtime seconds."
|
||||
|
||||
start=`date +%s`
|
||||
./check-workflows |& tee /test_output/workflows_output.txt
|
||||
echo "Check submodules" | ts
|
||||
runtime=$((`date +%s`-start))
|
||||
echo "Check workflows. Done. $runtime seconds."
|
||||
|
||||
start=`date +%s`
|
||||
./check-submodules |& tee /test_output/submodules_output.txt
|
||||
echo "Check style. Done" | ts
|
||||
runtime=$((`date +%s`-start))
|
||||
echo "Check submodules. Done. $runtime seconds."
|
||||
|
||||
# FIXME: 6 min to wait
|
||||
# echo "Check shell scripts with shellcheck" | ts
|
||||
# ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt
|
||||
|
||||
|
||||
# FIXME: move out
|
||||
# /process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
|
||||
# echo "Check help for changelog generator works" | ts
|
||||
# cd ../changelog || exit 1
|
||||
# ./changelog.py -h 2>/dev/null 1>&2
|
||||
runtime=$((`date +%s`-start_total))
|
||||
echo "Check style total. Done. $runtime seconds."
|
20
utils/check-style/check_docs.sh
Executable file
20
utils/check-style/check_docs.sh
Executable file
@ -0,0 +1,20 @@
|
||||
#!/bin/bash
|
||||
|
||||
# yaml check is not the best one
|
||||
|
||||
cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv
|
||||
|
||||
start_total=`date +%s`
|
||||
|
||||
start=`date +%s`
|
||||
./check-typos |& tee /test_output/typos_output.txt
|
||||
runtime=$((`date +%s`-start))
|
||||
echo "Check typos. Done. $runtime seconds."
|
||||
|
||||
start=`date +%s`
|
||||
./check-doc-aspell |& tee /test_output/docs_spelling_output.txt
|
||||
runtime=$((`date +%s`-start))
|
||||
echo "Check docs spelling. Done. $runtime seconds."
|
||||
|
||||
runtime=$((`date +%s`-start_total))
|
||||
echo "Check Docs, total. Done. $runtime seconds."
|
@ -1,17 +1,22 @@
|
||||
#!/bin/bash
|
||||
|
||||
# yaml check is not the best one
|
||||
|
||||
cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv
|
||||
|
||||
start_total=`date +%s`
|
||||
|
||||
# FIXME: 1 min to wait + head checkout
|
||||
# echo "Check python formatting with black" | ts
|
||||
# ./check-black -n |& tee /test_output/black_output.txt
|
||||
echo "Check python formatting with black" | ts
|
||||
./check-black -n |& tee /test_output/black_output.txt
|
||||
|
||||
echo "Check pylint" | ts
|
||||
start=`date +%s`
|
||||
./check-pylint -n |& tee /test_output/pylint_output.txt
|
||||
echo "Check pylint. Done" | ts
|
||||
runtime=$((`date +%s`-start))
|
||||
echo "Check pylint. Done. $runtime seconds."
|
||||
|
||||
echo "Check python type hinting with mypy" | ts
|
||||
start=`date +%s`
|
||||
./check-mypy -n |& tee /test_output/mypy_output.txt
|
||||
echo "Check python type hinting with mypy. Done" | ts
|
||||
runtime=$((`date +%s`-start))
|
||||
echo "Check python type hinting with mypy. Done. $runtime seconds."
|
||||
|
||||
runtime=$((`date +%s`-start_total))
|
||||
echo "Check python total. Done. $runtime seconds."
|
||||
|
@ -1,9 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import logging
|
||||
import argparse
|
||||
import csv
|
||||
import logging
|
||||
import os
|
||||
|
||||
|
||||
# TODO: add typing and log files to the fourth column, think about launching
|
||||
@ -13,11 +13,11 @@ def process_result(result_folder):
|
||||
description = ""
|
||||
test_results = []
|
||||
checks = (
|
||||
#"duplicate includes",
|
||||
#"shellcheck",
|
||||
# "duplicate includes",
|
||||
# "shellcheck",
|
||||
"style",
|
||||
"pylint",
|
||||
#"black",
|
||||
"black",
|
||||
"mypy",
|
||||
"typos",
|
||||
"whitespaces",
|
||||
@ -30,11 +30,15 @@ def process_result(result_folder):
|
||||
out_file = name.replace(" ", "_") + "_output.txt"
|
||||
full_path = os.path.join(result_folder, out_file)
|
||||
if not os.path.exists(full_path):
|
||||
logging.info("No %s check log on path %s", name, full_path)
|
||||
return "exception", f"No {name} check log", []
|
||||
test_results.append((f"Check {name}", "SKIPPED"))
|
||||
elif os.stat(full_path).st_size != 0:
|
||||
with open(full_path, "r") as file:
|
||||
lines = file.readlines()
|
||||
if len(lines) > 100:
|
||||
lines = lines[:100] + ["====TRIMMED===="]
|
||||
content = "\n".join(lines)
|
||||
description += f"Check {name} failed. "
|
||||
test_results.append((f"Check {name}", "FAIL"))
|
||||
test_results.append((f"Check {name}", "FAIL", None, content))
|
||||
status = "failure"
|
||||
else:
|
||||
test_results.append((f"Check {name}", "OK"))
|
||||
@ -42,6 +46,8 @@ def process_result(result_folder):
|
||||
if not description:
|
||||
description += "Style check success"
|
||||
|
||||
assert test_results, "No single style-check output found"
|
||||
|
||||
return status, description, test_results
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user