Write a script to automatically update the dict

This commit is contained in:
Pablo Marcos 2024-08-13 14:18:48 +00:00
parent 16fd24fb1f
commit 79e0557839
5 changed files with 1389 additions and 9732 deletions

View File

@ -1,23 +0,0 @@
The list of functions is generated via the following query:
```
clickhouse client -q "SELECT * FROM (SELECT DISTINCT concat('\"', name, '\"') as res FROM system.functions ORDER BY name UNION ALL SELECT concat('\"', a.name, b.name, '\"') as res FROM system.functions as a CROSS JOIN system.aggregate_function_combinators as b WHERE a.is_aggregate = 1) ORDER BY res" > functions.dict
```
The list of datatypes is generated via the following query:
```
clickhouse client -q "SELECT DISTINCT concat('\"', name, '\"') as res FROM system.data_type_families ORDER BY name" > datatypes.dict
```
The list of keywords is generated via the following query:
```
clickhouse client -q "SELECT DISTINCT concat('\"', keyword, '\"') as res FROM system.keywords ORDER BY keyword" > keywords.dict
```
Then merge all dictionaries into one (all.dict)
```
cat ./dictionaries/* | LC_ALL=C sort | uniq > all.dict
```

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

20
tests/fuzz/update_dict.sh Executable file
View File

@ -0,0 +1,20 @@
#!/bin/bash
#
# Regenerate the fuzzer dictionaries from a ClickHouse binary.
#
# Runs queries against system tables through `clickhouse local` and writes:
#   dictionaries/functions.dict  - quoted function names, plus every aggregate
#                                  function name concatenated with every
#                                  aggregate function combinator
#   dictionaries/datatypes.dict  - quoted data type family names
#   dictionaries/keywords.dict   - quoted keywords
#   all.dict                     - sorted, de-duplicated union of every file in
#                                  dictionaries/
#
# Environment:
#   CLICKHOUSE_BIN  Path to the clickhouse binary. When unset or empty it
#                   defaults to <git toplevel>/build/programs/clickhouse.
set -euo pipefail

SCRIPT_DIR=$(dirname "$(realpath "$0")")
DICTIONARIES_DIR="$SCRIPT_DIR/dictionaries"

# Only ask git for the repository root when the caller did not supply a binary,
# and resolve it relative to this script instead of the current directory so
# the script can be started from anywhere.
if [[ -z "${CLICKHOUSE_BIN:-}" ]]; then
  ROOT_PATH="$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel)"
  CLICKHOUSE_BIN="$ROOT_PATH/build/programs/clickhouse"
fi

# Fail early with a readable message instead of a bare "No such file".
if ! command -v "$CLICKHOUSE_BIN" > /dev/null; then
  echo "clickhouse binary not found or not executable: $CLICKHOUSE_BIN" >&2
  echo "Build it first or point CLICKHOUSE_BIN at an existing binary." >&2
  exit 1
fi

mkdir -p -- "$DICTIONARIES_DIR"

# All output is staged here first. Files created by redirection inside the
# directory get the usual umask-based permissions, unlike files made by mktemp.
TMP_DIR=$(mktemp -d)
trap 'rm -rf -- "${TMP_DIR:?}"' EXIT

#######################################
# Run one query and install its output as a dictionary file.
# The result is staged in TMP_DIR and moved into place only after the query
# succeeded, so a failing query never truncates an existing dictionary.
# Globals:   CLICKHOUSE_BIN, DICTIONARIES_DIR, TMP_DIR (read)
# Arguments: $1 - dictionary file name (e.g. functions.dict)
#            $2 - SQL query producing one dictionary entry per row
# Returns:   non-zero (aborting the script under `set -e`) if the query fails
#######################################
generate_dict() {
  local name=$1
  local query=$2
  "$CLICKHOUSE_BIN" local -q "$query" > "$TMP_DIR/$name"
  mv -- "$TMP_DIR/$name" "$DICTIONARIES_DIR/$name"
}

echo "Generating functions dict"
generate_dict functions.dict "SELECT * FROM (SELECT DISTINCT concat('\"', name, '\"') as res FROM system.functions ORDER BY name UNION ALL SELECT concat('\"', a.name, b.name, '\"') as res FROM system.functions as a CROSS JOIN system.aggregate_function_combinators as b WHERE a.is_aggregate = 1) ORDER BY res"

echo "Generating data types dict"
generate_dict datatypes.dict "SELECT DISTINCT concat('\"', name, '\"') as res FROM system.data_type_families ORDER BY name"

echo "Generating keywords dict"
generate_dict keywords.dict "SELECT DISTINCT concat('\"', keyword, '\"') as res FROM system.keywords ORDER BY keyword"

echo "Merging dictionaries into all.dict"
# LC_ALL=C gives a byte-wise, locale-independent order so the output is
# reproducible across machines; -u replaces the separate `uniq` stage.
LC_ALL=C sort -u -- "$DICTIONARIES_DIR"/* > "$TMP_DIR/all.dict"
mv -- "$TMP_DIR/all.dict" "$SCRIPT_DIR/all.dict"