Mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-11-22 23:52:03 +00:00

Commit 4e1b96fa4c: Merge remote-tracking branch 'upstream/master' into group_by_all
@@ -10,7 +10,7 @@ ClickHouse® is an open-source column-oriented database management system that a

 * [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information.
 * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
 * [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-rxm3rdrk-lIUmhLC3V8WTaL0TGxsOmg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
-* [Blog](https://clickhouse.com/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events.
+* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
 * [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
 * [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.
 * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
@@ -5,6 +5,7 @@ FROM ubuntu:20.04

 ARG apt_archive="http://archive.ubuntu.com"
 RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list

+# 15.0.2
 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=15

 RUN apt-get update \

@@ -58,6 +59,9 @@ RUN apt-get update \

 RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
 # for external_symbolizer_path
 RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer
+
+# FIXME: workaround for "The imported target "merge-fdata" references the file" error
+# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d
+RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake

 ARG CCACHE_VERSION=4.6.1
 RUN mkdir /tmp/ccache \
@@ -1,14 +0,0 @@
----
-slug: /en/development/browse-code
-sidebar_label: Source Code Browser
-sidebar_position: 72
-description: Various ways to browse and edit the source code
----
-
-# Browse ClickHouse Source Code
-
-You can use the **Woboq** online code browser available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). It provides code navigation and semantic highlighting, search and indexing. The code snapshot is updated daily.
-
-Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual.
-
-If you're interested what IDE to use, we recommend CLion, QT Creator, VS Code and KDevelop (with caveats). You can use any favorite IDE. Vim and Emacs also count.
@@ -1,14 +0,0 @@
----
-slug: /ru/development/browse-code
-sidebar_position: 72
-sidebar_label: "Навигация по коду ClickHouse"
----
-
-# Browsing the ClickHouse code {#navigatsiia-po-kodu-clickhouse}
-
-For online code navigation you can use **Woboq**, available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). It provides convenient navigation between source files, semantic highlighting, hints, indexing, and search. The code snapshot is updated daily.
-
-You can also browse the sources on [GitHub](https://github.com/ClickHouse/ClickHouse).
-
-If you are wondering which IDE to choose for working with ClickHouse, we recommend CLion, QT Creator, VS Code, or KDevelop (with some caveats). You can use your favorite IDE; Vim and Emacs also count.
@@ -1,13 +0,0 @@
----
-slug: /zh/development/browse-code
-sidebar_position: 63
-sidebar_label: "\u6D4F\u89C8\u6E90\u4EE3\u7801"
----
-
-# Browse ClickHouse source code {#browse-clickhouse-source-code}
-
-You can use the **Woboq** online code browser, available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). It provides code navigation and semantic highlighting, search and indexing. The code snapshot is updated daily.
-
-You can also browse the source code on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual.
-
-If you are wondering which IDE to use, we recommend CLion, QT Creator, VS Code, and KDevelop (with caveats). You can use any IDE you like; Vim and Emacs also count.
@@ -1,9 +1,232 @@
----
-slug: /zh/getting-started/example-datasets/cell-towers
-sidebar_label: Cell Towers
-title: "Cell Towers"
----
-
-import Content from '@site/docs/en/getting-started/example-datasets/cell-towers.md';
-
-<Content />

---
slug: /zh/getting-started/example-datasets/cell-towers
sidebar_label: Cell Towers
sidebar_position: 3
title: "Cell Towers"
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import CodeBlock from '@theme/CodeBlock';
import ActionsMenu from '@site/docs/en/_snippets/_service_actions_menu.md';
import SQLConsoleDetail from '@site/docs/en/_snippets/_launch_sql_console.md';

This dataset comes from [OpenCellid](https://www.opencellid.org/), the world's largest open database of cell towers.

As of 2021, it contains more than 40 million records about cell towers (GSM, LTE, UMTS, etc.) worldwide, along with their geographical coordinates and metadata (country code, network, etc.).

The OpenCelliD project is licensed under the `Creative Commons Attribution-ShareAlike 4.0 International License`, and we redistribute a snapshot of this dataset under the terms of the same license. The latest version of the dataset is available to download after sign-in.

## Get the dataset {#get-the-dataset}

<Tabs groupId="deployMethod">
<TabItem value="serverless" label="ClickHouse Cloud" default>

On ClickHouse Cloud, this dataset can be loaded from S3 with a single button click. Log in to your ClickHouse Cloud organization, or create a free trial at [ClickHouse.cloud](https://clickhouse.cloud).<ActionsMenu menu="Load Data" />

Choose the **Cell Towers** dataset from the **Sample data** tab, then select **Load data**:

![Load the dataset](@site/docs/en/_snippets/images/cloud-load-data-sample.png)

Examine the schema of the cell_towers table:

```sql
DESCRIBE TABLE cell_towers
```

<SQLConsoleDetail />

</TabItem>
<TabItem value="selfmanaged" label="Self-managed">

1. Download the snapshot of the dataset from February 2021: [cell_towers.csv.xz](https://datasets.clickhouse.com/cell_towers.csv.xz) (729 MB).

2. Validate the integrity (optional step):

```bash
md5sum cell_towers.csv.xz
```

```response
8cf986f4a0d9f12c6f384a0e9192c908 cell_towers.csv.xz
```

3. Decompress it with the following command:

```bash
xz -d cell_towers.csv.xz
```

4. Create a table:

```sql
CREATE TABLE cell_towers
(
    radio Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4, 'UMTS' = 5),
    mcc UInt16,
    net UInt16,
    area UInt16,
    cell UInt64,
    unit Int16,
    lon Float64,
    lat Float64,
    range UInt32,
    samples UInt32,
    changeable UInt8,
    created DateTime,
    updated DateTime,
    averageSignal UInt8
)
ENGINE = MergeTree ORDER BY (radio, mcc, net, created);
```

5. Insert the dataset:

```bash
clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_towers.csv
```

</TabItem>
</Tabs>

## Examples {#examples}

1. Number of cell towers by type:

```sql
SELECT radio, count() AS c FROM cell_towers GROUP BY radio ORDER BY c DESC
```
```response
┌─radio─┬────────c─┐
│ UMTS │ 20686487 │
│ LTE │ 12101148 │
│ GSM │ 9931312 │
│ CDMA │ 556344 │
│ NR │ 867 │
└───────┴──────────┘

5 rows in set. Elapsed: 0.011 sec. Processed 43.28 million rows, 43.28 MB (3.83 billion rows/s., 3.83 GB/s.)
```

2. Number of cell towers per [Mobile Country Code (MCC)](https://en.wikipedia.org/wiki/Mobile_country_code):

```sql
SELECT mcc, count() FROM cell_towers GROUP BY mcc ORDER BY count() DESC LIMIT 10
```
```response
┌─mcc─┬─count()─┐
│ 310 │ 5024650 │
│ 262 │ 2622423 │
│ 250 │ 1953176 │
│ 208 │ 1891187 │
│ 724 │ 1836150 │
│ 404 │ 1729151 │
│ 234 │ 1618924 │
│ 510 │ 1353998 │
│ 440 │ 1343355 │
│ 311 │ 1332798 │
└─────┴─────────┘

10 rows in set. Elapsed: 0.019 sec. Processed 43.28 million rows, 86.55 MB (2.33 billion rows/s., 4.65 GB/s.)
```

The top countries are: the USA, Germany, and Russia.

You can decode these values by creating an [External Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) in ClickHouse.
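A minimal sketch of such a dictionary, assuming a hypothetical `mcc_codes` lookup table; neither the table nor its column names are part of this dataset, they are purely illustrative:

```sql
-- Hypothetical MCC -> country lookup table (illustrative only).
CREATE TABLE mcc_codes
(
    mcc UInt64,
    country String
)
ENGINE = MergeTree ORDER BY mcc;

-- Dictionary over the lookup table; the FLAT layout requires a UInt64 key.
CREATE DICTIONARY mcc_dict
(
    mcc UInt64,
    country String
)
PRIMARY KEY mcc
SOURCE(CLICKHOUSE(TABLE 'mcc_codes'))
LAYOUT(FLAT())
LIFETIME(MIN 0 MAX 3600);

-- Decode MCC values on the fly while aggregating.
SELECT dictGet('mcc_dict', 'country', toUInt64(mcc)) AS country, count() AS c
FROM cell_towers
GROUP BY country
ORDER BY c DESC
LIMIT 10;
```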
## Use case: Incorporating geo data {#use-case}

Using the `pointInPolygon` function; a tiny standalone demo follows before the full walkthrough.
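As a quick, self-contained illustration of the function (a toy square of our own, not the real geofence used below): the point is a `(lon, lat)` tuple and the polygon is an array of such tuples.

```sql
-- Is the point (1, 1) inside the 4x4 square? Returns 1 (true).
SELECT pointInPolygon((1., 1.), [(0., 0.), (4., 0.), (4., 4.), (0., 4.)]) AS inside;
```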
1. Create a table where we will store the polygons:

<Tabs groupId="deployMethod">
<TabItem value="serverless" label="ClickHouse Cloud" default>

```sql
CREATE TABLE moscow (polygon Array(Tuple(Float64, Float64)))
ORDER BY polygon;
```

</TabItem>
<TabItem value="selfmanaged" label="Self-managed">

```sql
CREATE TEMPORARY TABLE
moscow (polygon Array(Tuple(Float64, Float64)));
```

</TabItem>
</Tabs>

2. The following points constitute, roughly, the geo-fence of Moscow (excluding "New Moscow"):

```sql
INSERT INTO moscow VALUES ([(37.84172564285271, 55.78000432402266),
(37.8381207618713, 55.775874525970494), (37.83979446823122, 55.775626746008065), (37.84243326983639, 55.77446586811748), (37.84262672750849, 55.771974101091104), (37.84153238623039, 55.77114545193181), (37.841124690460184, 55.76722010265554),
(37.84239076983644, 55.76654891107098), (37.842283558197025, 55.76258709833121), (37.8421759312134, 55.758073999993734), (37.84198330422974, 55.75381499999371), (37.8416827275085, 55.749277102484484), (37.84157576190186, 55.74794544108413),
(37.83897929098507, 55.74525257875241), (37.83739676451868, 55.74404373042019), (37.838732481460525, 55.74298009816793), (37.841183997352545, 55.743060321833575), (37.84097476190185, 55.73938799999373), (37.84048155819702, 55.73570799999372),
(37.840095812164286, 55.73228210777237), (37.83983814285274, 55.73080491981639), (37.83846476321406, 55.729799917464675), (37.83835745269769, 55.72919751082619), (37.838636380279524, 55.72859509486539), (37.8395161005249, 55.727705075632784),
(37.83897964285276, 55.722727886185154), (37.83862557539366, 55.72034817326636), (37.83559735744853, 55.71944437307499), (37.835370708803126, 55.71831419154461), (37.83738169402022, 55.71765218986692), (37.83823396494291, 55.71691750159089),
(37.838056931213345, 55.71547311301385), (37.836812846557606, 55.71221445615604), (37.83522525396725, 55.709331054395555), (37.83269301586908, 55.70953687463627), (37.829667367706236, 55.70903403789297), (37.83311126588435, 55.70552351822608),
(37.83058993121339, 55.70041317726053), (37.82983872750851, 55.69883771404813), (37.82934501586913, 55.69718947487017), (37.828926414016685, 55.69504441658371), (37.82876530422971, 55.69287499999378), (37.82894754100031, 55.690759754047335),
(37.827697554878185, 55.68951421135665), (37.82447346292115, 55.68965045405069), (37.83136543914793, 55.68322046195302), (37.833554015869154, 55.67814012759211), (37.83544184655761, 55.67295011628339), (37.837480388885474, 55.6672498719639),
(37.838960677246064, 55.66316274139358), (37.83926093121332, 55.66046999999383), (37.839025050262435, 55.65869897264431), (37.83670784390257, 55.65794084879904), (37.835656529083245, 55.65694309303843), (37.83704060449217, 55.65689306460552),
(37.83696819873806, 55.65550363526252), (37.83760389616388, 55.65487847246661), (37.83687972750851, 55.65356745541324), (37.83515216004943, 55.65155951234079), (37.83312418518067, 55.64979413590619), (37.82801726983639, 55.64640836412121),
(37.820614174591, 55.64164525405531), (37.818908190475426, 55.6421883258084), (37.81717543386075, 55.64112490388471), (37.81690987037274, 55.63916106913107), (37.815099354492155, 55.637925371757085), (37.808769150787356, 55.633798276884455),
(37.80100123544311, 55.62873670012244), (37.79598013491824, 55.62554336109055), (37.78634567724606, 55.62033499605651), (37.78334147619623, 55.618768681480326), (37.77746201055901, 55.619855533402706), (37.77527329626457, 55.61909966711279),
(37.77801986242668, 55.618770300976294), (37.778212973541216, 55.617257701952106), (37.77784818518065, 55.61574504433011), (37.77016867724609, 55.61148576294007), (37.760191219573976, 55.60599579539028), (37.75338926983641, 55.60227892751446),
(37.746329965606634, 55.59920577639331), (37.73939925396728, 55.59631430313617), (37.73273665739439, 55.5935318803559), (37.7299954450912, 55.59350760316188), (37.7268679946899, 55.59469840523759), (37.72626726983634, 55.59229549697373),
(37.7262673598022, 55.59081598950582), (37.71897193121335, 55.5877595845419), (37.70871550793456, 55.58393177431724), (37.700497489410374, 55.580917323756644), (37.69204305026244, 55.57778089778455), (37.68544477378839, 55.57815154690915),
(37.68391050793454, 55.57472945079756), (37.678803592590306, 55.57328235936491), (37.6743402539673, 55.57255251445782), (37.66813862698363, 55.57216388774464), (37.617927457672096, 55.57505691895805), (37.60443099999999, 55.5757737568051),
(37.599683515869145, 55.57749105910326), (37.59754177842709, 55.57796291823627), (37.59625834786988, 55.57906686095235), (37.59501783265684, 55.57746616444403), (37.593090671936025, 55.57671634534502), (37.587018007904, 55.577944600233785),
(37.578692203704804, 55.57982895000019), (37.57327546607398, 55.58116294118248), (37.57385012109279, 55.581550362779), (37.57399562266922, 55.5820107079112), (37.5735356072979, 55.58226289171689), (37.57290393054962, 55.582393529795155),
(37.57037722355653, 55.581919415056234), (37.5592298306885, 55.584471614867844), (37.54189249206543, 55.58867650795186), (37.5297256269836, 55.59158133551745), (37.517837865081766, 55.59443656218868), (37.51200186508174, 55.59635625174229),
(37.506808949737554, 55.59907823904434), (37.49820432275389, 55.6062944994944), (37.494406071441674, 55.60967103463367), (37.494760001358024, 55.61066689753365), (37.49397137107085, 55.61220931698269), (37.49016528606031, 55.613417718449064),
(37.48773249206542, 55.61530616333343), (37.47921386508177, 55.622640129112334), (37.470652153442394, 55.62993723476164), (37.46273446298218, 55.6368075123157), (37.46350692265317, 55.64068225239439), (37.46050283203121, 55.640794546982576),
(37.457627470916734, 55.64118904154646), (37.450718034393326, 55.64690488145138), (37.44239252645875, 55.65397824729769), (37.434587576721185, 55.66053543155961), (37.43582144975277, 55.661693766520735), (37.43576786245721, 55.662755031737014),
(37.430982915344174, 55.664610641628116), (37.428547447097685, 55.66778515273695), (37.42945134592044, 55.668633314343566), (37.42859571562949, 55.66948145750025), (37.4262836402282, 55.670813882451405), (37.418709037048295, 55.6811141674414),
(37.41922139651101, 55.68235377885389), (37.419218771842885, 55.68359335082235), (37.417196501327446, 55.684375235224735), (37.41607020370478, 55.68540557585352), (37.415640857147146, 55.68686637150793), (37.414632153442334, 55.68903015131686),
(37.413344899475064, 55.690896881757396), (37.41171432275391, 55.69264232162232), (37.40948282275393, 55.69455101638112), (37.40703674603271, 55.69638690385348), (37.39607169577025, 55.70451821283731), (37.38952706878662, 55.70942491932811),
(37.387778313491815, 55.71149057784176), (37.39049275399779, 55.71419814298992), (37.385557272491454, 55.7155489617061), (37.38388335714726, 55.71849856042102), (37.378368238098155, 55.7292763261685), (37.37763597123337, 55.730845879211614),
(37.37890062088197, 55.73167906388319), (37.37750451918789, 55.734703664681774), (37.375610832015965, 55.734851959522246), (37.3723813571472, 55.74105626086403), (37.37014935714723, 55.746115620904355), (37.36944173016362, 55.750883999993725),
(37.36975304365541, 55.76335905525834), (37.37244070571134, 55.76432079697595), (37.3724259757175, 55.76636979670426), (37.369922155757884, 55.76735417953104), (37.369892695770275, 55.76823419316575), (37.370214730163575, 55.782312184391266),
(37.370493611114505, 55.78436801120489), (37.37120164550783, 55.78596427165359), (37.37284851456452, 55.7874378183096), (37.37608325135799, 55.7886695054807), (37.3764587460632, 55.78947647305964), (37.37530000265506, 55.79146512926804),
(37.38235915344241, 55.79899647809345), (37.384344043655396, 55.80113596939471), (37.38594269577028, 55.80322699999366), (37.38711208598329, 55.804919036911976), (37.3880239841309, 55.806610999993666), (37.38928977249147, 55.81001864976979),
(37.39038389947512, 55.81348641242801), (37.39235781481933, 55.81983538336746), (37.393709457672124, 55.82417822811877), (37.394685720901464, 55.82792275755836), (37.39557615344238, 55.830447148154136), (37.39844478226658, 55.83167107969975),
(37.40019761214057, 55.83151823557964), (37.400398790382326, 55.83264967594742), (37.39659544313046, 55.83322180909622), (37.39667059524539, 55.83402792148566), (37.39682089947515, 55.83638877400216), (37.39643489154053, 55.83861656112751),
(37.3955338994751, 55.84072348043264), (37.392680272491454, 55.84502158126453), (37.39241188227847, 55.84659117913199), (37.392529730163616, 55.84816071336481), (37.39486835714723, 55.85288092980303), (37.39873052645878, 55.859893456073635),
(37.40272161111449, 55.86441833633205), (37.40697072750854, 55.867579567544375), (37.410007082016016, 55.868369880337), (37.4120992989502, 55.86920843741314), (37.412668021163924, 55.87055369615854), (37.41482461111453, 55.87170587948249),
(37.41862266137694, 55.873183961039565), (37.42413732540892, 55.874879126654704), (37.4312182698669, 55.875614937236705), (37.43111093783558, 55.8762723478417), (37.43332105622856, 55.87706546369396), (37.43385747619623, 55.87790681284802),
(37.441303050262405, 55.88027084462084), (37.44747234260555, 55.87942070143253), (37.44716141796871, 55.88072960917233), (37.44769797085568, 55.88121221323979), (37.45204320500181, 55.882080694420715), (37.45673176190186, 55.882346110794586),
(37.463383999999984, 55.88252729504517), (37.46682797486874, 55.88294937719063), (37.470014457672086, 55.88361266759345), (37.47751410450743, 55.88546991372396), (37.47860317658232, 55.88534929207307), (37.48165826025772, 55.882563306475106),
(37.48316434442331, 55.8815803226785), (37.483831555817645, 55.882427612793315), (37.483182967125686, 55.88372791409729), (37.483092277908824, 55.88495581062434), (37.4855716508179, 55.8875561994203), (37.486440636245746, 55.887827444039566),
(37.49014203439328, 55.88897899871799), (37.493210285705544, 55.890208937135604), (37.497512451065035, 55.891342397444696), (37.49780744510645, 55.89174030252967), (37.49940333499519, 55.89239745507079), (37.50018383334346, 55.89339220941865),
(37.52421672750851, 55.903869074155224), (37.52977457672118, 55.90564076517974), (37.53503220370484, 55.90661661218259), (37.54042858064267, 55.90714113744566), (37.54320461007303, 55.905645048442985), (37.545686966066306, 55.906608607018505),
(37.54743976120755, 55.90788552162358), (37.55796999999999, 55.90901557907218), (37.572711542327866, 55.91059395704873), (37.57942799999998, 55.91073854155573), (37.58502865872187, 55.91009969268444), (37.58739968913264, 55.90794809960554),
(37.59131567193598, 55.908713267595054), (37.612687423278814, 55.902866854295375), (37.62348079629517, 55.90041967242986), (37.635797880950896, 55.898141151686396), (37.649487626983664, 55.89639275532968), (37.65619302513125, 55.89572360207488),
(37.66294133862307, 55.895295577183965), (37.66874564418033, 55.89505457604897), (37.67375601586915, 55.89254677027454), (37.67744661901856, 55.8947775867987), (37.688347, 55.89450045676125), (37.69480554232789, 55.89422926332761),
(37.70107096560668, 55.89322256101114), (37.705962965606716, 55.891763491662616), (37.711885134918205, 55.889110234998974), (37.71682005026245, 55.886577568759876), (37.7199315476074, 55.88458159806678), (37.72234560316464, 55.882281005794134),
(37.72364385977171, 55.8809452036196), (37.725371142837474, 55.8809722706006), (37.727870902099546, 55.88037213862385), (37.73394330422971, 55.877941504088696), (37.745339592590376, 55.87208120378722), (37.75525267724611, 55.86703807949492),
(37.76919976190188, 55.859821640197474), (37.827835219574, 55.82962968399116), (37.83341438888553, 55.82575289922351), (37.83652584655761, 55.82188784027888), (37.83809213491821, 55.81612575504693), (37.83605359521481, 55.81460347077685),
(37.83632178569025, 55.81276696067908), (37.838623105812026, 55.811486181656385), (37.83912198147584, 55.807329380532785), (37.839079078033414, 55.80510270463816), (37.83965844708251, 55.79940712529036), (37.840581150787344, 55.79131399999368),
(37.84172564285271, 55.78000432402266)]);
```

3. Check how many cell towers are in Moscow:

```sql
SELECT count() FROM cell_towers
WHERE pointInPolygon((lon, lat), (SELECT * FROM moscow))
```
```response
┌─count()─┐
│ 310463 │
└─────────┘

1 rows in set. Elapsed: 0.067 sec. Processed 43.28 million rows, 692.42 MB (645.83 million rows/s., 10.33 GB/s.)
```

Although you cannot create temporary tables there, this dataset is also available for interactive queries in the [Playground](https://play.clickhouse.com/play?user=play), [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=).
@@ -1,9 +1,352 @@
----
-slug: /zh/getting-started/example-datasets/menus
-sidebar_label: New York Public Library "What's on the Menu?" Dataset
-title: "New York Public Library \"What's on the Menu?\" Dataset"
----
-
-import Content from '@site/docs/en/getting-started/example-datasets/menus.md';
-
-<Content />

---
slug: /zh/getting-started/example-datasets/menus
sidebar_label: "New York Public Library \"What's on the Menu?\" Dataset"
title: "New York Public Library \"What's on the Menu?\" Dataset"
---

The dataset is created by the New York Public Library. It contains historical data on the menus of hotels, restaurants, and cafés: the dishes along with their prices.

Source: http://menus.nypl.org/data
The data is open data.

The data comes from the library's archive, so it may be incomplete and hard to analyze statistically. Nevertheless, it is a very interesting dataset. There are only 1.3 million records about dishes on menus — a very small data volume for ClickHouse, but it is still a good example.

## Download the dataset {#download-dataset}

Run the command:

```bash
wget https://s3.amazonaws.com/menusdata.nypl.org/gzips/2021_08_01_07_01_17_data.tgz
```

If needed, replace the link with the up-to-date one from http://menus.nypl.org/data. The download size is about 35 MB.

## Unpack the dataset {#unpack-dataset}

```bash
tar xvf 2021_08_01_07_01_17_data.tgz
```

The uncompressed size is about 150 MB.

The dataset consists of four tables:

- `Menu` — information about menus: the name of the restaurant, the date the menu was seen, etc.
- `Dish` — information about dishes: the name of a dish along with some characteristics.
- `MenuPage` — information about the pages in the menus; every page belongs to some `Menu`.
- `MenuItem` — an item of a menu: a dish along with its price on some menu page; links to `Dish` and `MenuPage`.

## Create the tables {#create-tables}

We use the [Decimal](/docs/zh/sql-reference/data-types/decimal.md) data type to store prices.

```sql
CREATE TABLE dish
(
    id UInt32,
    name String,
    description String,
    menus_appeared UInt32,
    times_appeared Int32,
    first_appeared UInt16,
    last_appeared UInt16,
    lowest_price Decimal64(3),
    highest_price Decimal64(3)
) ENGINE = MergeTree ORDER BY id;

CREATE TABLE menu
(
    id UInt32,
    name String,
    sponsor String,
    event String,
    venue String,
    place String,
    physical_description String,
    occasion String,
    notes String,
    call_number String,
    keywords String,
    language String,
    date String,
    location String,
    location_type String,
    currency String,
    currency_symbol String,
    status String,
    page_count UInt16,
    dish_count UInt16
) ENGINE = MergeTree ORDER BY id;

CREATE TABLE menu_page
(
    id UInt32,
    menu_id UInt32,
    page_number UInt16,
    image_id String,
    full_height UInt16,
    full_width UInt16,
    uuid UUID
) ENGINE = MergeTree ORDER BY id;

CREATE TABLE menu_item
(
    id UInt32,
    menu_page_id UInt32,
    price Decimal64(3),
    high_price Decimal64(3),
    dish_id UInt32,
    created_at DateTime,
    updated_at DateTime,
    xpos Float64,
    ypos Float64
) ENGINE = MergeTree ORDER BY id;
```

## Import the data {#import-data}

Run the following commands to load the data into ClickHouse:

```bash
clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --query "INSERT INTO dish FORMAT CSVWithNames" < Dish.csv
clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --query "INSERT INTO menu FORMAT CSVWithNames" < Menu.csv
clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --query "INSERT INTO menu_page FORMAT CSVWithNames" < MenuPage.csv
clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --date_time_input_format best_effort --query "INSERT INTO menu_item FORMAT CSVWithNames" < MenuItem.csv
```

We use the [CSVWithNames](/docs/zh/interfaces/formats.md#csvwithnames) format because the data is represented as CSV with a header.

We disable `format_csv_allow_single_quotes` because only double quotes are used for data fields, while single quotes may appear inside values and should not confuse the CSV parser.

We disable [input_format_null_as_default](/docs/zh/operations/settings/settings.md#settings-input-format-null-as-default) because the data has no [NULL](/docs/zh/sql-reference/syntax.md#null-literal) values. Otherwise ClickHouse would try to parse `\N` sequences and could confuse them with `\` in the data.

The setting [date_time_input_format best_effort](/docs/zh/operations/settings/settings.md#settings-date_time_input_format) lets ClickHouse parse [DateTime](/docs/zh/sql-reference/data-types/datetime.md) fields in a wide variety of formats. For example, an ISO-8601-like string without seconds, such as '2000-01-01 01:02', will be recognized. Without this setting, only the fixed DateTime format is allowed.
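The same relaxed parsing is also exposed as a function, which makes it easy to check what the `best_effort` mode will accept (a small illustration of our own, not part of the original walkthrough):

```sql
-- Both variants are accepted; the fixed-format parser would reject the first one.
SELECT
    parseDateTimeBestEffort('2000-01-01 01:02') AS without_seconds,
    parseDateTimeBestEffort('2000-01-01T01:02:00Z') AS iso_8601;
```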
## Denormalize the data {#denormalize-data}

The data is presented across multiple tables in [normalized form](https://en.wikipedia.org/wiki/Database_normalization#Normal_forms). This means that if you want to query, for example, dish names from menu items, you have to perform a [JOIN](/docs/zh/sql-reference/statements/select/join.md#select-join). For typical analytical tasks it is far more efficient to work with pre-joined data, to avoid doing a JOIN every time. This is called "denormalizing" the data.

We will create a table `menu_item_denorm` that contains all the data joined together:

```sql
CREATE TABLE menu_item_denorm
ENGINE = MergeTree ORDER BY (dish_name, created_at)
AS SELECT
    price,
    high_price,
    created_at,
    updated_at,
    xpos,
    ypos,
    dish.id AS dish_id,
    dish.name AS dish_name,
    dish.description AS dish_description,
    dish.menus_appeared AS dish_menus_appeared,
    dish.times_appeared AS dish_times_appeared,
    dish.first_appeared AS dish_first_appeared,
    dish.last_appeared AS dish_last_appeared,
    dish.lowest_price AS dish_lowest_price,
    dish.highest_price AS dish_highest_price,
    menu.id AS menu_id,
    menu.name AS menu_name,
    menu.sponsor AS menu_sponsor,
    menu.event AS menu_event,
    menu.venue AS menu_venue,
    menu.place AS menu_place,
    menu.physical_description AS menu_physical_description,
    menu.occasion AS menu_occasion,
    menu.notes AS menu_notes,
    menu.call_number AS menu_call_number,
    menu.keywords AS menu_keywords,
    menu.language AS menu_language,
    menu.date AS menu_date,
    menu.location AS menu_location,
    menu.location_type AS menu_location_type,
    menu.currency AS menu_currency,
    menu.currency_symbol AS menu_currency_symbol,
    menu.status AS menu_status,
    menu.page_count AS menu_page_count,
    menu.dish_count AS menu_dish_count
FROM menu_item
    JOIN dish ON menu_item.dish_id = dish.id
    JOIN menu_page ON menu_item.menu_page_id = menu_page.id
    JOIN menu ON menu_page.menu_id = menu.id;
```

## Validate the data {#validate-data}

Query:

```sql
SELECT count() FROM menu_item_denorm;
```

Result:

```text
┌─count()─┐
│ 1329175 │
└─────────┘
```

## Run some queries {#run-queries}

### Averaged historical prices of dishes {#query-averaged-historical-prices}

Query:

```sql
SELECT
    round(toUInt32OrZero(extract(menu_date, '^\\d{4}')), -1) AS d,
    count(),
    round(avg(price), 2),
    bar(avg(price), 0, 100, 100)
FROM menu_item_denorm
WHERE (menu_currency = 'Dollars') AND (d > 0) AND (d < 2022)
GROUP BY d
ORDER BY d ASC;
```

Result:

```text
┌────d─┬─count()─┬─round(avg(price), 2)─┬─bar(avg(price), 0, 100, 100)─┐
│ 1850 │ 618 │ 1.5 │ █▍ │
│ 1860 │ 1634 │ 1.29 │ █▎ │
│ 1870 │ 2215 │ 1.36 │ █▎ │
│ 1880 │ 3909 │ 1.01 │ █ │
│ 1890 │ 8837 │ 1.4 │ █▍ │
│ 1900 │ 176292 │ 0.68 │ ▋ │
│ 1910 │ 212196 │ 0.88 │ ▊ │
│ 1920 │ 179590 │ 0.74 │ ▋ │
│ 1930 │ 73707 │ 0.6 │ ▌ │
│ 1940 │ 58795 │ 0.57 │ ▌ │
│ 1950 │ 41407 │ 0.95 │ ▊ │
│ 1960 │ 51179 │ 1.32 │ █▎ │
│ 1970 │ 12914 │ 1.86 │ █▋ │
│ 1980 │ 7268 │ 4.35 │ ████▎ │
│ 1990 │ 11055 │ 6.03 │ ██████ │
│ 2000 │ 2467 │ 11.85 │ ███████████▋ │
│ 2010 │ 597 │ 25.66 │ █████████████████████████▋ │
└──────┴─────────┴──────────────────────┴──────────────────────────────┘
```

Take it with a grain of salt.

### Burger prices {#query-burger-prices}

Query:

```sql
SELECT
    round(toUInt32OrZero(extract(menu_date, '^\\d{4}')), -1) AS d,
    count(),
    round(avg(price), 2),
    bar(avg(price), 0, 50, 100)
FROM menu_item_denorm
WHERE (menu_currency = 'Dollars') AND (d > 0) AND (d < 2022) AND (dish_name ILIKE '%burger%')
GROUP BY d
ORDER BY d ASC;
```

Result:

```text
┌────d─┬─count()─┬─round(avg(price), 2)─┬─bar(avg(price), 0, 50, 100)───────────┐
│ 1880 │ 2 │ 0.42 │ ▋ │
│ 1890 │ 7 │ 0.85 │ █▋ │
│ 1900 │ 399 │ 0.49 │ ▊ │
│ 1910 │ 589 │ 0.68 │ █▎ │
│ 1920 │ 280 │ 0.56 │ █ │
│ 1930 │ 74 │ 0.42 │ ▋ │
│ 1940 │ 119 │ 0.59 │ █▏ │
│ 1950 │ 134 │ 1.09 │ ██▏ │
│ 1960 │ 272 │ 0.92 │ █▋ │
│ 1970 │ 108 │ 1.18 │ ██▎ │
│ 1980 │ 88 │ 2.82 │ █████▋ │
│ 1990 │ 184 │ 3.68 │ ███████▎ │
│ 2000 │ 21 │ 7.14 │ ██████████████▎ │
│ 2010 │ 6 │ 18.42 │ ████████████████████████████████████▋ │
└──────┴─────────┴──────────────────────┴───────────────────────────────────────┘
```

### Vodka {#query-vodka}

Query:

```sql
SELECT
    round(toUInt32OrZero(extract(menu_date, '^\\d{4}')), -1) AS d,
    count(),
    round(avg(price), 2),
    bar(avg(price), 0, 50, 100)
FROM menu_item_denorm
WHERE (menu_currency IN ('Dollars', '')) AND (d > 0) AND (d < 2022) AND (dish_name ILIKE '%vodka%')
GROUP BY d
ORDER BY d ASC;
```

Result:

```text
┌────d─┬─count()─┬─round(avg(price), 2)─┬─bar(avg(price), 0, 50, 100)─┐
│ 1910 │ 2 │ 0 │ │
│ 1920 │ 1 │ 0.3 │ ▌ │
│ 1940 │ 21 │ 0.42 │ ▋ │
│ 1950 │ 14 │ 0.59 │ █▏ │
│ 1960 │ 113 │ 2.17 │ ████▎ │
│ 1970 │ 37 │ 0.68 │ █▎ │
│ 1980 │ 19 │ 2.55 │ █████ │
│ 1990 │ 86 │ 3.6 │ ███████▏ │
│ 2000 │ 2 │ 3.98 │ ███████▊ │
└──────┴─────────┴──────────────────────┴─────────────────────────────┘
```

To match entries like `Vodka`, we have to query with `ILIKE '%vodka%'`, as the tiny example below shows.
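`ILIKE` is simply the case-insensitive variant of `LIKE`, which is why the pattern also matches capitalized entries (a small illustration of our own):

```sql
SELECT
    'Imported Vodka' ILIKE '%vodka%' AS case_insensitive, -- 1
    'Imported Vodka' LIKE '%vodka%' AS case_sensitive;    -- 0
```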
### Caviar {#query-caviar}

Print the prices of caviar. Also print the name of any dish with caviar.

Query:

```sql
SELECT
    round(toUInt32OrZero(extract(menu_date, '^\\d{4}')), -1) AS d,
    count(),
    round(avg(price), 2),
    bar(avg(price), 0, 50, 100),
    any(dish_name)
FROM menu_item_denorm
WHERE (menu_currency IN ('Dollars', '')) AND (d > 0) AND (d < 2022) AND (dish_name ILIKE '%caviar%')
GROUP BY d
ORDER BY d ASC;
```

Result:

```text
┌────d─┬─count()─┬─round(avg(price), 2)─┬─bar(avg(price), 0, 50, 100)──────┬─any(dish_name)──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ 1090 │ 1 │ 0 │ │ Caviar │
│ 1880 │ 3 │ 0 │ │ Caviar │
│ 1890 │ 39 │ 0.59 │ █▏ │ Butter and caviar │
│ 1900 │ 1014 │ 0.34 │ ▋ │ Anchovy Caviar on Toast │
│ 1910 │ 1588 │ 1.35 │ ██▋ │ 1/1 Brötchen Caviar │
│ 1920 │ 927 │ 1.37 │ ██▋ │ ASTRAKAN CAVIAR │
│ 1930 │ 289 │ 1.91 │ ███▋ │ Astrachan caviar │
│ 1940 │ 201 │ 0.83 │ █▋ │ (SPECIAL) Domestic Caviar Sandwich │
│ 1950 │ 81 │ 2.27 │ ████▌ │ Beluga Caviar │
│ 1960 │ 126 │ 2.21 │ ████▍ │ Beluga Caviar │
│ 1970 │ 105 │ 0.95 │ █▊ │ BELUGA MALOSSOL CAVIAR AMERICAN DRESSING │
│ 1980 │ 12 │ 7.22 │ ██████████████▍ │ Authentic Iranian Beluga Caviar the world's finest black caviar presented in ice garni and a sampling of chilled 100° Russian vodka │
│ 1990 │ 74 │ 14.42 │ ████████████████████████████▋ │ Avocado Salad, Fresh cut avocado with caviare │
│ 2000 │ 3 │ 7.82 │ ███████████████▋ │ Aufgeschlagenes Kartoffelsueppchen mit Forellencaviar │
│ 2010 │ 6 │ 15.58 │ ███████████████████████████████▏ │ "OYSTERS AND PEARLS" "Sabayon" of Pearl Tapioca with Island Creek Oysters and Russian Sevruga Caviar │
└──────┴─────────┴──────────────────────┴──────────────────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

At least they have caviar with vodka. Very nice.

## Online Playground {#playground}

The dataset has been uploaded to the ClickHouse Playground, [example](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICByb3VuZCh0b1VJbnQzMk9yWmVybyhleHRyYWN0KG1lbnVfZGF0ZSwgJ15cXGR7NH0nKSksIC0xKSBBUyBkLAogICAgY291bnQoKSwKICAgIHJvdW5kKGF2ZyhwcmljZSksIDIpLAogICAgYmFyKGF2ZyhwcmljZSksIDAsIDUwLCAxMDApLAogICAgYW55KGRpc2hfbmFtZSkKRlJPTSBtZW51X2l0ZW1fZGVub3JtCldIRVJFIChtZW51X2N1cnJlbmN5IElOICgnRG9sbGFycycsICcnKSkgQU5EIChkID4gMCkgQU5EIChkIDwgMjAyMikgQU5EIChkaXNoX25hbWUgSUxJS0UgJyVjYXZpYXIlJykKR1JPVVAgQlkgZApPUkRFUiBCWSBkIEFTQw==).
@@ -1,9 +1,416 @@
----
-slug: /zh/getting-started/example-datasets/opensky
-sidebar_label: Air Traffic Data
-title: "Crowdsourced air traffic data from The OpenSky Network 2020"
----
-
-import Content from '@site/docs/en/getting-started/example-datasets/opensky.md';
-
-<Content />

---
slug: /zh/getting-started/example-datasets/opensky
sidebar_label: Air Traffic Data
description: The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic.
title: "Crowdsourced air traffic data from The OpenSky Network 2020"
---

The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically added to the dataset until the end of the COVID-19 pandemic.

Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd

Martin Strohmeier, Xavier Olive, Jannis Lübbe, Matthias Schäfer, and Vincent Lenders, "Crowdsourced air traffic data from the OpenSky Network 2019–2020", Earth System Science Data 13(2), 2021, https://doi.org/10.5194/essd-13-357-2021

## Download the dataset {#download-dataset}

Run the command:

```bash
wget -O- https://zenodo.org/record/5092942 | grep -oP 'https://zenodo.org/record/5092942/files/flightlist_\d+_\d+\.csv\.gz' | xargs wget
```

The download will take about 2 minutes with a good internet connection. There are 30 files with a total size of 4.3 GB.

## Create the table {#create-table}

```sql
CREATE TABLE opensky
(
    callsign String,
    number String,
    icao24 String,
    registration String,
    typecode String,
    origin String,
    destination String,
    firstseen DateTime,
    lastseen DateTime,
    day DateTime,
    latitude_1 Float64,
    longitude_1 Float64,
    altitude_1 Float64,
    latitude_2 Float64,
    longitude_2 Float64,
    altitude_2 Float64
) ENGINE = MergeTree ORDER BY (origin, destination, callsign);
```

## Import the data {#import-data}

Upload the data to ClickHouse in parallel:

```bash
ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c 'gzip -c -d "{}" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"'
```

- Here we pass the list of files (`ls -1 flightlist_*.csv.gz`) to `xargs` for parallel processing. `xargs -P100` specifies up to 100 parallel workers, but since we only have 30 files, the number of workers will be just 30.
- For every file, `xargs` runs a script via `bash -c`. The script uses `{}` as a placeholder for the file name, which `xargs` substitutes into the command (we asked for this with `-I{}`).
- The script decompresses the file (`gzip -c -d "{}"`) to standard output (the `-c` parameter) and the output is redirected to `clickhouse-client`.
- We also ask to parse [DateTime](../../sql-reference/data-types/datetime.md) fields with the extended parser ([--date_time_input_format best_effort](../../operations/settings/settings.md#settings-date_time_input_format)) to recognize ISO-8601 format with timezone offsets.

Finally, `clickhouse-client` reads the input data in [CSVWithNames](../../interfaces/formats.md#csvwithnames) format and performs the insertion.

The parallel import takes 24 seconds.

If you prefer not to import in parallel, here is the sequential variant:

```bash
for file in flightlist_*.csv.gz; do gzip -c -d "$file" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"; done
```
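As an optional sanity check after the import (our own suggestion, not part of the original walkthrough), you can confirm that the `best_effort` date parsing produced a plausible time range; the dataset is supposed to start on 1 January 2019:

```sql
-- min(firstseen) should be close to 2019-01-01 and max(lastseen) close to the snapshot date.
SELECT min(firstseen), max(lastseen) FROM opensky;
```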
## Validate the data {#validate-data}

Query:

```sql
SELECT count() FROM opensky;
```

Result:

```text
┌──count()─┐
│ 66010819 │
└──────────┘
```

The size of the dataset in ClickHouse is only 2.66 GiB — let's check it.

Query:

```sql
SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'opensky';
```

Result:

```text
┌─formatReadableSize(total_bytes)─┐
│ 2.66 GiB │
└─────────────────────────────────┘
```

## Run some queries {#run-queries}

The total distance travelled is 68 billion kilometers.

Query:

```sql
SELECT formatReadableQuantity(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) / 1000) FROM opensky;
```

Result:

```text
┌─formatReadableQuantity(divide(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 1000))─┐
│ 68.72 billion │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

The average flight distance is around 1000 km.

Query:

```sql
SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky;
```

Result:

```text
┌─avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))─┐
│ 1041090.6465708319 │
└────────────────────────────────────────────────────────────────────┘
```
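`geoDistance` takes coordinates as `(lon1, lat1, lon2, lat2)` in degrees and returns the distance in meters. A quick standalone check with approximate Moscow and London coordinates (the coordinates are our own illustration):

```sql
-- Roughly 2500 km between Moscow and London.
SELECT round(geoDistance(37.62, 55.75, -0.13, 51.51) / 1000) AS km;
```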
### Most busy origin airports and the average distance seen {#busy-airports-average-distance}

Query:

```sql
SELECT
    origin,
    count(),
    round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))) AS distance,
    bar(distance, 0, 10000000, 100) AS bar
FROM opensky
WHERE origin != ''
GROUP BY origin
ORDER BY count() DESC
LIMIT 100;
```

Result:

```text
┌─origin─┬─count()─┬─distance─┬─bar────────────────────────────────────┐
1. │ KORD │ 745007 │ 1546108 │ ███████████████▍ │
2. │ KDFW │ 696702 │ 1358721 │ █████████████▌ │
3. │ KATL │ 667286 │ 1169661 │ ███████████▋ │
4. │ KDEN │ 582709 │ 1287742 │ ████████████▊ │
5. │ KLAX │ 581952 │ 2628393 │ ██████████████████████████▎ │
6. │ KLAS │ 447789 │ 1336967 │ █████████████▎ │
7. │ KPHX │ 428558 │ 1345635 │ █████████████▍ │
8. │ KSEA │ 412592 │ 1757317 │ █████████████████▌ │
9. │ KCLT │ 404612 │ 880355 │ ████████▋ │
10. │ VIDP │ 363074 │ 1445052 │ ██████████████▍ │
11. │ EDDF │ 362643 │ 2263960 │ ██████████████████████▋ │
12. │ KSFO │ 361869 │ 2445732 │ ████████████████████████▍ │
13. │ KJFK │ 349232 │ 2996550 │ █████████████████████████████▊ │
14. │ KMSP │ 346010 │ 1287328 │ ████████████▋ │
15. │ LFPG │ 344748 │ 2206203 │ ██████████████████████ │
16. │ EGLL │ 341370 │ 3216593 │ ████████████████████████████████▏ │
17. │ EHAM │ 340272 │ 2116425 │ █████████████████████▏ │
18. │ KEWR │ 337696 │ 1826545 │ ██████████████████▎ │
19. │ KPHL │ 320762 │ 1291761 │ ████████████▊ │
20. │ OMDB │ 308855 │ 2855706 │ ████████████████████████████▌ │
21. │ UUEE │ 307098 │ 1555122 │ ███████████████▌ │
22. │ KBOS │ 304416 │ 1621675 │ ████████████████▏ │
23. │ LEMD │ 291787 │ 1695097 │ ████████████████▊ │
24. │ YSSY │ 272979 │ 1875298 │ ██████████████████▋ │
25. │ KMIA │ 265121 │ 1923542 │ ███████████████████▏ │
26. │ ZGSZ │ 263497 │ 745086 │ ███████▍ │
27. │ EDDM │ 256691 │ 1361453 │ █████████████▌ │
28. │ WMKK │ 254264 │ 1626688 │ ████████████████▎ │
29. │ CYYZ │ 251192 │ 2175026 │ █████████████████████▋ │
30. │ KLGA │ 248699 │ 1106935 │ ███████████ │
31. │ VHHH │ 248473 │ 3457658 │ ██████████████████████████████████▌ │
32. │ RJTT │ 243477 │ 1272744 │ ████████████▋ │
33. │ KBWI │ 241440 │ 1187060 │ ███████████▋ │
34. │ KIAD │ 239558 │ 1683485 │ ████████████████▋ │
35. │ KIAH │ 234202 │ 1538335 │ ███████████████▍ │
36. │ KFLL │ 223447 │ 1464410 │ ██████████████▋ │
37. │ KDAL │ 212055 │ 1082339 │ ██████████▋ │
38. │ KDCA │ 207883 │ 1013359 │ ██████████▏ │
39. │ LIRF │ 207047 │ 1427965 │ ██████████████▎ │
40. │ PANC │ 206007 │ 2525359 │ █████████████████████████▎ │
41. │ LTFJ │ 205415 │ 860470 │ ████████▌ │
42. │ KDTW │ 204020 │ 1106716 │ ███████████ │
43. │ VABB │ 201679 │ 1300865 │ █████████████ │
44. │ OTHH │ 200797 │ 3759544 │ █████████████████████████████████████▌ │
45. │ KMDW │ 200796 │ 1232551 │ ████████████▎ │
46. │ KSAN │ 198003 │ 1495195 │ ██████████████▊ │
47. │ KPDX │ 197760 │ 1269230 │ ████████████▋ │
48. │ SBGR │ 197624 │ 2041697 │ ████████████████████▍ │
49. │ VOBL │ 189011 │ 1040180 │ ██████████▍ │
50. │ LEBL │ 188956 │ 1283190 │ ████████████▋ │
51. │ YBBN │ 188011 │ 1253405 │ ████████████▌ │
52. │ LSZH │ 187934 │ 1572029 │ ███████████████▋ │
53. │ YMML │ 187643 │ 1870076 │ ██████████████████▋ │
54. │ RCTP │ 184466 │ 2773976 │ ███████████████████████████▋ │
55. │ KSNA │ 180045 │ 778484 │ ███████▋ │
56. │ EGKK │ 176420 │ 1694770 │ ████████████████▊ │
57. │ LOWW │ 176191 │ 1274833 │ ████████████▋ │
58. │ UUDD │ 176099 │ 1368226 │ █████████████▋ │
59. │ RKSI │ 173466 │ 3079026 │ ██████████████████████████████▋ │
60. │ EKCH │ 172128 │ 1229895 │ ████████████▎ │
61. │ KOAK │ 171119 │ 1114447 │ ███████████▏ │
62. │ RPLL │ 170122 │ 1440735 │ ██████████████▍ │
63. │ KRDU │ 167001 │ 830521 │ ████████▎ │
64. │ KAUS │ 164524 │ 1256198 │ ████████████▌ │
65. │ KBNA │ 163242 │ 1022726 │ ██████████▏ │
66. │ KSDF │ 162655 │ 1380867 │ █████████████▋ │
67. │ ENGM │ 160732 │ 910108 │ █████████ │
68. │ LIMC │ 160696 │ 1564620 │ ███████████████▋ │
69. │ KSJC │ 159278 │ 1081125 │ ██████████▋ │
70. │ KSTL │ 157984 │ 1026699 │ ██████████▎ │
71. │ UUWW │ 156811 │ 1261155 │ ████████████▌ │
72. │ KIND │ 153929 │ 987944 │ █████████▊ │
73. │ ESSA │ 153390 │ 1203439 │ ████████████ │
74. │ KMCO │ 153351 │ 1508657 │ ███████████████ │
75. │ KDVT │ 152895 │ 74048 │ ▋ │
76. │ VTBS │ 152645 │ 2255591 │ ██████████████████████▌ │
77. │ CYVR │ 149574 │ 2027413 │ ████████████████████▎ │
78. │ EIDW │ 148723 │ 1503985 │ ███████████████ │
79. │ LFPO │ 143277 │ 1152964 │ ███████████▌ │
80. │ EGSS │ 140830 │ 1348183 │ █████████████▍ │
81. │ KAPA │ 140776 │ 420441 │ ████▏ │
82. │ KHOU │ 138985 │ 1068806 │ ██████████▋ │
83. │ KTPA │ 138033 │ 1338223 │ █████████████▍ │
84. │ KFFZ │ 137333 │ 55397 │ ▌ │
85. │ NZAA │ 136092 │ 1581264 │ ███████████████▋ │
86. │ YPPH │ 133916 │ 1271550 │ ████████████▋ │
87. │ RJBB │ 133522 │ 1805623 │ ██████████████████ │
88. │ EDDL │ 133018 │ 1265919 │ ████████████▋ │
89. │ ULLI │ 130501 │ 1197108 │ ███████████▊ │
90. │ KIWA │ 127195 │ 250876 │ ██▌ │
91. │ KTEB │ 126969 │ 1189414 │ ███████████▊ │
92. │ VOMM │ 125616 │ 1127757 │ ███████████▎ │
93. │ LSGG │ 123998 │ 1049101 │ ██████████▍ │
94. │ LPPT │ 122733 │ 1779187 │ █████████████████▋ │
95. │ WSSS │ 120493 │ 3264122 │ ████████████████████████████████▋ │
96. │ EBBR │ 118539 │ 1579939 │ ███████████████▋ │
97. │ VTBD │ 118107 │ 661627 │ ██████▌ │
98. │ KVNY │ 116326 │ 692960 │ ██████▊ │
99. │ EDDT │ 115122 │ 941740 │ █████████▍ │
100. │ EFHK │ 114860 │ 1629143 │ ████████████████▎ │
└────────┴─────────┴──────────┴────────────────────────────────────────┘
```

### Number of flights from three major Moscow airports, weekly {#flights-from-moscow}

Query:

```sql
SELECT
    toMonday(day) AS k,
    count() AS c,
    bar(c, 0, 10000, 100) AS bar
FROM opensky
WHERE origin IN ('UUEE', 'UUDD', 'UUWW')
GROUP BY k
ORDER BY k ASC;
```

Result:

```text
┌──────────k─┬────c─┬─bar──────────────────────────────────────────────────────────────────────────┐
1. │ 2018-12-31 │ 5248 │ ████████████████████████████████████████████████████▍ │
2. │ 2019-01-07 │ 6302 │ ███████████████████████████████████████████████████████████████ │
3. │ 2019-01-14 │ 5701 │ █████████████████████████████████████████████████████████ │
4. │ 2019-01-21 │ 5638 │ ████████████████████████████████████████████████████████▍ │
5. │ 2019-01-28 │ 5731 │ █████████████████████████████████████████████████████████▎ │
6. │ 2019-02-04 │ 5683 │ ████████████████████████████████████████████████████████▋ │
7. │ 2019-02-11 │ 5759 │ █████████████████████████████████████████████████████████▌ │
8. │ 2019-02-18 │ 5736 │ █████████████████████████████████████████████████████████▎ │
9. │ 2019-02-25 │ 5873 │ ██████████████████████████████████████████████████████████▋ │
10. │ 2019-03-04 │ 5965 │ ███████████████████████████████████████████████████████████▋ │
11. │ 2019-03-11 │ 5900 │ ███████████████████████████████████████████████████████████ │
12. │ 2019-03-18 │ 5823 │ ██████████████████████████████████████████████████████████▏ │
13. │ 2019-03-25 │ 5899 │ ██████████████████████████████████████████████████████████▊ │
14. │ 2019-04-01 │ 6043 │ ████████████████████████████████████████████████████████████▍ │
15. │ 2019-04-08 │ 6098 │ ████████████████████████████████████████████████████████████▊ │
16. │ 2019-04-15 │ 6196 │ █████████████████████████████████████████████████████████████▊ │
17. │ 2019-04-22 │ 6486 │ ████████████████████████████████████████████████████████████████▋ │
18. │ 2019-04-29 │ 6682 │ ██████████████████████████████████████████████████████████████████▋ │
19. │ 2019-05-06 │ 6739 │ ███████████████████████████████████████████████████████████████████▍ │
20. │ 2019-05-13 │ 6600 │ ██████████████████████████████████████████████████████████████████ │
21. │ 2019-05-20 │ 6575 │ █████████████████████████████████████████████████████████████████▋ │
22. │ 2019-05-27 │ 6786 │ ███████████████████████████████████████████████████████████████████▋ │
23. │ 2019-06-03 │ 6872 │ ████████████████████████████████████████████████████████████████████▋ │
24. │ 2019-06-10 │ 7045 │ ██████████████████████████████████████████████████████████████████████▍ │
25. │ 2019-06-17 │ 7045 │ ██████████████████████████████████████████████████████████████████████▍ │
26. │ 2019-06-24 │ 6852 │ ████████████████████████████████████████████████████████████████████▌ │
27. │ 2019-07-01 │ 7248 │ ████████████████████████████████████████████████████████████████████████▍ │
28. │ 2019-07-08 │ 7284 │ ████████████████████████████████████████████████████████████████████████▋ │
29. │ 2019-07-15 │ 7142 │ ███████████████████████████████████████████████████████████████████████▍ │
30. │ 2019-07-22 │ 7108 │ ███████████████████████████████████████████████████████████████████████ │
31. │ 2019-07-29 │ 7251 │ ████████████████████████████████████████████████████████████████████████▌ │
32. │ 2019-08-05 │ 7403 │ ██████████████████████████████████████████████████████████████████████████ │
33. │ 2019-08-12 │ 7457 │ ██████████████████████████████████████████████████████████████████████████▌ │
34. │ 2019-08-19 │ 7502 │ ███████████████████████████████████████████████████████████████████████████ │
35. │ 2019-08-26 │ 7540 │ ███████████████████████████████████████████████████████████████████████████▍ │
36. │ 2019-09-02 │ 7237 │ ████████████████████████████████████████████████████████████████████████▎ │
37. │ 2019-09-09 │ 7328 │ █████████████████████████████████████████████████████████████████████████▎ │
38. │ 2019-09-16 │ 5566 │ ███████████████████████████████████████████████████████▋ │
39. │ 2019-09-23 │ 7049 │ ██████████████████████████████████████████████████████████████████████▍ │
40. │ 2019-09-30 │ 6880 │ ████████████████████████████████████████████████████████████████████▋ │
41. │ 2019-10-07 │ 6518 │ █████████████████████████████████████████████████████████████████▏ │
42. │ 2019-10-14 │ 6688 │ ██████████████████████████████████████████████████████████████████▊ │
43. │ 2019-10-21 │ 6667 │ ██████████████████████████████████████████████████████████████████▋ │
44. │ 2019-10-28 │ 6303 │ ███████████████████████████████████████████████████████████████ │
45. │ 2019-11-04 │ 6298 │ ██████████████████████████████████████████████████████████████▊ │
46. │ 2019-11-11 │ 6137 │ █████████████████████████████████████████████████████████████▎ │
47. │ 2019-11-18 │ 6051 │ ████████████████████████████████████████████████████████████▌ │
48. │ 2019-11-25 │ 5820 │ ██████████████████████████████████████████████████████████▏ │
49. │ 2019-12-02 │ 5942 │ ███████████████████████████████████████████████████████████▍ │
50. │ 2019-12-09 │ 4891 │ ████████████████████████████████████████████████▊ │
51. │ 2019-12-16 │ 5682 │ ████████████████████████████████████████████████████████▋ │
52. │ 2019-12-23 │ 6111 │ █████████████████████████████████████████████████████████████ │
53. │ 2019-12-30 │ 5870 │ ██████████████████████████████████████████████████████████▋ │
54. │ 2020-01-06 │ 5953 │ ███████████████████████████████████████████████████████████▌ │
55. │ 2020-01-13 │ 5698 │ ████████████████████████████████████████████████████████▊ │
56. │ 2020-01-20 │ 5339 │ █████████████████████████████████████████████████████▍ │
57. │ 2020-01-27 │ 5566 │ ███████████████████████████████████████████████████████▋ │
58. │ 2020-02-03 │ 5801 │ ██████████████████████████████████████████████████████████ │
59. │ 2020-02-10 │ 5692 │ ████████████████████████████████████████████████████████▊ │
60. │ 2020-02-17 │ 5912 │ ███████████████████████████████████████████████████████████ │
61. │ 2020-02-24 │ 6031 │ ████████████████████████████████████████████████████████████▎ │
62. │ 2020-03-02 │ 6105 │ █████████████████████████████████████████████████████████████ │
63. │ 2020-03-09 │ 5823 │ ██████████████████████████████████████████████████████████▏ │
64. │ 2020-03-16 │ 4659 │ ██████████████████████████████████████████████▌ │
65. │ 2020-03-23 │ 3720 │ █████████████████████████████████████▏ │
66. │ 2020-03-30 │ 1720 │ █████████████████▏ │
67. │ 2020-04-06 │ 849 │ ████████▍ │
68. │ 2020-04-13 │ 710 │ ███████ │
69. │ 2020-04-20 │ 725 │ ███████▏ │
70. │ 2020-04-27 │ 920 │ █████████▏ │
71. │ 2020-05-04 │ 859 │ ████████▌ │
72. │ 2020-05-11 │ 1047 │ ██████████▍ │
73. │ 2020-05-18 │ 1135 │ ███████████▎ │
74. │ 2020-05-25 │ 1266 │ ████████████▋ │
75. │ 2020-06-01 │ 1793 │ █████████████████▊ │
76. │ 2020-06-08 │ 1979 │ ███████████████████▋ │
77. │ 2020-06-15 │ 2297 │ ██████████████████████▊ │
78. │ 2020-06-22 │ 2788 │ ███████████████████████████▊ │
79. │ 2020-06-29 │ 3389 │ █████████████████████████████████▊ │
80. │ 2020-07-06 │ 3545 │ ███████████████████████████████████▍ │
81. │ 2020-07-13 │ 3569 │ ███████████████████████████████████▋ │
82. │ 2020-07-20 │ 3784 │ █████████████████████████████████████▋ │
83. │ 2020-07-27 │ 3960 │ ███████████████████████████████████████▌ │
84. │ 2020-08-03 │ 4323 │ ███████████████████████████████████████████▏ │
85. │ 2020-08-10 │ 4581 │ █████████████████████████████████████████████▋ │
86. │ 2020-08-17 │ 4791 │ ███████████████████████████████████████████████▊ │
87. │ 2020-08-24 │ 4928 │ █████████████████████████████████████████████████▎ │
88. │ 2020-08-31 │ 4687 │ ██████████████████████████████████████████████▋ │
89. │ 2020-09-07 │ 4643 │ ██████████████████████████████████████████████▍ │
90. │ 2020-09-14 │ 4594 │ █████████████████████████████████████████████▊ │
91. │ 2020-09-21 │ 4478 │ ████████████████████████████████████████████▋ │
92. │ 2020-09-28 │ 4382 │ ███████████████████████████████████████████▋ │
93. │ 2020-10-05 │ 4261 │ ██████████████████████████████████████████▌ │
94. │ 2020-10-12 │ 4243 │ ██████████████████████████████████████████▍ │
95. │ 2020-10-19 │ 3941 │ ███████████████████████████████████████▍ │
96. │ 2020-10-26 │ 3616 │ ████████████████████████████████████▏ │
97. │ 2020-11-02 │ 3586 │ ███████████████████████████████████▋ │
98. │ 2020-11-09 │ 3403 │ ██████████████████████████████████ │
99. │ 2020-11-16 │ 3336 │ █████████████████████████████████▎ │
100. │ 2020-11-23 │ 3230 │ ████████████████████████████████▎ │
101. │ 2020-11-30 │ 3183 │ ███████████████████████████████▋
|
||||||
|
102. │ 2020-12-07 │ 3285 │ ████████████████████████████████▋ │
|
||||||
|
103. │ 2020-12-14 │ 3367 │ █████████████████████████████████▋ │
|
||||||
|
104. │ 2020-12-21 │ 3748 │ █████████████████████████████████████▍ │
|
||||||
|
105. │ 2020-12-28 │ 3986 │ ███████████████████████████████████████▋ │
|
||||||
|
106. │ 2021-01-04 │ 3906 │ ███████████████████████████████████████ │
|
||||||
|
107. │ 2021-01-11 │ 3425 │ ██████████████████████████████████▎ │
|
||||||
|
108. │ 2021-01-18 │ 3144 │ ███████████████████████████████▍ │
|
||||||
|
109. │ 2021-01-25 │ 3115 │ ███████████████████████████████▏ │
|
||||||
|
110. │ 2021-02-01 │ 3285 │ ████████████████████████████████▋ │
|
||||||
|
111. │ 2021-02-08 │ 3321 │ █████████████████████████████████▏ │
|
||||||
|
112. │ 2021-02-15 │ 3475 │ ██████████████████████████████████▋ │
|
||||||
|
113. │ 2021-02-22 │ 3549 │ ███████████████████████████████████▍ │
|
||||||
|
114. │ 2021-03-01 │ 3755 │ █████████████████████████████████████▌ │
|
||||||
|
115. │ 2021-03-08 │ 3080 │ ██████████████████████████████▋ │
|
||||||
|
116. │ 2021-03-15 │ 3789 │ █████████████████████████████████████▊ │
|
||||||
|
117. │ 2021-03-22 │ 3804 │ ██████████████████████████████████████ │
|
||||||
|
118. │ 2021-03-29 │ 4238 │ ██████████████████████████████████████████▍ │
|
||||||
|
119. │ 2021-04-05 │ 4307 │ ███████████████████████████████████████████ │
|
||||||
|
120. │ 2021-04-12 │ 4225 │ ██████████████████████████████████████████▎ │
|
||||||
|
121. │ 2021-04-19 │ 4391 │ ███████████████████████████████████████████▊ │
|
||||||
|
122. │ 2021-04-26 │ 4868 │ ████████████████████████████████████████████████▋ │
|
||||||
|
123. │ 2021-05-03 │ 4977 │ █████████████████████████████████████████████████▋ │
|
||||||
|
124. │ 2021-05-10 │ 5164 │ ███████████████████████████████████████████████████▋ │
|
||||||
|
125. │ 2021-05-17 │ 4986 │ █████████████████████████████████████████████████▋ │
|
||||||
|
126. │ 2021-05-24 │ 5024 │ ██████████████████████████████████████████████████▏ │
|
||||||
|
127. │ 2021-05-31 │ 4824 │ ████████████████████████████████████████████████▏ │
|
||||||
|
128. │ 2021-06-07 │ 5652 │ ████████████████████████████████████████████████████████▌ │
|
||||||
|
129. │ 2021-06-14 │ 5613 │ ████████████████████████████████████████████████████████▏ │
|
||||||
|
130. │ 2021-06-21 │ 6061 │ ████████████████████████████████████████████████████████████▌ │
|
||||||
|
131. │ 2021-06-28 │ 2554 │ █████████████████████████▌ │
|
||||||
|
└────────────┴──────┴──────────────────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Online Playground {#playground}

You can test other queries against this dataset using the interactive [Online Playground](https://play.clickhouse.com/play?user=play). For example, [run this query](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=). Note, however, that temporary tables cannot be created in the Playground.

@@ -1,9 +1,339 @@
---
slug: /zh/getting-started/example-datasets/recipes
sidebar_label: Recipes Dataset
title: "Recipes Dataset"
---

The RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.poznan.pl/dataset). It contains 2.2 million recipes, and its size is slightly less than 1 GB.

## Download and Unpack the Dataset

1. Go to the download page [https://recipenlg.cs.put.poznan.pl/dataset](https://recipenlg.cs.put.poznan.pl/dataset).
2. Accept the Terms and Conditions and download the zip file.
3. Unpack the zip file with `unzip` to get the `full_dataset.csv` file.

## Create a Table

Run clickhouse-client and execute the following CREATE request:

``` sql
CREATE TABLE recipes
(
    title String,
    ingredients Array(String),
    directions Array(String),
    link String,
    source LowCardinality(String),
    NER Array(String)
) ENGINE = MergeTree ORDER BY title;
```

## Insert the Data

Run the following command:

``` bash
clickhouse-client --query "
    INSERT INTO recipes
    SELECT
        title,
        JSONExtract(ingredients, 'Array(String)'),
        JSONExtract(directions, 'Array(String)'),
        link,
        source,
        JSONExtract(NER, 'Array(String)')
    FROM input('num UInt32, title String, ingredients String, directions String, link String, source LowCardinality(String), NER String')
    FORMAT CSVWithNames
" --input_format_with_names_use_header 0 --format_csv_allow_single_quote 0 --input_format_allow_errors_num 10 < full_dataset.csv
```

This is a showcase of how to parse a custom CSV file, which requires several tuning parameters.

Explanation:
- the dataset is in CSV format, but it needs some preprocessing on insertion; we use the table function [input](../../sql-reference/table-functions/input.md) to perform the preprocessing;
- the structure of the CSV file is specified in the argument of the `input` table function;
- the field `num` (row number) is not needed: we parse it from the file and then ignore it;
- we use `FORMAT CSVWithNames`, but the header in the CSV will be ignored (via the command-line parameter `--input_format_with_names_use_header 0`), because the header does not contain the name of the first field;
- the file uses only double quotes to enclose CSV strings; some strings are not enclosed in double quotes, and a single quote must not be parsed as a string delimiter, which is why we also add the `--format_csv_allow_single_quote 0` parameter;
- some strings in the CSV cannot be parsed because they start with the `\M/` sequence; the only value that may start with a backslash in CSV is `\N`, which is parsed as SQL NULL. We add the `--input_format_allow_errors_num 10` parameter to allow up to ten malformed records to be skipped during import;
- the `ingredients`, `directions` and `NER` fields in the dataset are arrays, but they are not represented in the usual way: they are serialized into strings as JSON and then placed into the CSV. On import we parse them as strings and then use the [JSONExtract](../../sql-reference/functions/json-functions.md) function to transform them into arrays.

## Validate the Inserted Data

Check the row count.

Query:

``` sql
SELECT count() FROM recipes;
```

Result:

``` text
┌─count()─┐
│ 2231141 │
└─────────┘
```

## Example Queries

### Top Components by the Number of Recipes

In this example, we learn how to use the [arrayJoin](../../sql-reference/functions/array-join/) function to expand an array into a set of rows.

Query:

``` sql
SELECT
    arrayJoin(NER) AS k,
    count() AS c
FROM recipes
GROUP BY k
ORDER BY c DESC
LIMIT 50
```

Result:

``` text
┌─k────────────────────┬──────c─┐
│ salt                 │ 890741 │
│ sugar                │ 620027 │
│ butter               │ 493823 │
│ flour                │ 466110 │
│ eggs                 │ 401276 │
│ onion                │ 372469 │
│ garlic               │ 358364 │
│ milk                 │ 346769 │
│ water                │ 326092 │
│ vanilla              │ 270381 │
│ olive oil            │ 197877 │
│ pepper               │ 179305 │
│ brown sugar          │ 174447 │
│ tomatoes             │ 163933 │
│ egg                  │ 160507 │
│ baking powder        │ 148277 │
│ lemon juice          │ 146414 │
│ Salt                 │ 122557 │
│ cinnamon             │ 117927 │
│ sour cream           │ 116682 │
│ cream cheese         │ 114423 │
│ margarine            │ 112742 │
│ celery               │ 112676 │
│ baking soda          │ 110690 │
│ parsley              │ 102151 │
│ chicken              │ 101505 │
│ onions               │  98903 │
│ vegetable oil        │  91395 │
│ oil                  │  85600 │
│ mayonnaise           │  84822 │
│ pecans               │  79741 │
│ nuts                 │  78471 │
│ potatoes             │  75820 │
│ carrots              │  75458 │
│ pineapple            │  74345 │
│ soy sauce            │  70355 │
│ black pepper         │  69064 │
│ thyme                │  68429 │
│ mustard              │  65948 │
│ chicken broth        │  65112 │
│ bacon                │  64956 │
│ honey                │  64626 │
│ oregano              │  64077 │
│ ground beef          │  64068 │
│ unsalted butter      │  63848 │
│ mushrooms            │  61465 │
│ Worcestershire sauce │  59328 │
│ cornstarch           │  58476 │
│ green pepper         │  58388 │
│ Cheddar cheese       │  58354 │
└──────────────────────┴────────┘

50 rows in set. Elapsed: 0.112 sec. Processed 2.23 million rows, 361.57 MB (19.99 million rows/s., 3.24 GB/s.)
```

### The Most Complex Recipes with Strawberry

``` sql
SELECT
    title,
    length(NER),
    length(directions)
FROM recipes
WHERE has(NER, 'strawberry')
ORDER BY length(directions) DESC
LIMIT 10
```

Result:

``` text
┌─title────────────────────────────────────────────────────────────┬─length(NER)─┬─length(directions)─┐
│ Chocolate-Strawberry-Orange Wedding Cake │ 24 │ 126 │
│ Strawberry Cream Cheese Crumble Tart │ 19 │ 47 │
│ Charlotte-Style Ice Cream │ 11 │ 45 │
│ Sinfully Good a Million Layers Chocolate Layer Cake, With Strawb │ 31 │ 45 │
│ Sweetened Berries With Elderflower Sherbet │ 24 │ 44 │
│ Chocolate-Strawberry Mousse Cake │ 15 │ 42 │
│ Rhubarb Charlotte with Strawberries and Rum │ 20 │ 42 │
│ Chef Joey's Strawberry Vanilla Tart │ 7 │ 37 │
│ Old-Fashioned Ice Cream Sundae Cake │ 17 │ 37 │
│ Watermelon Cake │ 16 │ 36 │
└──────────────────────────────────────────────────────────────────┴─────────────┴────────────────────┘

10 rows in set. Elapsed: 0.215 sec. Processed 2.23 million rows, 1.48 GB (10.35 million rows/s., 6.86 GB/s.)
```

In this example, we use the [has](../../sql-reference/functions/array-functions/#hasarr-elem) function to filter by array elements and sort by the number of directions.

There is a wedding cake that requires the whole 126 steps to produce! Show those directions:

Query:

``` sql
SELECT arrayJoin(directions)
FROM recipes
WHERE title = 'Chocolate-Strawberry-Orange Wedding Cake'
```

Result:

``` text
┌─arrayJoin(directions)───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ Position 1 rack in center and 1 rack in bottom third of oven and preheat to 350F. │
│ Butter one 5-inch-diameter cake pan with 2-inch-high sides, one 8-inch-diameter cake pan with 2-inch-high sides and one 12-inch-diameter cake pan with 2-inch-high sides. │
│ Dust pans with flour; line bottoms with parchment. │
│ Combine 1/3 cup orange juice and 2 ounces unsweetened chocolate in heavy small saucepan. │
│ Stir mixture over medium-low heat until chocolate melts. │
│ Remove from heat. │
│ Gradually mix in 1 2/3 cups orange juice. │
│ Sift 3 cups flour, 2/3 cup cocoa, 2 teaspoons baking soda, 1 teaspoon salt and 1/2 teaspoon baking powder into medium bowl. │
│ using electric mixer, beat 1 cup (2 sticks) butter and 3 cups sugar in large bowl until blended (mixture will look grainy). │
│ Add 4 eggs, 1 at a time, beating to blend after each. │
│ Beat in 1 tablespoon orange peel and 1 tablespoon vanilla extract. │
│ Add dry ingredients alternately with orange juice mixture in 3 additions each, beating well after each addition. │
│ Mix in 1 cup chocolate chips. │
│ Transfer 1 cup plus 2 tablespoons batter to prepared 5-inch pan, 3 cups batter to prepared 8-inch pan and remaining batter (about 6 cups) to 12-inch pan. │
│ Place 5-inch and 8-inch pans on center rack of oven. │
│ Place 12-inch pan on lower rack of oven. │
│ Bake cakes until tester inserted into center comes out clean, about 35 minutes. │
│ Transfer cakes in pans to racks and cool completely. │
│ Mark 4-inch diameter circle on one 6-inch-diameter cardboard cake round. │
│ Cut out marked circle. │
│ Mark 7-inch-diameter circle on one 8-inch-diameter cardboard cake round. │
│ Cut out marked circle. │
│ Mark 11-inch-diameter circle on one 12-inch-diameter cardboard cake round. │
│ Cut out marked circle. │
│ Cut around sides of 5-inch-cake to loosen. │
│ Place 4-inch cardboard over pan. │
│ Hold cardboard and pan together; turn cake out onto cardboard. │
│ Peel off parchment.Wrap cakes on its cardboard in foil. │
│ Repeat turning out, peeling off parchment and wrapping cakes in foil, using 7-inch cardboard for 8-inch cake and 11-inch cardboard for 12-inch cake. │
│ Using remaining ingredients, make 1 more batch of cake batter and bake 3 more cake layers as described above. │
│ Cool cakes in pans. │
│ Cover cakes in pans tightly with foil. │
│ (Can be prepared ahead. │
│ Let stand at room temperature up to 1 day or double-wrap all cake layers and freeze up to 1 week. │
│ Bring cake layers to room temperature before using.) │
│ Place first 12-inch cake on its cardboard on work surface. │
│ Spread 2 3/4 cups ganache over top of cake and all the way to edge. │
│ Spread 2/3 cup jam over ganache, leaving 1/2-inch chocolate border at edge. │
│ Drop 1 3/4 cups white chocolate frosting by spoonfuls over jam. │
│ Gently spread frosting over jam, leaving 1/2-inch chocolate border at edge. │
│ Rub some cocoa powder over second 12-inch cardboard. │
│ Cut around sides of second 12-inch cake to loosen. │
│ Place cardboard, cocoa side down, over pan. │
│ Turn cake out onto cardboard. │
│ Peel off parchment. │
│ Carefully slide cake off cardboard and onto filling on first 12-inch cake. │
│ Refrigerate. │
│ Place first 8-inch cake on its cardboard on work surface. │
│ Spread 1 cup ganache over top all the way to edge. │
│ Spread 1/4 cup jam over, leaving 1/2-inch chocolate border at edge. │
│ Drop 1 cup white chocolate frosting by spoonfuls over jam. │
│ Gently spread frosting over jam, leaving 1/2-inch chocolate border at edge. │
│ Rub some cocoa over second 8-inch cardboard. │
│ Cut around sides of second 8-inch cake to loosen. │
│ Place cardboard, cocoa side down, over pan. │
│ Turn cake out onto cardboard. │
│ Peel off parchment. │
│ Slide cake off cardboard and onto filling on first 8-inch cake. │
│ Refrigerate. │
│ Place first 5-inch cake on its cardboard on work surface. │
│ Spread 1/2 cup ganache over top of cake and all the way to edge. │
│ Spread 2 tablespoons jam over, leaving 1/2-inch chocolate border at edge. │
│ Drop 1/3 cup white chocolate frosting by spoonfuls over jam. │
│ Gently spread frosting over jam, leaving 1/2-inch chocolate border at edge. │
│ Rub cocoa over second 6-inch cardboard. │
│ Cut around sides of second 5-inch cake to loosen. │
│ Place cardboard, cocoa side down, over pan. │
│ Turn cake out onto cardboard. │
│ Peel off parchment. │
│ Slide cake off cardboard and onto filling on first 5-inch cake. │
│ Chill all cakes 1 hour to set filling. │
│ Place 12-inch tiered cake on its cardboard on revolving cake stand. │
│ Spread 2 2/3 cups frosting over top and sides of cake as a first coat. │
│ Refrigerate cake. │
│ Place 8-inch tiered cake on its cardboard on cake stand. │
│ Spread 1 1/4 cups frosting over top and sides of cake as a first coat. │
│ Refrigerate cake. │
│ Place 5-inch tiered cake on its cardboard on cake stand. │
│ Spread 3/4 cup frosting over top and sides of cake as a first coat. │
│ Refrigerate all cakes until first coats of frosting set, about 1 hour. │
│ (Cakes can be made to this point up to 1 day ahead; cover and keep refrigerate.) │
│ Prepare second batch of frosting, using remaining frosting ingredients and following directions for first batch. │
│ Spoon 2 cups frosting into pastry bag fitted with small star tip. │
│ Place 12-inch cake on its cardboard on large flat platter. │
│ Place platter on cake stand. │
│ Using icing spatula, spread 2 1/2 cups frosting over top and sides of cake; smooth top. │
│ Using filled pastry bag, pipe decorative border around top edge of cake. │
│ Refrigerate cake on platter. │
│ Place 8-inch cake on its cardboard on cake stand. │
│ Using icing spatula, spread 1 1/2 cups frosting over top and sides of cake; smooth top. │
│ Using pastry bag, pipe decorative border around top edge of cake. │
│ Refrigerate cake on its cardboard. │
│ Place 5-inch cake on its cardboard on cake stand. │
│ Using icing spatula, spread 3/4 cup frosting over top and sides of cake; smooth top. │
│ Using pastry bag, pipe decorative border around top edge of cake, spooning more frosting into bag if necessary. │
│ Refrigerate cake on its cardboard. │
│ Keep all cakes refrigerated until frosting sets, about 2 hours. │
│ (Can be prepared 2 days ahead. │
│ Cover loosely; keep refrigerated.) │
│ Place 12-inch cake on platter on work surface. │
│ Press 1 wooden dowel straight down into and completely through center of cake. │
│ Mark dowel 1/4 inch above top of frosting. │
│ Remove dowel and cut with serrated knife at marked point. │
│ Cut 4 more dowels to same length. │
│ Press 1 cut dowel back into center of cake. │
│ Press remaining 4 cut dowels into cake, positioning 3 1/2 inches inward from cake edges and spacing evenly. │
│ Place 8-inch cake on its cardboard on work surface. │
│ Press 1 dowel straight down into and completely through center of cake. │
│ Mark dowel 1/4 inch above top of frosting. │
│ Remove dowel and cut with serrated knife at marked point. │
│ Cut 3 more dowels to same length. │
│ Press 1 cut dowel back into center of cake. │
│ Press remaining 3 cut dowels into cake, positioning 2 1/2 inches inward from edges and spacing evenly. │
│ Using large metal spatula as aid, place 8-inch cake on its cardboard atop dowels in 12-inch cake, centering carefully. │
│ Gently place 5-inch cake on its cardboard atop dowels in 8-inch cake, centering carefully. │
│ Using citrus stripper, cut long strips of orange peel from oranges. │
│ Cut strips into long segments. │
│ To make orange peel coils, wrap peel segment around handle of wooden spoon; gently slide peel off handle so that peel keeps coiled shape. │
│ Garnish cake with orange peel coils, ivy or mint sprigs, and some berries. │
│ (Assembled cake can be made up to 8 hours ahead. │
│ Let stand at cool room temperature.) │
│ Remove top and middle cake tiers. │
│ Remove dowels from cakes. │
│ Cut top and middle cakes into slices. │
│ To cut 12-inch cake: Starting 3 inches inward from edge and inserting knife straight down, cut through from top to bottom to make 6-inch-diameter circle in center of cake. │
│ Cut outer portion of cake into slices; cut inner portion into slices and serve with strawberries. │
└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘

126 rows in set. Elapsed: 0.011 sec. Processed 8.19 thousand rows, 5.34 MB (737.75 thousand rows/s., 480.59 MB/s.)
```

### Online Playground

The dataset is also available in the [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==).

[Original article](https://clickhouse.com/docs/en/getting-started/example-datasets/recipes/)

@@ -42,9 +42,9 @@ ORDER BY (postcode1, postcode2, addr1, addr2);

- Split `postcode` into two different columns, `postcode1` and `postcode2`, since this is better for storage and queries
- Convert the `time` field to a date, since it only contains the 00:00 time
- Ignore the [UUid](/docs/zh/sql-reference/data-types/uuid.md) field, because we don't need it for analysis
- Convert the `Enum` fields `type` and `duration` to more readable `Enum` fields using the [transform](/docs/zh/sql-reference/functions/other-functions.md#transform) function (a sketch follows this list)
- Convert the `is_new` field from a single-character string (`Y`/`N`) to a [UInt8](/docs/zh/sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) field with value 0 or 1
- Drop the last two columns, since they all have the same value (0)
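
As a quick, hedged illustration of the `transform` mapping mentioned above: the letter codes and labels below follow the usual UK price paid convention, and the inline `values` input is invented purely for demonstration.

``` sql
-- transform(x, from_array, to_array, default) substitutes values element-wise.
SELECT
    code,
    transform(code, ['D', 'S', 'T', 'F', 'O'],
              ['detached', 'semi-detached', 'terraced', 'flat', 'other'], 'unknown') AS type
FROM values('code String', 'D', 'T', 'X');
```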

The `url` function streams data from a web server into a ClickHouse table. The following command inserts 5 million rows into the `uk_price_paid` table:
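
A minimal sketch of the `url` table function it refers to; the address and the two-column schema here are placeholders, not the dataset's real location:

``` sql
-- Read rows straight from an HTTP(S) endpoint: url(address, format, structure).
SELECT *
FROM url('http://localhost:8000/prices.csv', 'CSV', 'price UInt32, date Date')
LIMIT 5;
```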
@@ -342,7 +342,7 @@ LIMIT 100

## Speed Up Queries with Projections {#speedup-with-projections}

[Projections](/docs/zh/sql-reference/statements/alter/projection.mdx) allow us to improve query speed by storing pre-aggregated data in an arbitrary format. In this example, we create a projection that keeps the average price, the total price, and the number of properties grouped by year, district, and town. At execution time, ClickHouse will use the projection if it decides it can improve query performance (when to use it is up to ClickHouse).

### Build a Projection {#build-projection}

@@ -0,0 +1,38 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionAnalysisOfVariance.h>
#include <AggregateFunctions/FactoryHelpers.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
}

namespace
{

AggregateFunctionPtr createAggregateFunctionAnalysisOfVariance(const std::string & name, const DataTypes & arguments, const Array & parameters, const Settings *)
{
    assertNoParameters(name, parameters);
    assertBinary(name, arguments);

    if (!isNumber(arguments[0]) || !isNumber(arguments[1]))
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} only supports numerical types", name);

    return std::make_shared<AggregateFunctionAnalysisOfVariance>(arguments, parameters);
}

}

void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory & factory)
{
    AggregateFunctionProperties properties = { .is_order_dependent = false };
    factory.registerFunction("analysisOfVariance", {createAggregateFunctionAnalysisOfVariance, properties}, AggregateFunctionFactory::CaseInsensitive);

    /// This is a widely used term
    factory.registerAlias("anova", "analysisOfVariance", AggregateFunctionFactory::CaseInsensitive);
}

}
src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.h (new file, 98 lines)
@@ -0,0 +1,98 @@
#pragma once

#include <IO/VarInt.h>
#include <IO/WriteHelpers.h>

#include <array>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnsCommon.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/Moments.h>
#include "Common/NaNUtils.h"
#include <Common/assert_cast.h>
#include <Core/Types.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
}

class AggregateFunctionAnalysisOfVarianceData final : public AnalysisOfVarianceMoments<Float64>
{
};


/// One-way analysis of variance.
/// Provides a statistical test of whether two or more population means are equal (the null hypothesis).
/// Assumes that subjects from group i have a normal distribution.
/// Accepts two arguments: a value and the number of the group this value belongs to.
/// Groups are enumerated starting from 0, and there should be at least two groups to perform a test.
/// Moreover, there should be at least one group with a number of observations greater than one.
class AggregateFunctionAnalysisOfVariance final : public IAggregateFunctionDataHelper<AggregateFunctionAnalysisOfVarianceData, AggregateFunctionAnalysisOfVariance>
{
public:
    explicit AggregateFunctionAnalysisOfVariance(const DataTypes & arguments, const Array & params)
        : IAggregateFunctionDataHelper(arguments, params)
    {}

    DataTypePtr getReturnType() const override
    {
        DataTypes types {std::make_shared<DataTypeNumber<Float64>>(), std::make_shared<DataTypeNumber<Float64>>() };
        Strings names {"f_statistic", "p_value"};
        return std::make_shared<DataTypeTuple>(
            std::move(types),
            std::move(names)
        );
    }

    String getName() const override { return "analysisOfVariance"; }

    bool allocatesMemoryInArena() const override { return false; }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
    {
        data(place).add(columns[0]->getFloat64(row_num), columns[1]->getUInt(row_num));
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
    {
        data(place).merge(data(rhs));
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        data(place).write(buf);
    }

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
    {
        data(place).read(buf);
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        auto f_stat = data(place).getFStatistic();
        if (std::isinf(f_stat) || isNaN(f_stat))
            throw Exception("F statistic is not defined or infinite for these arguments", ErrorCodes::BAD_ARGUMENTS);

        auto p_value = data(place).getPValue(f_stat);

        /// Because p-value is a probability.
        p_value = std::min(1.0, std::max(0.0, p_value));

        auto & column_tuple = assert_cast<ColumnTuple &>(to);
        auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
        auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));

        column_stat.getData().push_back(f_stat);
        column_value.getData().push_back(p_value);
    }

};

}
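
For orientation, a minimal usage sketch of the function registered above; the inline sample data is invented purely for illustration:

``` sql
-- anova(value, group) returns a tuple (f_statistic, p_value);
-- groups are numbered from 0 and at least two groups are required.
SELECT anova(score, group_id) AS result
FROM values('score Float64, group_id UInt8',
    (10.1, 0), (9.8, 0), (12.3, 1), (12.9, 1), (8.7, 2), (9.1, 2));
```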
@@ -156,6 +156,11 @@ public:
         nested_func->insertResultInto(place, to, arena);
     }
 
+    void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    {
+        nested_func->insertMergeResultInto(place, to, arena);
+    }
+
     bool allocatesMemoryInArena() const override
     {
         return nested_func->allocatesMemoryInArena();
@@ -196,7 +196,8 @@ public:
         this->data(place).deserialize(buf, arena);
     }
 
-    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    template <bool MergeResult>
+    void insertResultIntoImpl(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const
     {
         auto arguments = this->data(place).getArguments(this->argument_types);
         ColumnRawPtrs arguments_raw(arguments.size());
@@ -205,9 +206,22 @@ public:
 
         assert(!arguments.empty());
         nested_func->addBatchSinglePlace(0, arguments[0]->size(), getNestedPlace(place), arguments_raw.data(), arena);
-        nested_func->insertResultInto(getNestedPlace(place), to, arena);
+        if constexpr (MergeResult)
+            nested_func->insertMergeResultInto(getNestedPlace(place), to, arena);
+        else
+            nested_func->insertResultInto(getNestedPlace(place), to, arena);
     }
 
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    {
+        insertResultIntoImpl<false>(place, to, arena);
+    }
+
+    void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    {
+        insertResultIntoImpl<true>(place, to, arena);
+    }
+
     size_t sizeOfData() const override
     {
         return prefix_size + nested_func->sizeOfData();
@@ -257,7 +257,8 @@ public:
         }
     }
 
-    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    template <bool merge>
+    void insertResultIntoImpl(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const
     {
         AggregateFunctionForEachData & state = data(place);
 
@@ -268,6 +269,9 @@ public:
         char * nested_state = state.array_of_aggregate_datas;
         for (size_t i = 0; i < state.dynamic_array_size; ++i)
         {
+            if constexpr (merge)
+                nested_func->insertMergeResultInto(nested_state, elems_to, arena);
+            else
             nested_func->insertResultInto(nested_state, elems_to, arena);
             nested_state += nested_size_of_data;
         }
@@ -275,6 +279,16 @@ public:
         offsets_to.push_back(offsets_to.back() + state.dynamic_array_size);
     }
 
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    {
+        insertResultIntoImpl<false>(place, to, arena);
+    }
+
+    void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    {
+        insertResultIntoImpl<true>(place, to, arena);
+    }
+
     bool allocatesMemoryInArena() const override
     {
         return true;
@@ -183,6 +183,11 @@ public:
         nested_func->insertResultInto(place, to, arena);
     }
 
+    void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    {
+        nested_func->insertMergeResultInto(place, to, arena);
+    }
+
     bool allocatesMemoryInArena() const override
     {
         return nested_func->allocatesMemoryInArena();
@@ -264,7 +264,8 @@ public:
         }
     }
 
-    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    template <bool merge>
+    void insertResultIntoImpl(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const
     {
         auto & map_column = assert_cast<ColumnMap &>(to);
         auto & nested_column = map_column.getNestedColumn();
@@ -288,6 +289,9 @@ public:
         for (auto & key : keys)
         {
             key_column.insert(key);
+            if constexpr (merge)
+                nested_func->insertMergeResultInto(merged_maps[key], val_column, arena);
+            else
             nested_func->insertResultInto(merged_maps[key], val_column, arena);
         }
 
@@ -295,6 +299,16 @@ public:
         res_offsets.push_back(val_column.size());
     }
 
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    {
+        insertResultIntoImpl<false>(place, to, arena);
+    }
+
+    void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    {
+        insertResultIntoImpl<true>(place, to, arena);
+    }
+
     bool allocatesMemoryInArena() const override { return true; }
 
     AggregateFunctionPtr getNestedFunction() const override { return nested_func; }
@@ -163,13 +163,17 @@ public:
         }
     }
 
-    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    template <bool merge>
+    void insertResultIntoImpl(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const
     {
         if constexpr (result_is_nullable)
         {
             ColumnNullable & to_concrete = assert_cast<ColumnNullable &>(to);
             if (getFlag(place))
             {
+                if constexpr (merge)
+                    nested_function->insertMergeResultInto(nestedPlace(place), to_concrete.getNestedColumn(), arena);
+                else
                 nested_function->insertResultInto(nestedPlace(place), to_concrete.getNestedColumn(), arena);
                 to_concrete.getNullMapData().push_back(0);
             }
@@ -180,10 +184,23 @@ public:
         }
         else
         {
+            if constexpr (merge)
+                nested_function->insertMergeResultInto(nestedPlace(place), to, arena);
+            else
             nested_function->insertResultInto(nestedPlace(place), to, arena);
         }
     }
 
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    {
+        insertResultIntoImpl<false>(place, to, arena);
+    }
+
+    void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    {
+        insertResultIntoImpl<true>(place, to, arena);
+    }
+
     bool allocatesMemoryInArena() const override
     {
         return nested_function->allocatesMemoryInArena();
@@ -265,10 +265,11 @@ public:
         }
     }
 
-    void insertResultInto(
+    template <bool merge>
+    void insertResultIntoImpl(
         AggregateDataPtr __restrict place,
         IColumn & to,
-        Arena * arena) const override
+        Arena * arena) const
     {
         if (place[size_of_data])
         {
@@ -277,7 +278,12 @@ public:
             // -OrNull
 
             if (inner_nullable)
+            {
+                if constexpr (merge)
+                    nested_function->insertMergeResultInto(place, to, arena);
+                else
                 nested_function->insertResultInto(place, to, arena);
+            }
             else
             {
                 ColumnNullable & col = typeid_cast<ColumnNullable &>(to);
@@ -289,7 +295,9 @@ public:
             else
             {
                 // -OrDefault
+                if constexpr (merge)
+                    nested_function->insertMergeResultInto(place, to, arena);
+                else
                 nested_function->insertResultInto(place, to, arena);
             }
         }
@@ -297,6 +305,16 @@ public:
             to.insertDefault();
         }
     }
 
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    {
+        insertResultIntoImpl<false>(place, to, arena);
+    }
+
+    void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    {
+        insertResultIntoImpl<true>(place, to, arena);
+    }
+
     AggregateFunctionPtr getNestedFunction() const override { return nested_function; }
 };
@@ -195,17 +195,33 @@ public:
         return std::make_shared<DataTypeArray>(nested_function->getReturnType());
     }
 
-    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    template <bool merge>
+    void insertResultIntoImpl(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const
     {
         auto & col = assert_cast<ColumnArray &>(to);
         auto & col_offsets = assert_cast<ColumnArray::ColumnOffsets &>(col.getOffsetsColumn());
 
         for (size_t i = 0; i < total; ++i)
+        {
+            if constexpr (merge)
+                nested_function->insertMergeResultInto(place + i * size_of_data, col.getData(), arena);
+            else
             nested_function->insertResultInto(place + i * size_of_data, col.getData(), arena);
+        }
 
         col_offsets.getData().push_back(col.getData().size());
     }
 
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    {
+        insertResultIntoImpl<false>(place, to, arena);
+    }
+
+    void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
+    {
+        insertResultIntoImpl<true>(place, to, arena);
+    }
+
     AggregateFunctionPtr getNestedFunction() const override { return nested_function; }
 };
@@ -111,6 +111,11 @@ public:
         assert_cast<ColumnAggregateFunction &>(to).getData().push_back(place);
     }
 
+    void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
+    {
+        assert_cast<ColumnAggregateFunction &>(to).insertFrom(place);
+    }
+
     /// Aggregate function or aggregate function state.
     bool isState() const override { return true; }
 
@@ -164,6 +164,18 @@ public:
     /// window function.
     virtual void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const = 0;
 
+    /// Special method for aggregate functions with the -State combinator: it behaves the same way as insertResultInto,
+    /// but when we need to insert an AggregateData into a ColumnAggregateFunction we use the special method
+    /// insertInto, which inserts a default value and then performs a merge with the provided AggregateData
+    /// instead of just copying the pointer to this AggregateData. Used in WindowTransform.
+    virtual void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const
+    {
+        if (isState())
+            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} is marked as State but method insertMergeResultInto is not implemented", getName());
+
+        insertResultInto(place, to, arena);
+    }
+
     /// Used for machine learning methods. Predict result from trained model.
     /// Will insert result into `to` column for rows in range [offset, offset + limit).
     virtual void predictValues(
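
To see why a merge-based insert matters, here is a hedged SQL sketch of the WindowTransform use case the comment mentions; whether a given server version accepts a -State aggregate as a window function is an assumption here:

``` sql
-- Each row gets a partial aggregation state; the window transform materializes
-- it by merging into a fresh state rather than by copying a shared pointer.
SELECT
    number,
    finalizeAggregation(sumState(number) OVER (ORDER BY number)) AS running_sum
FROM numbers(5);
```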
@@ -4,7 +4,9 @@
 #include <IO/ReadHelpers.h>
 #include <boost/math/distributions/students_t.hpp>
 #include <boost/math/distributions/normal.hpp>
+#include <boost/math/distributions/fisher_f.hpp>
 #include <cfloat>
+#include <numeric>
 
 
 namespace DB
@@ -13,6 +15,7 @@ struct Settings;
 
 namespace ErrorCodes
 {
+    extern const int BAD_ARGUMENTS;
     extern const int DECIMAL_OVERFLOW;
 }
 
@@ -476,4 +479,127 @@ struct ZTestMoments
     }
 };
 
+template <typename T>
+struct AnalysisOfVarianceMoments
+{
+    /// Sums of values within a group
+    std::vector<T> xs1{};
+    /// Sums of squared values within a group
+    std::vector<T> xs2{};
+    /// Sizes of each group. The total number of observations is just the sum of all these values.
+    std::vector<size_t> ns{};
+
+    void resizeIfNeeded(size_t possible_size)
+    {
+        if (xs1.size() >= possible_size)
+            return;
+
+        xs1.resize(possible_size, 0.0);
+        xs2.resize(possible_size, 0.0);
+        ns.resize(possible_size, 0);
+    }
+
+    void add(T value, size_t group)
+    {
+        resizeIfNeeded(group + 1);
+        xs1[group] += value;
+        xs2[group] += value * value;
+        ns[group] += 1;
+    }
+
+    void merge(const AnalysisOfVarianceMoments & rhs)
+    {
+        resizeIfNeeded(rhs.xs1.size());
+        for (size_t i = 0; i < rhs.xs1.size(); ++i)
+        {
+            xs1[i] += rhs.xs1[i];
+            xs2[i] += rhs.xs2[i];
+            ns[i] += rhs.ns[i];
+        }
+    }
+
+    void write(WriteBuffer & buf) const
+    {
+        writeVectorBinary(xs1, buf);
+        writeVectorBinary(xs2, buf);
+        writeVectorBinary(ns, buf);
+    }
+
+    void read(ReadBuffer & buf)
+    {
+        readVectorBinary(xs1, buf);
+        readVectorBinary(xs2, buf);
+        readVectorBinary(ns, buf);
+    }
+
+    Float64 getMeanAll() const
+    {
+        const auto n = std::accumulate(ns.begin(), ns.end(), 0UL);
+        if (n == 0)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "There are no observations to calculate mean value");
+
+        return std::accumulate(xs1.begin(), xs1.end(), 0.0) / n;
+    }
+
+    Float64 getMeanGroup(size_t group) const
+    {
+        if (ns[group] == 0)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "There are no observations for group {}", group);
+
+        return xs1[group] / ns[group];
+    }
+
+    Float64 getBetweenGroupsVariation() const
+    {
+        Float64 res = 0;
+        auto mean = getMeanAll();
+
+        for (size_t i = 0; i < xs1.size(); ++i)
+        {
+            auto group_mean = getMeanGroup(i);
+            res += ns[i] * (group_mean - mean) * (group_mean - mean);
+        }
+        return res;
+    }
+
+    Float64 getWithinGroupsVariation() const
+    {
+        Float64 res = 0;
+        for (size_t i = 0; i < xs1.size(); ++i)
+        {
+            auto group_mean = getMeanGroup(i);
+            res += xs2[i] + ns[i] * group_mean * group_mean - 2 * group_mean * xs1[i];
+        }
+        return res;
+    }
+
+    Float64 getFStatistic() const
+    {
+        const auto k = xs1.size();
+        const auto n = std::accumulate(ns.begin(), ns.end(), 0UL);
+
+        if (k == 1)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "There should be more than one group to calculate f-statistics");
+
+        if (k == n)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is only one observation in each group");
+
+        return (getBetweenGroupsVariation() * (n - k)) / (getWithinGroupsVariation() * (k - 1));
+    }
+
+    Float64 getPValue(Float64 f_statistic) const
+    {
+        const auto k = xs1.size();
+        const auto n = std::accumulate(ns.begin(), ns.end(), 0UL);
+
+        if (k == 1)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "There should be more than one group to calculate f-statistics");
+
+        if (k == n)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is only one observation in each group");
+
+        return 1.0f - boost::math::cdf(boost::math::fisher_f(k - 1, n - k), f_statistic);
+    }
+};
 
 }
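
In conventional notation, with k groups, group sizes n_i, group means x̄_i and grand mean x̄, `getBetweenGroupsVariation` and `getWithinGroupsVariation` above are the classic one-way ANOVA sums of squares, and the F statistic and p-value follow directly:

``` latex
\mathrm{SSB} = \sum_{i=1}^{k} n_i\,(\bar{x}_i - \bar{x})^2, \qquad
\mathrm{SSW} = \sum_{i=1}^{k}\sum_{j=1}^{n_i} (x_{ij} - \bar{x}_i)^2, \qquad
F = \frac{\mathrm{SSB}/(k-1)}{\mathrm{SSW}/(n-k)}, \qquad
p = 1 - F_{k-1,\,n-k}(F)
```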
@@ -72,6 +72,7 @@ void registerAggregateFunctionNothing(AggregateFunctionFactory &);
 void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory &);
 void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
 void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
+void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &);
 
 class AggregateFunctionCombinatorFactory;
 void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@@ -156,6 +157,7 @@ void registerAggregateFunctions()
     registerAggregateFunctionIntervalLengthSum(factory);
     registerAggregateFunctionExponentialMovingAverage(factory);
     registerAggregateFunctionSparkbar(factory);
+    registerAggregateFunctionAnalysisOfVariance(factory);
 
     registerWindowFunctions(factory);
 }
@@ -468,6 +468,16 @@ bool QueryFuzzer::isSuitableForFuzzing(const ASTCreateQuery & create)
     return create.columns_list && create.columns_list->columns;
 }

+static String getOriginalTableName(const String & full_name)
+{
+    return full_name.substr(0, full_name.find("__fuzz_"));
+}
+
+static String getFuzzedTableName(const String & original_name, size_t index)
+{
+    return original_name + "__fuzz_" + toString(index);
+}
+
 void QueryFuzzer::fuzzCreateQuery(ASTCreateQuery & create)
 {
     if (create.columns_list && create.columns_list->columns)
@@ -501,10 +511,9 @@ void QueryFuzzer::fuzzCreateQuery(ASTCreateQuery & create)
     }

     auto full_name = create.getTable();
-    auto original_name = full_name.substr(0, full_name.find("__fuzz_"));
+    auto original_name = getOriginalTableName(full_name);

     size_t index = index_of_fuzzed_table[original_name]++;
-    auto new_name = original_name + "__fuzz_" + toString(index);
+    auto new_name = getFuzzedTableName(original_name, index);

     create.setTable(new_name);

@@ -665,7 +674,8 @@ void QueryFuzzer::fuzzTableName(ASTTableExpression & table)
     if (table_id.empty())
         return;

-    auto it = original_table_name_to_fuzzed.find(table_id.getTableName());
+    auto original_name = getOriginalTableName(table_id.getTableName());
+    auto it = original_table_name_to_fuzzed.find(original_name);
     if (it != original_table_name_to_fuzzed.end() && !it->second.empty())
     {
         auto new_table_name = it->second.begin();
@@ -728,7 +738,7 @@ ASTs QueryFuzzer::getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query)
     /// Drop all created tables, not only unique ones.
     for (size_t i = 0; i < it->second; ++i)
     {
-        auto fuzzed_name = table_name + "__fuzz_" + toString(i);
+        auto fuzzed_name = getFuzzedTableName(table_name, i);
         auto & query = queries.emplace_back(drop_query.clone());
         query->as<ASTDropQuery>()->setTable(fuzzed_name);
         /// Just in case add IF EXISTS to avoid exceptions.
@@ -749,7 +759,9 @@ void QueryFuzzer::notifyQueryFailed(ASTPtr ast)
     if (pos != std::string::npos)
     {
         auto original_name = table_name.substr(0, pos);
-        original_table_name_to_fuzzed[original_name].erase(table_name);
+        auto it = original_table_name_to_fuzzed.find(original_name);
+        if (it != original_table_name_to_fuzzed.end())
+            it->second.erase(table_name);
     }
 };
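The two helpers introduced above are easy to sanity-check in isolation. A minimal standalone sketch of the "__fuzz_<N>" naming round-trip; plain std::string and std::to_string stand in for DB::String and toString, which is the only liberty taken:

    #include <cassert>
    #include <string>

    /// find() returns npos when there is no "__fuzz_" suffix,
    /// and substr(0, npos) then keeps the whole name unchanged.
    static std::string getOriginalTableName(const std::string & full_name)
    {
        return full_name.substr(0, full_name.find("__fuzz_"));
    }

    static std::string getFuzzedTableName(const std::string & original_name, size_t index)
    {
        return original_name + "__fuzz_" + std::to_string(index);
    }

    int main()
    {
        assert(getFuzzedTableName("t1", 13) == "t1__fuzz_13");
        assert(getOriginalTableName("t1__fuzz_13") == "t1");
        /// Already-fuzzed names collapse back to the same original, which is why
        /// fuzzTableName above strips the suffix before the map lookup.
        assert(getOriginalTableName(getFuzzedTableName("t1__fuzz_13", 2)) == "t1");
    }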
@@ -84,7 +84,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
     M(UInt64, connections_with_failover_max_tries, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, "The maximum number of attempts to connect to replicas.", 0) \
     M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \
     M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \
-    M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 1000, "Each time this number of parts was uploaded to S3 s3_min_upload_part_size multiplied by s3_upload_part_size_multiply_factor.", 0) \
+    M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3 s3_min_upload_part_size multiplied by s3_upload_part_size_multiply_factor.", 0) \
     M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \
     M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \
     M(UInt64, s3_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during S3 write.", 0) \
@@ -10,6 +10,7 @@
 #include <DataTypes/DataTypeAggregateFunction.h>
 #include <DataTypes/Serializations/SerializationAggregateFunction.h>
 #include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/transformTypesRecursively.h>
 #include <IO/WriteBufferFromString.h>
 #include <IO/Operators.h>

@@ -241,6 +242,23 @@ static DataTypePtr create(const ASTPtr & arguments)
     return std::make_shared<DataTypeAggregateFunction>(function, argument_types, params_row, version);
 }

+void setVersionToAggregateFunctions(DataTypePtr & type, bool if_empty, std::optional<size_t> revision)
+{
+    auto callback = [revision, if_empty](DataTypePtr & column_type)
+    {
+        const auto * aggregate_function_type = typeid_cast<const DataTypeAggregateFunction *>(column_type.get());
+        if (aggregate_function_type && aggregate_function_type->isVersioned())
+        {
+            if (revision)
+                aggregate_function_type->updateVersionFromRevision(*revision, if_empty);
+            else
+                aggregate_function_type->setVersion(0, if_empty);
+        }
+    };
+
+    callOnNestedSimpleTypes(type, callback);
+}
+
 void registerDataTypeAggregateFunction(DataTypeFactory & factory)
 {
@@ -70,8 +70,6 @@ public:

     bool isVersioned() const { return function->isVersioned(); }

-    size_t getVersionFromRevision(size_t revision) const { return function->getVersionFromRevision(revision); }
-
     /// Version is not empty only if it was parsed from AST or implicitly cast to 0 or version according
     /// to server revision.
     /// It is ok to have an empty version value here - then for serialization a default (latest)
@@ -84,6 +82,13 @@ public:

         version = version_;
     }
+
+    void updateVersionFromRevision(size_t revision, bool if_empty) const
+    {
+        setVersion(function->getVersionFromRevision(revision), if_empty);
+    }
 };

+void setVersionToAggregateFunctions(DataTypePtr & type, bool if_empty, std::optional<size_t> revision = std::nullopt);
+
 }
@@ -175,4 +175,10 @@ void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &
     transform_simple_types(types);
 }

+void callOnNestedSimpleTypes(DataTypePtr & type, std::function<void(DataTypePtr &)> callback)
+{
+    DataTypes types = {type};
+    transformTypesRecursively(types, [callback](auto & data_types){ callback(data_types[0]); }, {});
+}
+
 }
@@ -14,4 +14,6 @@ namespace DB
 /// Function transform_complex_types will be applied to complex types (Array/Map/Tuple) after recursive call to their nested types.
 void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &)> transform_simple_types, std::function<void(DataTypes &)> transform_complex_types);

+void callOnNestedSimpleTypes(DataTypePtr & type, std::function<void(DataTypePtr &)> callback);
+
 }
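callOnNestedSimpleTypes is a thin adapter that lets a caller visit every simple type nested inside a composite one. A hedged sketch of a call site; the concrete Array(AggregateFunction(...)) type and the lambda body are illustrative, mirroring what setVersionToAggregateFunctions does above, not additional code from this commit:

    /// Illustrative call site: pin the serialization version of every
    /// aggregate-function type nested anywhere inside `type`.
    DataTypePtr type = DataTypeFactory::instance().get("Array(AggregateFunction(uniq, UInt64))");
    callOnNestedSimpleTypes(type, [](DataTypePtr & nested)
    {
        const auto * agg = typeid_cast<const DataTypeAggregateFunction *>(nested.get());
        if (agg && agg->isVersioned())
            agg->setVersion(0, /*if_empty=*/ true);
    });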
@@ -18,6 +18,7 @@
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
 #include <IO/ReadBufferFromString.h>
+#include <IO/parseDateTimeBestEffort.h>
 #include <Parsers/TokenIterator.h>

@@ -453,24 +454,52 @@ void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & secon
     second = std::move(types[1]);
 }

-DataTypePtr tryInferDateOrDateTime(const std::string_view & field, const FormatSettings & settings)
-{
-    if (settings.try_infer_dates)
+bool tryInferDate(const std::string_view & field)
 {
     ReadBufferFromString buf(field);
     DayNum tmp;
-    if (tryReadDateText(tmp, buf) && buf.eof())
-        return makeNullable(std::make_shared<DataTypeDate>());
+    return tryReadDateText(tmp, buf) && buf.eof();
 }

-    if (settings.try_infer_datetimes)
+bool tryInferDateTime(const std::string_view & field, const FormatSettings & settings)
 {
     ReadBufferFromString buf(field);
+    Float64 tmp_float;
+    /// Check if it's just a number, and if so, don't try to infer DateTime from it,
+    /// because we can interpret this number as a timestamp and it will lead to
+    /// inferring DateTime instead of simple Int64/Float64 in some cases.
+    if (tryReadFloatText(tmp_float, buf) && buf.eof())
+        return false;
+
+    buf.seek(0, SEEK_SET); /// Return position to the beginning
     DateTime64 tmp;
+    switch (settings.date_time_input_format)
+    {
+        case FormatSettings::DateTimeInputFormat::Basic:
             if (tryReadDateTime64Text(tmp, 9, buf) && buf.eof())
-                return makeNullable(std::make_shared<DataTypeDateTime64>(9));
+                return true;
+            break;
+        case FormatSettings::DateTimeInputFormat::BestEffort:
+            if (tryParseDateTime64BestEffort(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof())
+                return true;
+            break;
+        case FormatSettings::DateTimeInputFormat::BestEffortUS:
+            if (tryParseDateTime64BestEffortUS(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof())
+                return true;
+            break;
     }

+    return false;
+}
+
+DataTypePtr tryInferDateOrDateTime(const std::string_view & field, const FormatSettings & settings)
+{
+    if (settings.try_infer_dates && tryInferDate(field))
+        return makeNullable(std::make_shared<DataTypeDate>());
+
+    if (settings.try_infer_datetimes && tryInferDateTime(field, settings))
+        return makeNullable(std::make_shared<DataTypeDateTime64>(9));
+
     return nullptr;
 }
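The guard at the top of tryInferDateTime is the interesting part: a field that parses fully as a number must stay numeric, otherwise inputs such as '222222222222222' would be inferred as timestamps. A self-contained sketch of the same two-phase idea, with strtod and std::get_time standing in for ClickHouse's tryReadFloatText and tryReadDateTime64Text (those stand-ins are assumptions; the ordering is what the diff adds):

    #include <cstdlib>
    #include <iomanip>
    #include <sstream>
    #include <string>

    /// Sketch of the two-phase check: a field that is entirely a valid number
    /// must stay numeric; only non-numeric fields get a date-time parse attempt.
    static bool looksLikeDateTime(const std::string & field)
    {
        /// Phase 1: if the whole field parses as a number, refuse DateTime inference.
        char * end = nullptr;
        std::strtod(field.c_str(), &end);
        if (end == field.c_str() + field.size())
            return false;

        /// Phase 2: "rewind" and try an actual date-time parse (basic format here).
        std::istringstream buf(field);
        std::tm tm{};
        buf >> std::get_time(&tm, "%Y-%m-%d %H:%M:%S");
        return !buf.fail() && buf.peek() == std::char_traits<char>::eof();
    }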
@@ -145,12 +145,7 @@ Block NativeReader::read()
         readBinary(type_name, istr);
         column.type = data_type_factory.get(type_name);

-        const auto * aggregate_function_data_type = typeid_cast<const DataTypeAggregateFunction *>(column.type.get());
-        if (aggregate_function_data_type && aggregate_function_data_type->isVersioned())
-        {
-            auto version = aggregate_function_data_type->getVersionFromRevision(server_revision);
-            aggregate_function_data_type->setVersion(version, /*if_empty=*/ true);
-        }
+        setVersionToAggregateFunctions(column.type, true, server_revision);

         SerializationPtr serialization;
         if (server_revision >= DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION)
@@ -11,9 +11,8 @@
 #include <Formats/NativeWriter.h>

 #include <Common/typeid_cast.h>
-#include <DataTypes/DataTypeLowCardinality.h>
-#include <DataTypes/NestedUtils.h>
 #include <Columns/ColumnSparse.h>
+#include <DataTypes/DataTypeLowCardinality.h>
 #include <DataTypes/DataTypeAggregateFunction.h>

 namespace DB
@@ -116,19 +115,7 @@ void NativeWriter::write(const Block & block)
         writeStringBinary(column.name, ostr);

         bool include_version = client_revision >= DBMS_MIN_REVISION_WITH_AGGREGATE_FUNCTIONS_VERSIONING;
-        const auto * aggregate_function_data_type = typeid_cast<const DataTypeAggregateFunction *>(column.type.get());
-        if (aggregate_function_data_type && aggregate_function_data_type->isVersioned())
-        {
-            if (include_version)
-            {
-                auto version = aggregate_function_data_type->getVersionFromRevision(client_revision);
-                aggregate_function_data_type->setVersion(version, /* if_empty */true);
-            }
-            else
-            {
-                aggregate_function_data_type->setVersion(0, /* if_empty */false);
-            }
-        }
+        setVersionToAggregateFunctions(column.type, include_version, include_version ? std::optional<size_t>(client_revision) : std::nullopt);

         /// Type
         String type_name = column.type->getName();
@@ -152,13 +152,6 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume
     MutableColumnPtr result_holder = result_type->createColumn();
     IColumn & res_col = *result_holder;

-    /// AggregateFunction's states should be inserted into column using specific way
-    auto * res_col_aggregate_function = typeid_cast<ColumnAggregateFunction *>(&res_col);
-
-    if (!res_col_aggregate_function && agg_func.isState())
-        throw Exception("State function " + agg_func.getName() + " inserts results into non-state column "
-                        + result_type->getName(), ErrorCodes::ILLEGAL_COLUMN);
-
     PODArray<AggregateDataPtr> places(input_rows_count);
     for (size_t i = 0; i < input_rows_count; ++i)
     {
@@ -190,10 +183,9 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume
     }

     for (size_t i = 0; i < input_rows_count; ++i)
-        if (!res_col_aggregate_function)
-            agg_func.insertResultInto(places[i], res_col, arena.get());
-        else
-            res_col_aggregate_function->insertFrom(places[i]);
+        /// We should use insertMergeResultInto to insert result into ColumnAggregateFunction
+        /// correctly if result contains AggregateFunction's states
+        agg_func.insertMergeResultInto(places[i], res_col, arena.get());

     return result_holder;
 }
@@ -202,13 +202,6 @@ ColumnPtr FunctionArrayReduceInRanges::executeImpl(

     result_arr->getOffsets().insert(ranges_offsets->begin(), ranges_offsets->end());

-    /// AggregateFunction's states should be inserted into column using specific way
-    auto * res_col_aggregate_function = typeid_cast<ColumnAggregateFunction *>(&result_data);
-
-    if (!res_col_aggregate_function && agg_func.isState())
-        throw Exception("State function " + agg_func.getName() + " inserts results into non-state column "
-                        + result_type->getName(), ErrorCodes::ILLEGAL_COLUMN);
-
     /// Perform the aggregation

     size_t begin = 0;
@@ -379,11 +372,9 @@ ColumnPtr FunctionArrayReduceInRanges::executeImpl(
                 for (size_t k = local_begin; k < local_end; ++k)
                     true_func->add(place, aggregate_arguments, begin + k, arena.get());
             }

-            if (!res_col_aggregate_function)
-                agg_func.insertResultInto(place, result_data, arena.get());
-            else
-                res_col_aggregate_function->insertFrom(place);
+            /// We should use insertMergeResultInto to insert result into ColumnAggregateFunction
+            /// correctly if result contains AggregateFunction's states
+            agg_func.insertMergeResultInto(place, result_data, arena.get());
         }
     }
@@ -17,7 +17,6 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
-    extern const int ILLEGAL_COLUMN;
     extern const int ILLEGAL_TYPE_OF_ARGUMENT;
     extern const int BAD_ARGUMENTS;
 }
@@ -114,13 +113,6 @@ ColumnPtr FunctionInitializeAggregation::executeImpl(const ColumnsWithTypeAndNam
     MutableColumnPtr result_holder = result_type->createColumn();
     IColumn & res_col = *result_holder;

-    /// AggregateFunction's states should be inserted into column using specific way
-    auto * res_col_aggregate_function = typeid_cast<ColumnAggregateFunction *>(&res_col);
-
-    if (!res_col_aggregate_function && agg_func.isState())
-        throw Exception("State function " + agg_func.getName() + " inserts results into non-state column "
-                        + result_type->getName(), ErrorCodes::ILLEGAL_COLUMN);
-
     PODArray<AggregateDataPtr> places(input_rows_count);
     for (size_t i = 0; i < input_rows_count; ++i)
     {
@@ -151,10 +143,9 @@ ColumnPtr FunctionInitializeAggregation::executeImpl(const ColumnsWithTypeAndNam
     }

     for (size_t i = 0; i < input_rows_count; ++i)
-        if (!res_col_aggregate_function)
-            agg_func.insertResultInto(places[i], res_col, arena.get());
-        else
-            res_col_aggregate_function->insertFrom(places[i]);
+        /// We should use insertMergeResultInto to insert result into ColumnAggregateFunction
+        /// correctly if result contains AggregateFunction's states
+        agg_func.insertMergeResultInto(places[i], res_col, arena.get());

     return result_holder;
 }
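Across arrayReduce, arrayReduceInRanges and initializeAggregation the same manual branch collapses into one insertMergeResultInto call. A toy model of the distinction it encapsulates; this illustrates merge-into-state versus finalize-into-value only, and is not ClickHouse's actual IAggregateFunction implementation:

    #include <set>
    #include <variant>
    #include <vector>

    /// Toy model: a "state" destination keeps the mergeable state
    /// (the ...State combinator case), a "value" destination keeps
    /// only the finalized result.
    struct UniqState { std::set<int> seen; };

    using Destination = std::variant<std::vector<UniqState> *, std::vector<size_t> *>;

    void insertMergeResultInto(const UniqState & state, Destination dest)
    {
        if (auto * states = std::get_if<std::vector<UniqState> *>(&dest))
            (*states)->push_back(state);  /// keep the state for later merging
        else
            std::get<std::vector<size_t> *>(dest)->push_back(state.seen.size());  /// finalize
    }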
@@ -117,7 +117,7 @@ private:
     else if (which.isDate())
         f(DataTypeDate::FieldType());
     else if (which.isDate32())
-        f(DataTypeDate::FieldType());
+        f(DataTypeDate32::FieldType());
     else if (which.isDateTime())
         f(DataTypeDateTime::FieldType());
     else
@@ -22,11 +22,6 @@ namespace ErrorCodes
 namespace
 {

-/// The regex-based code style check script in CI complains when it sees "ErrorCodes:: ErrorCode" (space added to avoid another match).
-/// Because this expression is only used in this file, don't add some suppression mechanism to the already complex style checker, instead
-/// work around by creating a namespace alias.
-namespace ErrorCodeAlias = ErrorCodes;
-
 /// Throw an exception if the argument is non zero.
 class FunctionThrowIf : public IFunction
 {
@@ -93,7 +88,7 @@ public:
         custom_message = message_column->getValue<String>();
     }

-    std::optional<ErrorCodeAlias::ErrorCode> custom_error_code;
+    std::optional<ErrorCodes::ErrorCode> custom_error_code;
     if (allow_custom_error_code_argument && arguments.size() == 3)
     {
         if (!isColumnConst(*(arguments[2].column)))
@@ -125,7 +120,7 @@ public:

 private:
     template <typename T>
-    ColumnPtr execute(const IColumn * in_untyped, const std::optional<String> & message, const std::optional<ErrorCodeAlias::ErrorCode> & error_code) const
+    ColumnPtr execute(const IColumn * in_untyped, const std::optional<String> & message, const std::optional<ErrorCodes::ErrorCode> & error_code) const
     {
         const auto * in = checkAndGetColumn<ColumnVector<T>>(in_untyped);

@@ -811,6 +811,11 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod()
             return AggregatedDataVariants::Type::low_cardinality_key32;
         if (size_of_field == 8)
             return AggregatedDataVariants::Type::low_cardinality_key64;
+        if (size_of_field == 16)
+            return AggregatedDataVariants::Type::low_cardinality_keys128;
+        if (size_of_field == 32)
+            return AggregatedDataVariants::Type::low_cardinality_keys256;
+        throw Exception("Logical error: low cardinality numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32.", ErrorCodes::LOGICAL_ERROR);
     }

     if (size_of_field == 1)
@@ -232,6 +232,11 @@ HashJoin::HashJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_s
         data->type = Type::CROSS;
         sample_block_with_columns_to_add = right_sample_block;
     }
+    else if (table_join->getClauses().empty())
+    {
+        data->type = Type::EMPTY;
+        sample_block_with_columns_to_add = right_sample_block;
+    }
     else if (table_join->oneDisjunct())
     {
         const auto & key_names_right = table_join->getOnlyClause().key_names_right;
@@ -12,17 +12,14 @@
 #include <Common/hex.h>

 #include <Core/Defines.h>
-#include <Core/Settings.h>
 #include <Core/SettingsEnums.h>

 #include <IO/WriteBufferFromFile.h>
 #include <IO/WriteHelpers.h>
-#include <IO/ReadHelpers.h>

 #include <Parsers/ASTColumnDeclaration.h>
 #include <Parsers/ASTCreateQuery.h>
 #include <Parsers/ASTIdentifier.h>
-#include <Parsers/ASTIndexDeclaration.h>
 #include <Parsers/ASTLiteral.h>
 #include <Parsers/ASTInsertQuery.h>
 #include <Parsers/ParserCreateQuery.h>
@@ -37,7 +34,6 @@
 #include <Interpreters/Context.h>
 #include <Interpreters/executeDDLQueryOnCluster.h>
 #include <Interpreters/executeQuery.h>
-#include <Interpreters/Cluster.h>
 #include <Interpreters/DDLTask.h>
 #include <Interpreters/ExpressionAnalyzer.h>
 #include <Interpreters/InterpreterCreateQuery.h>
@@ -59,7 +55,6 @@

 #include <Databases/DatabaseFactory.h>
 #include <Databases/DatabaseReplicated.h>
-#include <Databases/IDatabase.h>
 #include <Databases/DatabaseOnDisk.h>
 #include <Databases/TablesLoader.h>
 #include <Databases/DDLDependencyVisitor.h>
@@ -484,9 +479,8 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
         {
             column_type = DataTypeFactory::instance().get(col_decl.type);

-            const auto * aggregate_function_type = typeid_cast<const DataTypeAggregateFunction *>(column_type.get());
-            if (attach && aggregate_function_type && aggregate_function_type->isVersioned())
-                aggregate_function_type->setVersion(0, /* if_empty */true);
+            if (attach)
+                setVersionToAggregateFunctions(column_type, true);

             if (col_decl.null_modifier)
             {
@@ -378,7 +378,7 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState &
     }
     else if (type == ASTAlterCommand::FREEZE_ALL)
     {
-        settings.ostr << (settings.hilite ? hilite_keyword : "") << "FREEZE";
+        settings.ostr << (settings.hilite ? hilite_keyword : "") << "FREEZE" << (settings.hilite ? hilite_none : "");

         if (!with_name.empty())
         {
@@ -399,7 +399,7 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState &
     }
     else if (type == ASTAlterCommand::UNFREEZE_ALL)
     {
-        settings.ostr << (settings.hilite ? hilite_keyword : "") << "UNFREEZE";
+        settings.ostr << (settings.hilite ? hilite_keyword : "") << "UNFREEZE" << (settings.hilite ? hilite_none : "");

         if (!with_name.empty())
         {
@@ -132,6 +132,16 @@ NamesAndTypesList IRowSchemaReader::readSchema()
                 ErrorCodes::INCORRECT_DATA,
                 "The number of column names {} differs with the number of types {}", column_names.size(), data_types.size());
     }
+    else
+    {
+        std::unordered_set<std::string_view> names_set;
+        for (const auto & name : column_names)
+        {
+            if (names_set.contains(name))
+                throw Exception(ErrorCodes::INCORRECT_DATA, "Duplicate column name found while schema inference: \"{}\"", name);
+            names_set.insert(name);
+        }
+    }

     for (size_t i = 0; i != column_names.size(); ++i)
     {
@@ -224,6 +234,9 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
     names_order.reserve(names_and_types.size());
     for (const auto & [name, type] : names_and_types)
     {
+        if (names_to_types.contains(name))
+            throw Exception(ErrorCodes::INCORRECT_DATA, "Duplicate column name found while schema inference: \"{}\"", name);
+
         auto hint_it = hints.find(name);
         if (hint_it != hints.end())
             names_to_types[name] = hint_it->second;
@@ -240,8 +253,13 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
             /// We reached eof.
             break;

+        std::unordered_set<std::string_view> names_set; /// We should check for duplicate column names in current row
         for (auto & [name, new_type] : new_names_and_types)
         {
+            if (names_set.contains(name))
+                throw Exception(ErrorCodes::INCORRECT_DATA, "Duplicate column name found while schema inference: \"{}\"", name);
+            names_set.insert(name);
+
             auto it = names_to_types.find(name);
             /// If we didn't see this column before, just add it.
             if (it == names_to_types.end())
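The duplicate check is the same small pattern in all three places above. A self-contained sketch of it; std::runtime_error stands in for ClickHouse's Exception, which is the only liberty taken:

    #include <stdexcept>
    #include <string>
    #include <string_view>
    #include <unordered_set>
    #include <vector>

    /// One pass, O(1) membership test, fail fast on the first repeated name.
    void checkForDuplicates(const std::vector<std::string> & column_names)
    {
        std::unordered_set<std::string_view> names_set;
        for (const auto & name : column_names)
        {
            if (names_set.contains(name))
                throw std::runtime_error("Duplicate column name found while schema inference: \"" + name + "\"");
            names_set.insert(name); /// string_view keys are safe: the vector outlives the set
        }
    }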
@@ -4,11 +4,6 @@
 namespace DB
 {

-namespace ErrorCodes
-{
-    extern const int LOGICAL_ERROR;
-}
-
 Block ArrayJoinTransform::transformHeader(Block header, const ArrayJoinActionPtr & array_join)
 {
     array_join->execute(header);
@@ -28,7 +28,6 @@ namespace ErrorCodes
 {
     extern const int BAD_ARGUMENTS;
     extern const int NOT_IMPLEMENTED;
-    extern const int ILLEGAL_COLUMN;
     extern const int ILLEGAL_TYPE_OF_ARGUMENT;
 }

@@ -984,22 +983,9 @@ void WindowTransform::writeOutCurrentRow()
             // FIXME does it also allocate the result on the arena?
             // We'll have to pass it out with blocks then...

-            if (a->isState())
-            {
-                /// AggregateFunction's states should be inserted into column using specific way
-                auto * res_col_aggregate_function = typeid_cast<ColumnAggregateFunction *>(result_column);
-                if (!res_col_aggregate_function)
-                {
-                    throw Exception("State function " + a->getName() + " inserts results into non-state column ",
-                                    ErrorCodes::ILLEGAL_COLUMN);
-                }
-                res_col_aggregate_function->insertFrom(buf);
-            }
-            else
-            {
-                a->insertResultInto(buf, *result_column, arena.get());
-            }
+            /// We should use insertMergeResultInto to insert result into ColumnAggregateFunction
+            /// correctly if result contains AggregateFunction's states
+            a->insertMergeResultInto(buf, *result_column, arena.get());
         }
     }

@@ -120,8 +120,15 @@ namespace

 std::pair<String, String> getPathFromUriAndUriWithoutPath(const String & uri)
 {
-    const size_t begin_of_path = uri.find('/', uri.find("//") + 2);
-    return {uri.substr(begin_of_path), uri.substr(0, begin_of_path)};
+    auto pos = uri.find("//");
+    if (pos != std::string::npos && pos + 2 < uri.length())
+    {
+        pos = uri.find('/', pos + 2);
+        if (pos != std::string::npos)
+            return {uri.substr(pos), uri.substr(0, pos)};
+    }
+
+    throw Exception("Storage HDFS requires valid URL to be set", ErrorCodes::BAD_ARGUMENTS);
 }

 std::vector<String> getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context, std::unordered_map<String, time_t> * last_mod_times = nullptr)
@@ -1190,12 +1190,8 @@ void IMergeTreeDataPart::loadColumns(bool require)
         auto in = metadata_manager->read("columns.txt");
         loaded_columns.readText(*in);

-        for (const auto & column : loaded_columns)
-        {
-            const auto * aggregate_function_data_type = typeid_cast<const DataTypeAggregateFunction *>(column.type.get());
-            if (aggregate_function_data_type && aggregate_function_data_type->isVersioned())
-                aggregate_function_data_type->setVersion(0, /* if_empty */true);
-        }
+        for (auto & column : loaded_columns)
+            setVersionToAggregateFunctions(column.type, true);
     }

     SerializationInfo::Settings settings =
@@ -66,7 +66,7 @@ private:
     StorageMetadataPtr metadata_snapshot;
     bool deduplicate;
     Names deduplicate_by_columns;
-    std::shared_ptr<MergeMutateSelectedEntry> merge_mutate_entry{nullptr};
+    MergeMutateSelectedEntryPtr merge_mutate_entry{nullptr};
     TableLockHolder table_lock_holder;
     FutureMergedMutatedPartPtr future_part{nullptr};
     MergeTreeData::MutableDataPartPtr new_part;
@@ -798,7 +798,7 @@ void StorageMergeTree::loadMutations()
         increment.value = std::max(increment.value.load(), current_mutations_by_version.rbegin()->first);
 }

-std::shared_ptr<MergeMutateSelectedEntry> StorageMergeTree::selectPartsToMerge(
+MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge(
     const StorageMetadataPtr & metadata_snapshot,
     bool aggressive,
     const String & partition_id,
@@ -943,7 +943,7 @@ bool StorageMergeTree::merge(

     SelectPartsDecision select_decision;

-    std::shared_ptr<MergeMutateSelectedEntry> merge_mutate_entry;
+    MergeMutateSelectedEntryPtr merge_mutate_entry;

     {
         std::unique_lock lock(currently_processing_in_background_mutex);
@@ -989,7 +989,7 @@ bool StorageMergeTree::partIsAssignedToBackgroundOperation(const DataPartPtr & p
     return currently_merging_mutating_parts.contains(part);
 }

-std::shared_ptr<MergeMutateSelectedEntry> StorageMergeTree::selectPartsToMutate(
+MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate(
     const StorageMetadataPtr & metadata_snapshot, String * /* disable_reason */, TableLockHolder & /* table_lock_holder */,
     std::unique_lock<std::mutex> & /*currently_processing_in_background_mutex_lock*/)
 {
@@ -1132,7 +1132,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign
     assert(!isStaticStorage());

     auto metadata_snapshot = getInMemoryMetadataPtr();
-    std::shared_ptr<MergeMutateSelectedEntry> merge_entry, mutate_entry;
+    MergeMutateSelectedEntryPtr merge_entry, mutate_entry;

     auto share_lock = lockForShare(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations);

@@ -187,7 +187,7 @@ private:

     friend struct CurrentlyMergingPartsTagger;

-    std::shared_ptr<MergeMutateSelectedEntry> selectPartsToMerge(
+    MergeMutateSelectedEntryPtr selectPartsToMerge(
         const StorageMetadataPtr & metadata_snapshot,
         bool aggressive,
         const String & partition_id,
@@ -200,7 +200,7 @@ private:
         SelectPartsDecision * select_decision_out = nullptr);

-    std::shared_ptr<MergeMutateSelectedEntry> selectPartsToMutate(
+    MergeMutateSelectedEntryPtr selectPartsToMutate(
         const StorageMetadataPtr & metadata_snapshot, String * disable_reason,
         TableLockHolder & table_lock_holder, std::unique_lock<std::mutex> & currently_processing_in_background_mutex_lock);

@@ -7673,14 +7673,14 @@ namespace
 /// But sometimes we need the opposite. When we delete all_0_0_0_1 it may not have been replicated to other replicas, so we are the only owner of this part.
 /// In this case, when we drop all_0_0_0_1 we would drop the blobs for all_0_0_0, which would lead to data loss. For such cases we need to check that other replicas
 /// still need the parent part.
-NameSet getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr, const std::string & zero_copy_part_path_prefix, const std::string & part_info_str, MergeTreeDataFormatVersion format_version, Poco::Logger * log)
+std::pair<bool, NameSet> getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr, const std::string & zero_copy_part_path_prefix, const std::string & part_info_str, MergeTreeDataFormatVersion format_version, Poco::Logger * log)
 {
     NameSet files_not_to_remove;

     MergeTreePartInfo part_info = MergeTreePartInfo::fromPartName(part_info_str, format_version);
     /// No mutations -- no hardlinks -- no issues
     if (part_info.mutation == 0)
-        return files_not_to_remove;
+        return {false, files_not_to_remove};

     /// Getting all zero copy parts
     Strings parts_str;
@@ -7725,10 +7725,10 @@ NameSet getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr, const std::stri
                 LOG_TRACE(log, "Found files not to remove from parent part {}: [{}]", part_candidate_info_str, fmt::join(files_not_to_remove, ", "));
             }

-            break;
+            return {true, files_not_to_remove};
         }
     }
-    return files_not_to_remove;
+    return {false, files_not_to_remove};
 }

 }
@@ -7754,7 +7754,7 @@ std::pair<bool, NameSet> StorageReplicatedMergeTree::unlockSharedDataByID(
     if (!files_not_to_remove_str.empty())
         boost::split(files_not_to_remove, files_not_to_remove_str, boost::is_any_of("\n "));

-    auto parent_not_to_remove = getParentLockedBlobs(zookeeper_ptr, fs::path(zc_zookeeper_path).parent_path(), part_name, data_format_version, logger);
+    auto [has_parent, parent_not_to_remove] = getParentLockedBlobs(zookeeper_ptr, fs::path(zc_zookeeper_path).parent_path(), part_name, data_format_version, logger);
     files_not_to_remove.insert(parent_not_to_remove.begin(), parent_not_to_remove.end());

     String zookeeper_part_uniq_node = fs::path(zc_zookeeper_path) / part_id;
@@ -7764,10 +7764,24 @@ std::pair<bool, NameSet> StorageReplicatedMergeTree::unlockSharedDataByID(

     LOG_TRACE(logger, "Remove zookeeper lock {} for part {}", zookeeper_part_replica_node, part_name);

-    if (auto ec = zookeeper_ptr->tryRemove(zookeeper_part_replica_node); ec != Coordination::Error::ZOK && ec != Coordination::Error::ZNONODE)
+    if (auto ec = zookeeper_ptr->tryRemove(zookeeper_part_replica_node); ec != Coordination::Error::ZOK)
+    {
+        /// Very complex case. It means that the lock already didn't exist when we tried to remove it.
+        /// So we don't know whether we own this part or not. Maybe we just mutated it, renamed it on disk and failed to take the lock in ZooKeeper.
+        /// But during mutation we can have hardlinks to another part, so it's not OK to remove the blobs of this part if it was mutated.
+        if (ec == Coordination::Error::ZNONODE)
+        {
+            if (has_parent)
+            {
+                LOG_INFO(logger, "Lock on path {} for part {} doesn't exist, refuse to remove blobs", zookeeper_part_replica_node, part_name);
+                return {false, {}};
+            }
+        }
+        else
         {
             throw zkutil::KeeperException(ec, zookeeper_part_replica_node);
         }
+    }

     /// Check, maybe we were the last replica and can remove part forever
     Strings children;
@@ -19,3 +19,27 @@
 \N
 \N
 2
+--Date Difference--
+\N
+\N
+0
+364
+\N
+\N
+14466
+-
+\N
+\N
+0
+11101
+22017
+\N
+105432
+-
+\N
+\N
+0
+3149094509
+\N
+\N
+1130059331
@@ -5,4 +5,9 @@ select '-';
 select runningDifference(x) from (select arrayJoin([Null, 1]) as x);
 select '-';
 select runningDifference(x) from (select arrayJoin([Null, Null, 1, 3, Null, Null, 5]) as x);
+select '--Date Difference--';
+select runningDifference(x) from (select arrayJoin([Null, Null, toDate('1970-1-1'), toDate('1970-12-31'), Null, Null, toDate('2010-8-9')]) as x);
+select '-';
+select runningDifference(x) from (select arrayJoin([Null, Null, toDate32('1900-1-1'), toDate32('1930-5-25'), toDate('1990-9-4'), Null, toDate32('2279-5-4')]) as x);
+select '-';
+select runningDifference(x) from (select arrayJoin([Null, Null, toDateTime('1970-06-28 23:48:12', 'Asia/Istanbul'), toDateTime('2070-04-12 21:16:41', 'Asia/Istanbul'), Null, Null, toDateTime('2106-02-03 06:38:52', 'Asia/Istanbul')]) as x);
tests/queries/0_stateless/02294_anova_cmp.python (new file, 86 lines)
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+import os
+import sys
+from statistics import variance
+from scipy import stats
+import pandas as pd
+import numpy as np
+
+CURDIR = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
+
+from pure_http_client import ClickHouseClient
+
+
+# one-way ANOVA F-test across groups, computed with scipy as the reference implementation
+def scipy_anova(rvs):
+    return stats.f_oneway(*rvs)
+
+
+def test_and_check(rvs, n_groups, f_stat, p_value, precision=1e-2):
+    client = ClickHouseClient()
+    client.query("DROP TABLE IF EXISTS anova;")
+    client.query("CREATE TABLE anova (left Float64, right UInt64) ENGINE = Memory;")
+    for group in range(n_groups):
+        client.query(f'''INSERT INTO anova VALUES {", ".join([f'({i},{group})' for i in rvs[group]])};''')
+
+    real = client.query_return_df(
+        '''SELECT roundBankers(a.1, 16) as f_stat, roundBankers(a.2, 16) as p_value FROM (SELECT anova(left, right) as a FROM anova) FORMAT TabSeparatedWithNames;''')
+
+    real_f_stat = real['f_stat'][0]
+    real_p_value = real['p_value'][0]
+    assert(abs(real_f_stat - np.float64(f_stat)) < precision), f"clickhouse_f_stat {real_f_stat}, py_f_stat {f_stat}"
+    assert(abs(real_p_value - np.float64(p_value)) < precision), f"clickhouse_p_value {real_p_value}, py_p_value {p_value}"
+    client.query("DROP TABLE IF EXISTS anova;")
+
+
+def test_anova():
+    n_groups = 3
+    rvs = []
+    loc = 0
+    scale = 5
+    size = 500
+    for _ in range(n_groups):
+        rvs.append(np.round(stats.norm.rvs(loc=loc, scale=scale, size=size), 2))
+        loc += 5
+    f_stat, p_value = scipy_anova(rvs)
+    test_and_check(rvs, n_groups, f_stat, p_value)
+
+    n_groups = 6
+    rvs = []
+    loc = 0
+    scale = 5
+    size = 500
+    for _ in range(n_groups):
+        rvs.append(np.round(stats.norm.rvs(loc=loc, scale=scale, size=size), 2))
+    f_stat, p_value = scipy_anova(rvs)
+    test_and_check(rvs, n_groups, f_stat, p_value)
+
+    n_groups = 10
+    rvs = []
+    loc = 1
+    scale = 2
+    size = 100
+    for _ in range(n_groups):
+        rvs.append(np.round(stats.norm.rvs(loc=loc, scale=scale, size=size), 2))
+        loc += 1
+        scale += 2
+        size += 100
+    f_stat, p_value = scipy_anova(rvs)
+    test_and_check(rvs, n_groups, f_stat, p_value)
+
+    n_groups = 20
+    rvs = []
+    loc = 0
+    scale = 10
+    size = 1100
+    for _ in range(n_groups):
+        rvs.append(np.round(stats.norm.rvs(loc=loc, scale=scale, size=size), 2))
+        size -= 50
+    f_stat, p_value = scipy_anova(rvs)
+    test_and_check(rvs, n_groups, f_stat, p_value)
+
+
+if __name__ == "__main__":
+    test_anova()
+    print("Ok.")
tests/queries/0_stateless/02294_anova_cmp.reference (new file, 1 line)
@@ -0,0 +1 @@
+Ok.
tests/queries/0_stateless/02294_anova_cmp.sh (new executable file, 9 lines)
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+# We should have correct env vars from shell_config.sh to run this test
+
+python3 "$CURDIR"/02294_anova_cmp.python
@@ -0,0 +1,50 @@
+{1:'\0ёwRп'}
+{1:'\0Dѕ@='}
+{1:'\07Ќ<37><D08C>'}
+{1:'\0СтоВ'}
+{1:'\0"Q<06>'}
+{1:'\0V\'<27>у'}
+{1:'\0вт\0Ј'}
+{1:'\0Ѓ_Ч'}
+{1:'\0qЕ4h'}
+{1:'\0gд7'}
+['\0ёwRп']
+['\0Dѕ@=']
+['\07Ќ<37><D08C>']
+['\0СтоВ']
+['\0"Q<06>']
+['\0V\'<27>у']
+['\0вт\0Ј']
+['\0Ѓ_Ч']
+['\0qЕ4h']
+['\0gд7']
+['\0щZТ','\0\0']
+['\0т4nџ','\0\0']
+['\0<01>о<EFBFBD>','\0\0']
+['\0Й<>№1','\0\0']
+['\0<01>_<V','\0\0']
+['\0ЪJА','\0\0']
+['\0DЖ\03','\0\0']
+['\0й6%','\0\0']
+['\0@<40>','\0\0']
+['\0gЙљ','\0\0']
+[{1:['\0ёwRп']}]
+[{1:['\0Dѕ@=']}]
+[{1:['\07Ќ<37><D08C>']}]
+[{1:['\0СтоВ']}]
+[{1:['\0"Q<06>']}]
+[{1:['\0V\'<27>у']}]
+[{1:['\0вт\0Ј']}]
+[{1:['\0Ѓ_Ч']}]
+[{1:['\0qЕ4h']}]
+[{1:['\0gд7']}]
+{1:'\0ёwRп'}
+{1:'\0Dѕ@='}
+{1:'\07Ќ<37><D08C>'}
+{1:'\0СтоВ'}
+{1:'\0"Q<06>'}
+{1:'\0V\'<27>у'}
+{1:'\0вт\0Ј'}
+{1:'\0Ѓ_Ч'}
+{1:'\0qЕ4h'}
+{1:'\0gд7'}
@@ -0,0 +1,10 @@
+drop table if exists test;
+create table test (x AggregateFunction(uniq, UInt64), y Int64) engine=Memory;
+insert into test select uniqState(number) as x, number as y from numbers(10) group by number;
+select uniqStateMap(map(1, x)) OVER (PARTITION BY y) from test;
+select uniqStateForEach([x]) OVER (PARTITION BY y) from test;
+select uniqStateResample(30, 75, 30)([x], 30) OVER (PARTITION BY y) from test;
+select uniqStateForEachMapForEach([map(1, [x])]) OVER (PARTITION BY y) from test;
+select uniqStateDistinctMap(map(1, x)) OVER (PARTITION BY y) from test;
+drop table test;
+
@@ -0,0 +1,6 @@
+{1:'\0çƒe'}
+['\0,ËÂ4çƒe']
+[{1:['\0çƒe']}]
+[{1:'\0‰\f¤”µýŸ¿¼'},{1:'\0‰\f¤”µº#¾q'},{1:'\0‰\f*<•º#¾q'}]
+[['\0‰\f¤”µýŸ¿¼'],['\0‰\f¤”µº#¾q'],['\0‰\f*<•º#¾q']]
+[[{1:['\0‰\f¤”µýŸ¿¼']}],[{1:['\0‰\f¤”µº#¾q']}],[{1:['\0‰\f*<•º#¾q']}]]
@@ -0,0 +1,6 @@
+select arrayReduce('uniqStateMap', [map(1, 2)]);
+select arrayReduce('uniqStateForEach', [[1], [2]]);
+select arrayReduce('uniqStateForEachMapForEach', [[map(1, [2])]]);
+select arrayReduceInRanges('uniqStateMap', [(1, 3), (2, 3), (3, 3)], [map(1, 'a'), map(1, 'b'), map(1, 'c'), map(1, 'd'), map(1, 'e')]);
+select arrayReduceInRanges('uniqStateForEach', [(1, 3), (2, 3), (3, 3)], [['a'], ['b'], ['c'],['d'], ['e']]);
+select arrayReduceInRanges('uniqStateForEachMapForEach', [(1, 3), (2, 3), (3, 3)], [[map(1, ['a'])], [map(1, ['b'])], [map(1, ['c'])], [map(1, ['d'])], [map(1, ['e'])]]);
@@ -0,0 +1,3 @@
+{1:'\0çƒe'}
+['\0,ËÂ4','\0çƒe']
+[{1:['\0çƒe']}]
@@ -0,0 +1,4 @@
+select initializeAggregation('uniqStateMap', map(1, 2));
+select initializeAggregation('uniqStateForEach', [1, 2]);
+select initializeAggregation('uniqStateForEachMapForEach', [map(1, [2])]);
+
@@ -0,0 +1,7 @@
+-- Tags: no-fasttest
+
+desc format(JSONEachRow, '{"x" : 1, "x" : 2}'); -- {serverError INCORRECT_DATA}
+desc format(JSONEachRow, '{"x" : 1, "y" : 2}\n{"x" : 2, "x" : 3}'); -- {serverError INCORRECT_DATA}
+desc format(CSVWithNames, 'a,b,a\n1,2,3'); -- {serverError INCORRECT_DATA}
+desc format(CSV, '1,2,3') settings column_names_for_schema_inference='a, b, a'; -- {serverError INCORRECT_DATA}
+
@@ -0,0 +1,11 @@
+222222222222222
+22222222222.2222
+2022-04-22 03:45:06.381000000
+2022-04-22T03:45:06.381Z
+01/12/1925
+2022-04-22 03:45:06.381000000
+2022-04-22 03:45:06.381000000
+1925-12-01 00:00:00.000000000
+2022-04-22 03:45:06.381000000
+2022-04-22 03:45:06.381000000
+1925-01-12 00:00:00.000000000
@@ -0,0 +1,15 @@
+select * from format('TSV', '222222222222222');
+select * from format('TSV', '22222222222.2222');
+set date_time_input_format = 'basic';
+select * from format('TSV', '2022-04-22T03:45:06.381');
+select * from format('TSV', '2022-04-22T03:45:06.381Z');
+select * from format('TSV', '01/12/1925');
+set date_time_input_format = 'best_effort';
+select * from format('TSV', '2022-04-22T03:45:06.381');
+select toTimeZone(c1, 'UTC') from format('TSV', '2022-04-22T03:45:06.381Z');
+select * from format('TSV', '01/12/1925');
+set date_time_input_format = 'best_effort_us';
+select * from format('TSV', '2022-04-22T03:45:06.381');
+select toTimeZone(c1, 'UTC') from format('TSV', '2022-04-22T03:45:06.381Z');
+select * from format('TSV', '01/12/1925');
+
tests/queries/0_stateless/02458_empty_hdfs_url.sql (new file, 5 lines)
@@ -0,0 +1,5 @@
+-- Tags: no-fasttest, no-cpu-aarch64
+SELECT * FROM hdfsCluster('test_shard_localhost', '', 'TSV'); -- { serverError BAD_ARGUMENTS }
+SELECT * FROM hdfsCluster('test_shard_localhost', ' ', 'TSV'); -- { serverError BAD_ARGUMENTS }
+SELECT * FROM hdfsCluster('test_shard_localhost', '/', 'TSV'); -- { serverError BAD_ARGUMENTS }
+SELECT * FROM hdfsCluster('test_shard_localhost', 'http/', 'TSV'); -- { serverError BAD_ARGUMENTS }
@@ -0,0 +1,20 @@
+0 4950
+1 14950
+2 24950
+3 34950
+4 44950
+5 54950
+6 64950
+7 74950
+8 84950
+9 94950
+0 4950
+1 14950
+2 24950
+3 34950
+4 44950
+5 54950
+6 64950
+7 74950
+8 84950
+9 94950
@@ -0,0 +1,9 @@
+SET allow_suspicious_low_cardinality_types = 1;
+-- LC UInt128
+CREATE TABLE group_by_pk_lc_uint128 (`k` LowCardinality(UInt128), `v` UInt32) ENGINE = MergeTree ORDER BY k PARTITION BY v%50;
+INSERT INTO group_by_pk_lc_uint128 SELECT number / 100, number FROM numbers(1000);
+SELECT k, sum(v) AS s FROM group_by_pk_lc_uint128 GROUP BY k ORDER BY k ASC LIMIT 1024 SETTINGS optimize_aggregation_in_order = 1;
+-- LC UInt256
+CREATE TABLE group_by_pk_lc_uint256 (`k` LowCardinality(UInt256), `v` UInt32) ENGINE = MergeTree ORDER BY k PARTITION BY v%50;
+INSERT INTO group_by_pk_lc_uint256 SELECT number / 100, number FROM numbers(1000);
+SELECT k, sum(v) AS s FROM group_by_pk_lc_uint256 GROUP BY k ORDER BY k ASC LIMIT 1024 SETTINGS optimize_aggregation_in_order = 1;
@@ -0,0 +1,2 @@
+1 0
+\N 1
12
tests/queries/0_stateless/02461_join_lc_issue_42380.sql
Normal file
@ -0,0 +1,12 @@
DROP TABLE IF EXISTS t1__fuzz_13;
DROP TABLE IF EXISTS t2__fuzz_47;

SET allow_suspicious_low_cardinality_types = 1;

CREATE TABLE t1__fuzz_13 (id Nullable(Int16)) ENGINE = MergeTree() ORDER BY id SETTINGS allow_nullable_key = 1;
CREATE TABLE t2__fuzz_47 (id LowCardinality(Int16)) ENGINE = MergeTree() ORDER BY id;

INSERT INTO t1__fuzz_13 VALUES (1);
INSERT INTO t2__fuzz_47 VALUES (1);

SELECT * FROM t1__fuzz_13 FULL OUTER JOIN t2__fuzz_47 ON 1 = 2;
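With an always-false condition, every row of both tables is unmatched, so the FULL OUTER JOIN must pad both sides: the LowCardinality(Int16) side is non-nullable and gets its default 0, giving (1, 0), while the Nullable side gets NULL, giving (\N, 1), exactly the two-line reference above. A minimal sketch of the same padding rule without the fuzzer tables (illustrative subqueries; the SET is carried over from the test and may be unnecessary for expressions):

SET allow_suspicious_low_cardinality_types = 1;
SELECT * FROM (SELECT toNullable(toInt16(1)) AS l) AS t1
FULL OUTER JOIN (SELECT toLowCardinality(toInt16(1)) AS r) AS t2 ON 1 = 2;
-- expected: (1, 0) and (\N, 1)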
@ -59,10 +59,7 @@ declare -A EXTERN_TYPES
EXTERN_TYPES[ErrorCodes]=int
EXTERN_TYPES[ProfileEvents]=Event
EXTERN_TYPES[CurrentMetrics]=Metric
-declare -A EXTERN_ALLOWED_CHARS
-EXTERN_ALLOWED_CHARS[ErrorCodes]='_A-Z'
-EXTERN_ALLOWED_CHARS[ProfileEvents]='_A-Za-z'
-EXTERN_ALLOWED_CHARS[CurrentMetrics]='_A-Za-z'
+
EXTERN_TYPES_EXCLUDES=(
    ProfileEvents::global_counters
    ProfileEvents::Event
@ -87,18 +84,30 @@ EXTERN_TYPES_EXCLUDES=(
    CurrentMetrics::Metric
    CurrentMetrics::values
    CurrentMetrics::Value
+
+    ErrorCodes::ErrorCode
+    ErrorCodes::getName
+    ErrorCodes::increment
+    ErrorCodes::end
+    ErrorCodes::values
+    ErrorCodes::values[i]
+    ErrorCodes::getErrorCodeByName
)
for extern_type in ${!EXTERN_TYPES[@]}; do
    type_of_extern=${EXTERN_TYPES[$extern_type]}
-    allowed_chars=${EXTERN_ALLOWED_CHARS[$extern_type]}
+    allowed_chars='[_A-Za-z]+'

    # Unused
    # NOTE: to fix automatically, replace echo with:
    # sed -i "/extern const $type_of_extern $val/d" $file
    find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | {
-        grep -vP $EXCLUDE_DIRS | xargs grep -l -P "extern const $type_of_extern [$allowed_chars]+"
+        # NOTE: the check is pretty dumb and distinguishes only by the type_of_extern,
+        # and this matches with zkutil::CreateMode
+        grep -v 'src/Common/ZooKeeper/Types.h'
+    } | {
+        grep -vP $EXCLUDE_DIRS | xargs grep -l -P "extern const $type_of_extern $allowed_chars"
    } | while read file; do
-        grep -P "extern const $type_of_extern [$allowed_chars]+;" $file | sed -r -e "s/^.*?extern const $type_of_extern ([$allowed_chars]+);.*?$/\1/" | while read val; do
+        grep -P "extern const $type_of_extern $allowed_chars;" $file | sed -r -e "s/^.*?extern const $type_of_extern ($allowed_chars);.*?$/\1/" | while read val; do
            if ! grep -q "$extern_type::$val" $file; then
                # Excludes for SOFTWARE_EVENT/HARDWARE_EVENT/CACHE_EVENT in ThreadProfileEvents.cpp
                if [[ ! $extern_type::$val =~ ProfileEvents::Perf.* ]]; then
@ -110,11 +119,13 @@ for extern_type in ${!EXTERN_TYPES[@]}; do

    # Undefined
    # NOTE: to fix automatically, replace echo with:
-    # ( grep -q -F 'namespace $extern_type' $file && sed -i -r "0,/(\s*)extern const $type_of_extern [$allowed_chars]+/s//\1extern const $type_of_extern $val;\n&/" $file || awk '{ print; if (ns == 1) { ns = 2 }; if (ns == 2) { ns = 0; print "namespace $extern_type\n{\n extern const $type_of_extern '$val';\n}" } }; /namespace DB/ { ns = 1; };' < $file > ${file}.tmp && mv ${file}.tmp $file )
+    # ( grep -q -F 'namespace $extern_type' $file && \
+    #   sed -i -r "0,/(\s*)extern const $type_of_extern [$allowed_chars]+/s//\1extern const $type_of_extern $val;\n&/" $file || \
+    #   awk '{ print; if (ns == 1) { ns = 2 }; if (ns == 2) { ns = 0; print "namespace $extern_type\n{\n extern const $type_of_extern '$val';\n}" } }; /namespace DB/ { ns = 1; };' < $file > ${file}.tmp && mv ${file}.tmp $file )
    find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | {
-        grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::[$allowed_chars]+"
+        grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::$allowed_chars"
    } | while read file; do
-        grep -P "$extern_type::[$allowed_chars]+" $file | sed -r -e "s/^.*?$extern_type::([$allowed_chars]+).*?$/\1/" | while read val; do
+        grep -P "$extern_type::$allowed_chars" $file | grep -P -v '^\s*//' | sed -r -e "s/^.*?$extern_type::($allowed_chars).*?$/\1/" | while read val; do
            if ! grep -q "extern const $type_of_extern $val" $file; then
                if ! in_array "$extern_type::$val" "${EXTERN_TYPES_EXCLUDES[@]}"; then
                    echo "$extern_type::$val is used in file $file but not defined"
@ -125,9 +136,9 @@ for extern_type in ${!EXTERN_TYPES[@]}; do

    # Duplicates
    find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | {
-        grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::[$allowed_chars]+"
+        grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::$allowed_chars"
    } | while read file; do
-        grep -P "extern const $type_of_extern [$allowed_chars]+;" $file | sort | uniq -c | grep -v -P ' +1 ' && echo "Duplicate $extern_type in file $file"
+        grep -P "extern const $type_of_extern $allowed_chars;" $file | sort | uniq -c | grep -v -P ' +1 ' && echo "Duplicate $extern_type in file $file"
    done
done