mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge branch 'master' into align-branch-within-32b
This commit is contained in:
commit
d8b1b44a29
@ -176,6 +176,8 @@ CheckOptions:
|
||||
value: CamelCase
|
||||
- key: modernize-loop-convert.UseCxx20ReverseRanges
|
||||
value: false
|
||||
- key: performance-move-const-arg.CheckTriviallyCopyableMove
|
||||
value: false
|
||||
# Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
|
||||
- key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp
|
||||
value: expr-type
|
||||
|
2
.github/ISSUE_TEMPLATE/85_bug-report.md
vendored
2
.github/ISSUE_TEMPLATE/85_bug-report.md
vendored
@ -1,6 +1,6 @@
|
||||
---
|
||||
name: Bug report
|
||||
about: Wrong behaviour (visible to users) in official ClickHouse release.
|
||||
about: Wrong behavior (visible to users) in the official ClickHouse release.
|
||||
title: ''
|
||||
labels: 'potential bug'
|
||||
assignees: ''
|
||||
|
4
.github/workflows/backport.yml
vendored
4
.github/workflows/backport.yml
vendored
@ -9,9 +9,11 @@ concurrency:
|
||||
on: # yamllint disable-line rule:truthy
|
||||
schedule:
|
||||
- cron: '0 */3 * * *'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
CherryPick:
|
||||
runs-on: [self-hosted, style-checker]
|
||||
runs-on: [self-hosted, style-checker-aarch64]
|
||||
steps:
|
||||
- name: Set envs
|
||||
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
|
||||
|
35
.github/workflows/backport_branches.yml
vendored
35
.github/workflows/backport_branches.yml
vendored
@ -143,6 +143,8 @@ jobs:
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # For a proper version and performance artifacts
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
@ -188,6 +190,8 @@ jobs:
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # For a proper version and performance artifacts
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
@ -346,6 +350,36 @@ jobs:
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
############################################################################################
|
||||
DockerServerImages:
|
||||
needs:
|
||||
- BuilderDebRelease
|
||||
- BuilderDebAarch64
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type head --no-push
|
||||
python3 docker_server.py --release-type head --no-push --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
############################################################################################
|
||||
##################################### BUILD REPORTER #######################################
|
||||
############################################################################################
|
||||
BuilderReport:
|
||||
@ -556,6 +590,7 @@ jobs:
|
||||
FinishCheck:
|
||||
needs:
|
||||
- DockerHubPush
|
||||
- DockerServerImages
|
||||
- BuilderReport
|
||||
- FunctionalStatelessTestAsan
|
||||
- FunctionalStatefulTestDebug
|
||||
|
4
.github/workflows/master.yml
vendored
4
.github/workflows/master.yml
vendored
@ -643,7 +643,7 @@ jobs:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinTidy:
|
||||
BuilderBinClangTidy:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
@ -1011,7 +1011,7 @@ jobs:
|
||||
- BuilderBinFreeBSD
|
||||
# - BuilderBinGCC
|
||||
- BuilderBinPPC64
|
||||
- BuilderBinTidy
|
||||
- BuilderBinClangTidy
|
||||
- BuilderDebSplitted
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
|
4
.github/workflows/pull_request.yml
vendored
4
.github/workflows/pull_request.yml
vendored
@ -707,7 +707,7 @@ jobs:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinTidy:
|
||||
BuilderBinClangTidy:
|
||||
needs: [DockerHubPush, FastTest]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
@ -1065,7 +1065,7 @@ jobs:
|
||||
- BuilderBinFreeBSD
|
||||
# - BuilderBinGCC
|
||||
- BuilderBinPPC64
|
||||
- BuilderBinTidy
|
||||
- BuilderBinClangTidy
|
||||
- BuilderDebSplitted
|
||||
runs-on: [self-hosted, style-checker]
|
||||
if: ${{ success() || failure() }}
|
||||
|
3
.github/workflows/release.yml
vendored
3
.github/workflows/release.yml
vendored
@ -21,6 +21,9 @@ jobs:
|
||||
EOF
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
# Always use the most recent script version
|
||||
ref: master
|
||||
- name: Download packages and push to Artifactory
|
||||
run: |
|
||||
rm -rf "$TEMP_PATH" && mkdir -p "$TEMP_PATH"
|
||||
|
31
.github/workflows/release_branches.yml
vendored
31
.github/workflows/release_branches.yml
vendored
@ -427,6 +427,36 @@ jobs:
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
############################################################################################
|
||||
DockerServerImages:
|
||||
needs:
|
||||
- BuilderDebRelease
|
||||
- BuilderDebAarch64
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type head --no-push
|
||||
python3 docker_server.py --release-type head --no-push --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
############################################################################################
|
||||
##################################### BUILD REPORTER #######################################
|
||||
############################################################################################
|
||||
BuilderReport:
|
||||
@ -1815,6 +1845,7 @@ jobs:
|
||||
FinishCheck:
|
||||
needs:
|
||||
- DockerHubPush
|
||||
- DockerServerImages
|
||||
- BuilderReport
|
||||
- FunctionalStatelessTestDebug0
|
||||
- FunctionalStatelessTestDebug1
|
||||
|
15
.gitmodules
vendored
15
.gitmodules
vendored
@ -86,9 +86,6 @@
|
||||
[submodule "contrib/h3"]
|
||||
path = contrib/h3
|
||||
url = https://github.com/ClickHouse/h3
|
||||
[submodule "contrib/hyperscan"]
|
||||
path = contrib/hyperscan
|
||||
url = https://github.com/ClickHouse/hyperscan.git
|
||||
[submodule "contrib/libunwind"]
|
||||
path = contrib/libunwind
|
||||
url = https://github.com/ClickHouse/libunwind.git
|
||||
@ -268,3 +265,15 @@
|
||||
[submodule "contrib/hashidsxx"]
|
||||
path = contrib/hashidsxx
|
||||
url = https://github.com/schoentoon/hashidsxx.git
|
||||
[submodule "contrib/nats-io"]
|
||||
path = contrib/nats-io
|
||||
url = https://github.com/ClickHouse/nats.c.git
|
||||
[submodule "contrib/vectorscan"]
|
||||
path = contrib/vectorscan
|
||||
url = https://github.com/VectorCamp/vectorscan.git
|
||||
[submodule "contrib/liburing"]
|
||||
path = contrib/liburing
|
||||
url = https://github.com/axboe/liburing.git
|
||||
[submodule "contrib/base-x"]
|
||||
path = contrib/base-x
|
||||
url = https://github.com/ClickHouse/base-x.git
|
||||
|
@ -13,9 +13,7 @@ max-statements=200
|
||||
ignore-long-lines = (# )?<?https?://\S+>?$
|
||||
|
||||
[MESSAGES CONTROL]
|
||||
disable = bad-continuation,
|
||||
missing-docstring,
|
||||
bad-whitespace,
|
||||
disable = missing-docstring,
|
||||
too-few-public-methods,
|
||||
invalid-name,
|
||||
too-many-arguments,
|
||||
|
170
CHANGELOG.md
170
CHANGELOG.md
@ -1,10 +1,176 @@
|
||||
### Table of Contents
|
||||
**[ClickHouse release v22.6, 2022-06-16](#226)**<br>
|
||||
**[ClickHouse release v22.5, 2022-05-19](#225)**<br>
|
||||
**[ClickHouse release v22.4, 2022-04-20](#224)**<br>
|
||||
**[ClickHouse release v22.3-lts, 2022-03-17](#223)**<br>
|
||||
**[ClickHouse release v22.2, 2022-02-17](#222)**<br>
|
||||
**[ClickHouse release v22.1, 2022-01-18](#221)**<br>
|
||||
**[Changelog for 2021](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats-new/changelog/2021.md)**<br>
|
||||
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**<br>
|
||||
|
||||
### <a id="226"></a> ClickHouse release 22.6, 2022-06-16
|
||||
|
||||
#### Backward Incompatible Change
|
||||
* Remove support for octal number literals in SQL. In previous versions they were parsed as Float64. [#37765](https://github.com/ClickHouse/ClickHouse/pull/37765) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Changes how settings using `seconds` as type are parsed to support floating point values (for example: `max_execution_time=0.5`). Infinity or NaN values will throw an exception. [#37187](https://github.com/ClickHouse/ClickHouse/pull/37187) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Changed format of binary serialization of columns of experimental type `Object`. New format is more convenient to implement by third-party clients. [#37482](https://github.com/ClickHouse/ClickHouse/pull/37482) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Turn on setting `output_format_json_named_tuples_as_objects` by default. It allows to serialize named tuples as JSON objects in JSON formats. [#37756](https://github.com/ClickHouse/ClickHouse/pull/37756) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* LIKE patterns with trailing escape symbol ('\\') are now disallowed (as mandated by the SQL standard). [#37764](https://github.com/ClickHouse/ClickHouse/pull/37764) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* If you run different ClickHouse versions on a cluster with AArch64 CPU or mix AArch64 and amd64 on a cluster, and use distributed queries with GROUP BY multiple keys of fixed-size type that fit in 256 bits but don't fit in 64 bits, and the size of the result is huge, the data will not be fully aggregated in the result of these queries during upgrade. Workaround: upgrade with downtime instead of a rolling upgrade.
|
||||
|
||||
#### New Feature
|
||||
* Add `GROUPING` function. It allows to disambiguate the records in the queries with `ROLLUP`, `CUBE` or `GROUPING SETS`. Closes [#19426](https://github.com/ClickHouse/ClickHouse/issues/19426). [#37163](https://github.com/ClickHouse/ClickHouse/pull/37163) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* A new codec [FPC](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf) algorithm for floating point data compression. [#37553](https://github.com/ClickHouse/ClickHouse/pull/37553) ([Mikhail Guzov](https://github.com/koloshmet)).
|
||||
* Add new columnar JSON formats: `JSONColumns`, `JSONCompactColumns`, `JSONColumnsWithMetadata`. Closes [#36338](https://github.com/ClickHouse/ClickHouse/issues/36338) Closes [#34509](https://github.com/ClickHouse/ClickHouse/issues/34509). [#36975](https://github.com/ClickHouse/ClickHouse/pull/36975) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Added open telemetry traces visualizing tool based on d3js. [#37810](https://github.com/ClickHouse/ClickHouse/pull/37810) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Support INSERTs into `system.zookeeper` table. Closes [#22130](https://github.com/ClickHouse/ClickHouse/issues/22130). [#37596](https://github.com/ClickHouse/ClickHouse/pull/37596) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Support non-constant pattern argument for `LIKE`, `ILIKE` and `match` functions. [#37251](https://github.com/ClickHouse/ClickHouse/pull/37251) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Executable user defined functions now support parameters. Example: `SELECT test_function(parameters)(arguments)`. Closes [#37578](https://github.com/ClickHouse/ClickHouse/issues/37578). [#37720](https://github.com/ClickHouse/ClickHouse/pull/37720) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Add `merge_reason` column to system.part_log table. [#36912](https://github.com/ClickHouse/ClickHouse/pull/36912) ([Sema Checherinda](https://github.com/CheSema)).
|
||||
* Add support for Maps and Records in Avro format. Add new setting `input_format_avro_null_as_default` that allows inserting null as default in Avro format. Closes [#18925](https://github.com/ClickHouse/ClickHouse/issues/18925) Closes [#37378](https://github.com/ClickHouse/ClickHouse/issues/37378) Closes [#32899](https://github.com/ClickHouse/ClickHouse/issues/32899). [#37525](https://github.com/ClickHouse/ClickHouse/pull/37525) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Add `clickhouse-disks` tool to introspect and operate on virtual filesystems configured for ClickHouse. [#36060](https://github.com/ClickHouse/ClickHouse/pull/36060) ([Artyom Yurkov](https://github.com/Varinara)).
|
||||
* Adds H3 unidirectional edge functions. [#36843](https://github.com/ClickHouse/ClickHouse/pull/36843) ([Bharat Nallan](https://github.com/bharatnc)).
|
||||
* Add support for calculating [hashids](https://hashids.org/) from unsigned integers. [#37013](https://github.com/ClickHouse/ClickHouse/pull/37013) ([Michael Nutt](https://github.com/mnutt)).
|
||||
* Explicit `SALT` specification is allowed for `CREATE USER <user> IDENTIFIED WITH sha256_hash`. [#37377](https://github.com/ClickHouse/ClickHouse/pull/37377) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Add two new settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines` to allow skipping specified number of lines in the beginning of the file in CSV/TSV formats. [#37537](https://github.com/ClickHouse/ClickHouse/pull/37537) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* `showCertificate` function shows current server's SSL certificate. [#37540](https://github.com/ClickHouse/ClickHouse/pull/37540) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* HTTP source for Data Dictionaries in Named Collections is supported. [#37581](https://github.com/ClickHouse/ClickHouse/pull/37581) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Implemented changing the comment for `ReplicatedMergeTree` tables. [#37416](https://github.com/ClickHouse/ClickHouse/pull/37416) ([Vasily Nemkov](https://github.com/Enmk)).
|
||||
* Added `SYSTEM UNFREEZE` query that deletes the whole backup regardless of whether the corresponding table is deleted or not. [#36424](https://github.com/ClickHouse/ClickHouse/pull/36424) ([Vadim Volodin](https://github.com/PolyProgrammist)).
|
||||
|
||||
#### Experimental Feature
|
||||
* Enables `POPULATE` for `WINDOW VIEW`. [#36945](https://github.com/ClickHouse/ClickHouse/pull/36945) ([vxider](https://github.com/Vxider)).
|
||||
* `ALTER TABLE ... MODIFY QUERY` support for `WINDOW VIEW`. [#37188](https://github.com/ClickHouse/ClickHouse/pull/37188) ([vxider](https://github.com/Vxider)).
|
||||
* This PR changes the behavior of the `ENGINE` syntax in `WINDOW VIEW`, to make it like in `MATERIALIZED VIEW`. [#37214](https://github.com/ClickHouse/ClickHouse/pull/37214) ([vxider](https://github.com/Vxider)).
|
||||
|
||||
#### Performance Improvement
|
||||
* Added numerous optimizations for ARM NEON. [#38093](https://github.com/ClickHouse/ClickHouse/pull/38093) ([Daniel Kutenin](https://github.com/danlark1), [Alexandra Pilipyuk](https://github.com/chalice19)). Note: if you run different ClickHouse versions on a cluster with ARM CPU and use distributed queries with GROUP BY multiple keys of fixed-size type that fit in 256 bits but don't fit in 64 bits, the result of the aggregation query will be wrong during upgrade. Workaround: upgrade with downtime instead of a rolling upgrade.
|
||||
* Improve performance and memory usage for select of subset of columns for formats Native, Protobuf, CapnProto, JSONEachRow, TSKV, all formats with suffixes WithNames/WithNamesAndTypes. Previously while selecting only subset of columns from files in these formats all columns were read and stored in memory. Now only required columns are read. This PR enables setting `input_format_skip_unknown_fields` by default, because otherwise in case of select of subset of columns exception will be thrown. [#37192](https://github.com/ClickHouse/ClickHouse/pull/37192) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Now more filters can be pushed down for join. [#37472](https://github.com/ClickHouse/ClickHouse/pull/37472) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Load marks for only necessary columns when reading wide parts. [#36879](https://github.com/ClickHouse/ClickHouse/pull/36879) ([Anton Kozlov](https://github.com/tonickkozlov)).
|
||||
* Improved performance of aggregation in case, when sparse columns (can be enabled by experimental setting `ratio_of_defaults_for_sparse_serialization` in `MergeTree` tables) are used as arguments in aggregate functions. [#37617](https://github.com/ClickHouse/ClickHouse/pull/37617) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Optimize function `COALESCE` with two arguments. [#37666](https://github.com/ClickHouse/ClickHouse/pull/37666) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Replace `multiIf` with `if` in case when `multiIf` has only one condition, because function `if` is more performant. [#37695](https://github.com/ClickHouse/ClickHouse/pull/37695) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Improve performance of `dictGetDescendants`, `dictGetChildren` functions, create temporary parent to children hierarchical index per query, not per function call during query. Allow to specify `BIDIRECTIONAL` for `HIERARCHICAL` attributes, dictionary will maintain parent to children index in memory, that way functions `dictGetDescendants`, `dictGetChildren` will not create temporary index per query. Closes [#32481](https://github.com/ClickHouse/ClickHouse/issues/32481). [#37148](https://github.com/ClickHouse/ClickHouse/pull/37148) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Aggregates state destruction now may be posted on a thread pool. For queries with LIMIT and big state it provides significant speedup, e.g. `select uniq(number) from numbers_mt(1e7) group by number limit 100` became around 2.5x faster. [#37855](https://github.com/ClickHouse/ClickHouse/pull/37855) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Improve sort performance by single column. [#37195](https://github.com/ClickHouse/ClickHouse/pull/37195) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Improve performance of single column sorting using sorting queue specializations. [#37990](https://github.com/ClickHouse/ClickHouse/pull/37990) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Improved performance of array norm and distance functions by 2x-4x. [#37394](https://github.com/ClickHouse/ClickHouse/pull/37394) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Improve performance of number comparison functions using dynamic dispatch. [#37399](https://github.com/ClickHouse/ClickHouse/pull/37399) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Improve performance of ORDER BY with LIMIT. [#37481](https://github.com/ClickHouse/ClickHouse/pull/37481) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Improve performance of `hasAll` function using dynamic dispatch infrastructure. [#37484](https://github.com/ClickHouse/ClickHouse/pull/37484) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Improve performance of `greatCircleAngle`, `greatCircleDistance`, `geoDistance` functions. [#37524](https://github.com/ClickHouse/ClickHouse/pull/37524) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Improve performance of insert into MergeTree if there are multiple columns in ORDER BY. [#35762](https://github.com/ClickHouse/ClickHouse/pull/35762) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix excessive CPU usage in background when there are a lot of tables. [#38028](https://github.com/ClickHouse/ClickHouse/pull/38028) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Improve performance of `not` function using dynamic dispatch. [#38058](https://github.com/ClickHouse/ClickHouse/pull/38058) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Optimized the internal caching of re2 patterns which occur e.g. in LIKE and MATCH functions. [#37544](https://github.com/ClickHouse/ClickHouse/pull/37544) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Improve filter bitmask generator function all in one with AVX-512 instructions. [#37588](https://github.com/ClickHouse/ClickHouse/pull/37588) ([yaqi-zhao](https://github.com/yaqi-zhao)).
|
||||
* Apply read method `threadpool` for Hive integration engine. This will significantly speed up reading. [#36328](https://github.com/ClickHouse/ClickHouse/pull/36328) ([李扬](https://github.com/taiyang-li)).
|
||||
* When all the columns to read are partition keys, construct columns by the file's row number without actually reading the Hive file. [#37103](https://github.com/ClickHouse/ClickHouse/pull/37103) ([lgbo](https://github.com/lgbo-ustc)).
|
||||
* Support multi disks for caching hive files. [#37279](https://github.com/ClickHouse/ClickHouse/pull/37279) ([lgbo](https://github.com/lgbo-ustc)).
|
||||
* Limiting the maximum cache usage per query can effectively prevent cache pool contamination. [Related Issues](https://github.com/ClickHouse/ClickHouse/issues/28961). [#37859](https://github.com/ClickHouse/ClickHouse/pull/37859) ([Han Shukai](https://github.com/KinderRiven)).
|
||||
* Currently clickhouse directly downloads all remote files to the local cache (even if they are only read once), which will frequently cause IO of the local hard disk. In some scenarios, these IOs may not be necessary and may easily cause negative optimization. As shown in the figure below, when we run SSB Q1-Q4, the performance of the cache has caused negative optimization. [#37516](https://github.com/ClickHouse/ClickHouse/pull/37516) ([Han Shukai](https://github.com/KinderRiven)).
|
||||
* Allow to prune the list of files via virtual columns such as `_file` and `_path` when reading from S3. This is for [#37174](https://github.com/ClickHouse/ClickHouse/issues/37174), [#23494](https://github.com/ClickHouse/ClickHouse/issues/23494). [#37356](https://github.com/ClickHouse/ClickHouse/pull/37356) ([Amos Bird](https://github.com/amosbird)).
|
||||
* In the function `CompressedWriteBuffer::nextImpl()`, there was an unnecessary write-copy step that happened frequently while inserting data. Before this patch: 1. Compress "working_buffer" into "compressed_buffer" 2. write-copy into "out". After this patch: directly compress "working_buffer" into "out". [#37242](https://github.com/ClickHouse/ClickHouse/pull/37242) ([jasperzhu](https://github.com/jinjunzh)).
|
||||
|
||||
#### Improvement
|
||||
* Support types with non-standard defaults in ROLLUP, CUBE, GROUPING SETS. Closes [#37360](https://github.com/ClickHouse/ClickHouse/issues/37360). [#37667](https://github.com/ClickHouse/ClickHouse/pull/37667) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Fix stack traces collection on ARM. Closes [#37044](https://github.com/ClickHouse/ClickHouse/issues/37044). Closes [#15638](https://github.com/ClickHouse/ClickHouse/issues/15638). [#37797](https://github.com/ClickHouse/ClickHouse/pull/37797) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Client will try every IP address returned by DNS resolution until successful connection. [#37273](https://github.com/ClickHouse/ClickHouse/pull/37273) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Allow to use String type instead of Binary in Arrow/Parquet/ORC formats. This PR introduces 3 new settings for it: `output_format_arrow_string_as_string`, `output_format_parquet_string_as_string`, `output_format_orc_string_as_string`. Default value for all settings is `false`. [#37327](https://github.com/ClickHouse/ClickHouse/pull/37327) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Apply setting `input_format_max_rows_to_read_for_schema_inference` for all read rows in total from all files in globs. Previously setting `input_format_max_rows_to_read_for_schema_inference` was applied for each file in glob separately and in case of huge number of nulls we could read first `input_format_max_rows_to_read_for_schema_inference` rows from each file and get nothing. Also increase default value for this setting to 25000. [#37332](https://github.com/ClickHouse/ClickHouse/pull/37332) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Add separate `CLUSTER` grant (and `access_control_improvements.on_cluster_queries_require_cluster_grant` configuration directive, for backward compatibility, default to `false`). [#35767](https://github.com/ClickHouse/ClickHouse/pull/35767) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Added support for schema inference for `hdfsCluster`. [#35812](https://github.com/ClickHouse/ClickHouse/pull/35812) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Implement `least_used` load balancing algorithm for disks inside volume (multi disk configuration). [#36686](https://github.com/ClickHouse/ClickHouse/pull/36686) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Modify the HTTP Endpoint to return the full stats under the `X-ClickHouse-Summary` header when `send_progress_in_http_headers=0` (before it would return all zeros). - Modify the HTTP Endpoint to return `X-ClickHouse-Exception-Code` header when progress has been sent before (`send_progress_in_http_headers=1`) - Modify the HTTP Endpoint to return `HTTP_REQUEST_TIMEOUT` (408) instead of `HTTP_INTERNAL_SERVER_ERROR` (500) on `TIMEOUT_EXCEEDED` errors. [#36884](https://github.com/ClickHouse/ClickHouse/pull/36884) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Allow a user to inspect grants from granted roles. [#36941](https://github.com/ClickHouse/ClickHouse/pull/36941) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
* Do not calculate an integral numerically but use CDF functions instead. This will speed up execution and will increase the precision. This fixes [#36714](https://github.com/ClickHouse/ClickHouse/issues/36714). [#36953](https://github.com/ClickHouse/ClickHouse/pull/36953) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Add default implementation for Nothing in functions. Now most of the functions will return column with type Nothing in case one of its arguments is Nothing. It also solves problem with functions like arrayMap/arrayFilter and similar when they have empty array as an argument. Previously queries like `select arrayMap(x -> 2 * x, []);` failed because function inside lambda cannot work with type `Nothing`, now such queries return empty array with type `Array(Nothing)`. Also add support for arrays of nullable types in functions like arrayFilter/arrayFill. Previously, queries like `select arrayFilter(x -> x % 2, [1, NULL])` failed, now they work (if the result of lambda is NULL, then this value won't be included in the result). Closes [#37000](https://github.com/ClickHouse/ClickHouse/issues/37000). [#37048](https://github.com/ClickHouse/ClickHouse/pull/37048) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Now if a shard has local replica we create a local plan and a plan to read from all remote replicas. They have shared initiator which coordinates reading. [#37204](https://github.com/ClickHouse/ClickHouse/pull/37204) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* No longer abort server startup if configuration option "mark_cache_size" is not explicitly set. [#37326](https://github.com/ClickHouse/ClickHouse/pull/37326) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Allows providing `NULL`/`NOT NULL` right after type in column declaration. [#37337](https://github.com/ClickHouse/ClickHouse/pull/37337) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Optimize getting the read buffer for a file segment in the PARTIALLY_DOWNLOADED state. [#37338](https://github.com/ClickHouse/ClickHouse/pull/37338) ([xiedeyantu](https://github.com/xiedeyantu)).
|
||||
* Try to improve short circuit functions processing to fix problems with stress tests. [#37384](https://github.com/ClickHouse/ClickHouse/pull/37384) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Closes [#37395](https://github.com/ClickHouse/ClickHouse/issues/37395). [#37415](https://github.com/ClickHouse/ClickHouse/pull/37415) ([Memo](https://github.com/Joeywzr)).
|
||||
* Fix extremely rare deadlock during part fetch in zero-copy replication. Fixes [#37423](https://github.com/ClickHouse/ClickHouse/issues/37423). [#37424](https://github.com/ClickHouse/ClickHouse/pull/37424) ([metahys](https://github.com/metahys)).
|
||||
* Don't allow to create storage with unknown data format. [#37450](https://github.com/ClickHouse/ClickHouse/pull/37450) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Set `global_memory_usage_overcommit_max_wait_microseconds` default value to 5 seconds. Add info about `OvercommitTracker` to OOM exception message. Add `MemoryOvercommitWaitTimeMicroseconds` profile event. [#37460](https://github.com/ClickHouse/ClickHouse/pull/37460) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Do not display `-0.0` CPU time in clickhouse-client. It can appear due to rounding errors. This closes [#38003](https://github.com/ClickHouse/ClickHouse/issues/38003). This closes [#38038](https://github.com/ClickHouse/ClickHouse/issues/38038). [#38064](https://github.com/ClickHouse/ClickHouse/pull/38064) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Play UI: Keep controls in place when the page is scrolled horizontally. This makes edits comfortable even if the table is wide and it was scrolled far to the right. The feature proposed by Maksym Tereshchenko from CaspianDB. [#37470](https://github.com/ClickHouse/ClickHouse/pull/37470) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Modify query div in play.html to be extendable beyond 20% height. In case of very long queries it is helpful to extend the textarea element, but currently, since the div has a fixed height, the extended textarea hides the data div underneath. With this fix, extending the textarea element will push the data div down/up such that the extended textarea won't hide it. Also, keeps query box width 100% even when the user is adjusting the size of the query textarea. [#37488](https://github.com/ClickHouse/ClickHouse/pull/37488) ([guyco87](https://github.com/guyco87)).
|
||||
* Added `ProfileEvents` for introspection of type of written (inserted or merged) parts (`Inserted{Wide/Compact/InMemory}Parts`, `MergedInto{Wide/Compact/InMemory}Parts`). Added column `part_type` to `system.part_log`. Resolves [#37495](https://github.com/ClickHouse/ClickHouse/issues/37495). [#37536](https://github.com/ClickHouse/ClickHouse/pull/37536) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* clickhouse-keeper improvement: move broken logs to a timestamped folder. [#37565](https://github.com/ClickHouse/ClickHouse/pull/37565) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Do not write columns expired by TTL after subsequent merges (previously, only the first merge/optimize of the part skipped writing columns expired by TTL; all subsequent ones wrote them). [#37570](https://github.com/ClickHouse/ClickHouse/pull/37570) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* More precise result of the `dumpColumnStructure` miscellaneous function in the presence of LowCardinality or Sparse columns. In previous versions, this function was converting the argument to a full column before returning the result. This is needed to provide an answer in [#6935](https://github.com/ClickHouse/ClickHouse/issues/6935). [#37633](https://github.com/ClickHouse/ClickHouse/pull/37633) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* clickhouse-keeper: store only unique session IDs for watches. [#37641](https://github.com/ClickHouse/ClickHouse/pull/37641) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix possible "Cannot write to finalized buffer". [#37645](https://github.com/ClickHouse/ClickHouse/pull/37645) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Add setting `support_batch_delete` for `DiskS3` to disable multiobject delete calls, which Google Cloud Storage doesn't support. [#37659](https://github.com/ClickHouse/ClickHouse/pull/37659) ([Fred Wulff](https://github.com/frew)).
|
||||
* Add an option to disable connection pooling in ODBC bridge. [#37705](https://github.com/ClickHouse/ClickHouse/pull/37705) ([Anton Kozlov](https://github.com/tonickkozlov)).
|
||||
* Functions `dictGetHierarchy`, `dictIsIn`, `dictGetChildren`, `dictGetDescendants` added support for nullable `HIERARCHICAL` attribute in dictionaries. Closes [#35521](https://github.com/ClickHouse/ClickHouse/issues/35521). [#37805](https://github.com/ClickHouse/ClickHouse/pull/37805) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Expose BoringSSL version related info in the `system.build_options` table. [#37850](https://github.com/ClickHouse/ClickHouse/pull/37850) ([Bharat Nallan](https://github.com/bharatnc)).
|
||||
* Now clickhouse-server removes `delete_tmp` directories on server start. Fixes [#26503](https://github.com/ClickHouse/ClickHouse/issues/26503). [#37906](https://github.com/ClickHouse/ClickHouse/pull/37906) ([alesapin](https://github.com/alesapin)).
|
||||
* Clean up broken detached parts after timeout. Closes [#25195](https://github.com/ClickHouse/ClickHouse/issues/25195). [#37975](https://github.com/ClickHouse/ClickHouse/pull/37975) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Now in the MergeTree table engine family, failed-to-move parts will be removed instantly. [#37994](https://github.com/ClickHouse/ClickHouse/pull/37994) ([alesapin](https://github.com/alesapin)).
|
||||
* Now if the setting `always_fetch_merged_part` is enabled for ReplicatedMergeTree, merges will try to find parts on other replicas less frequently, putting a smaller load on [Zoo]Keeper. [#37995](https://github.com/ClickHouse/ClickHouse/pull/37995) ([alesapin](https://github.com/alesapin)).
|
||||
* Add implicit grants with grant option too. For example `GRANT CREATE TABLE ON test.* TO A WITH GRANT OPTION` now allows `A` to execute `GRANT CREATE VIEW ON test.* TO B`. [#38017](https://github.com/ClickHouse/ClickHouse/pull/38017) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Use `clang-14` and LLVM infrastructure version 14 for builds. This closes [#34681](https://github.com/ClickHouse/ClickHouse/issues/34681). [#34754](https://github.com/ClickHouse/ClickHouse/pull/34754) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Note: `clang-14` has [a bug](https://github.com/google/sanitizers/issues/1540) in ThreadSanitizer that makes our CI work worse.
|
||||
* Allow to drop privileges at startup. This simplifies Docker images. Closes [#36293](https://github.com/ClickHouse/ClickHouse/issues/36293). [#36341](https://github.com/ClickHouse/ClickHouse/pull/36341) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Add docs spellcheck to CI. [#37790](https://github.com/ClickHouse/ClickHouse/pull/37790) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Fix overly aggressive stripping which removed the embedded hash required for checking the consistency of the executable. [#37993](https://github.com/ClickHouse/ClickHouse/pull/37993) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fix `SELECT ... INTERSECT` and `EXCEPT SELECT` statements with constant string types. [#37738](https://github.com/ClickHouse/ClickHouse/pull/37738) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix `GROUP BY` `AggregateFunction` (i.e. you `GROUP BY` by the column that has `AggregateFunction` type). [#37093](https://github.com/ClickHouse/ClickHouse/pull/37093) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* (experimental WINDOW VIEW) Fix `addDependency` in WindowView. This bug can be reproduced like [#37237](https://github.com/ClickHouse/ClickHouse/issues/37237). [#37224](https://github.com/ClickHouse/ClickHouse/pull/37224) ([vxider](https://github.com/Vxider)).
|
||||
* Fix inconsistency in ORDER BY ... WITH FILL feature. Query, containing ORDER BY ... WITH FILL, can generate extra rows when multiple WITH FILL columns are present. [#38074](https://github.com/ClickHouse/ClickHouse/pull/38074) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Move `addDependency` from the constructor to `startup()` to avoid adding a dependency to a *dropped* table. Fixes [#37237](https://github.com/ClickHouse/ClickHouse/issues/37237). [#37243](https://github.com/ClickHouse/ClickHouse/pull/37243) ([vxider](https://github.com/Vxider)).
|
||||
* Fix inserting defaults for missing values in columnar formats. Previously missing columns were filled with defaults for types, not for columns. [#37253](https://github.com/ClickHouse/ClickHouse/pull/37253) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* (experimental Object type) Fix some cases of inserting nested arrays into columns of type `Object`. [#37305](https://github.com/ClickHouse/ClickHouse/pull/37305) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix unexpected errors with a clash of constant strings in aggregate function, prewhere and join. Close [#36891](https://github.com/ClickHouse/ClickHouse/issues/36891). [#37336](https://github.com/ClickHouse/ClickHouse/pull/37336) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Fix projections with GROUP/ORDER BY in query and optimize_aggregation_in_order (before the result was incorrect since only finish sorting was performed). [#37342](https://github.com/ClickHouse/ClickHouse/pull/37342) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed error with symbols in key name in S3. Fixes [#33009](https://github.com/ClickHouse/ClickHouse/issues/33009). [#37344](https://github.com/ClickHouse/ClickHouse/pull/37344) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* Throw an exception when GROUPING SETS used with ROLLUP or CUBE. [#37367](https://github.com/ClickHouse/ClickHouse/pull/37367) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Fix LOGICAL_ERROR in getMaxSourcePartsSizeForMerge during merges (when non-standard, greater values of `background_pool_size`/`background_merges_mutations_concurrency_ratio` have been specified in `config.xml` (the new way) rather than in `users.xml` (the deprecated way)). [#37413](https://github.com/ClickHouse/ClickHouse/pull/37413) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Stop removing UTF-8 BOM in RowBinary format. [#37428](https://github.com/ClickHouse/ClickHouse/pull/37428) ([Paul Loyd](https://github.com/loyd)).
|
||||
* clickhouse-keeper bugfix: fix force recovery for single node cluster. [#37440](https://github.com/ClickHouse/ClickHouse/pull/37440) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix logical error in normalizeUTF8 functions. Closes [#37298](https://github.com/ClickHouse/ClickHouse/issues/37298). [#37443](https://github.com/ClickHouse/ClickHouse/pull/37443) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix cast lowcard of nullable in JoinSwitcher, close [#37385](https://github.com/ClickHouse/ClickHouse/issues/37385). [#37453](https://github.com/ClickHouse/ClickHouse/pull/37453) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Fix named tuples output in ORC/Arrow/Parquet formats. [#37458](https://github.com/ClickHouse/ClickHouse/pull/37458) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix optimization of monotonous functions in ORDER BY clause in presence of GROUPING SETS. Fixes [#37401](https://github.com/ClickHouse/ClickHouse/issues/37401). [#37493](https://github.com/ClickHouse/ClickHouse/pull/37493) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Fix error on joining with dictionary on some conditions. Close [#37386](https://github.com/ClickHouse/ClickHouse/issues/37386). [#37530](https://github.com/ClickHouse/ClickHouse/pull/37530) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Prohibit `optimize_aggregation_in_order` with `GROUPING SETS` (fixes `LOGICAL_ERROR`). [#37542](https://github.com/ClickHouse/ClickHouse/pull/37542) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix wrong dump information of ActionsDAG. [#37587](https://github.com/ClickHouse/ClickHouse/pull/37587) ([zhanglistar](https://github.com/zhanglistar)).
|
||||
* Fix converting types for UNION queries (may produce LOGICAL_ERROR). [#37593](https://github.com/ClickHouse/ClickHouse/pull/37593) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix `WITH FILL` modifier with negative intervals in `STEP` clause. Fixes [#37514](https://github.com/ClickHouse/ClickHouse/issues/37514). [#37600](https://github.com/ClickHouse/ClickHouse/pull/37600) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix illegal joinGet array usage when `join_use_nulls = 1`. This fixes [#37562](https://github.com/ClickHouse/ClickHouse/issues/37562). [#37650](https://github.com/ClickHouse/ClickHouse/pull/37650) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix columns number mismatch in cross join, close [#37561](https://github.com/ClickHouse/ClickHouse/issues/37561). [#37653](https://github.com/ClickHouse/ClickHouse/pull/37653) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Fix segmentation fault in `show create table` from mysql database when it is configured with named collections. Closes [#37683](https://github.com/ClickHouse/ClickHouse/issues/37683). [#37690](https://github.com/ClickHouse/ClickHouse/pull/37690) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix RabbitMQ Storage not being able to start up on server restart if the storage was created without a SETTINGS clause. Closes [#37463](https://github.com/ClickHouse/ClickHouse/issues/37463). [#37691](https://github.com/ClickHouse/ClickHouse/pull/37691) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* SQL user defined functions disable CREATE/DROP in readonly mode. Closes [#37280](https://github.com/ClickHouse/ClickHouse/issues/37280). [#37699](https://github.com/ClickHouse/ClickHouse/pull/37699) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix formatting of Nullable arguments for executable user defined functions. Closes [#35897](https://github.com/ClickHouse/ClickHouse/issues/35897). [#37711](https://github.com/ClickHouse/ClickHouse/pull/37711) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix optimization enabled by setting `optimize_monotonous_functions_in_order_by` in distributed queries. Fixes [#36037](https://github.com/ClickHouse/ClickHouse/issues/36037). [#37724](https://github.com/ClickHouse/ClickHouse/pull/37724) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix possible logical error: `Invalid Field get from type UInt64 to type Float64` in `values` table function. Closes [#37602](https://github.com/ClickHouse/ClickHouse/issues/37602). [#37754](https://github.com/ClickHouse/ClickHouse/pull/37754) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix possible segfault in schema inference in case of exception in SchemaReader constructor. Closes [#37680](https://github.com/ClickHouse/ClickHouse/issues/37680). [#37760](https://github.com/ClickHouse/ClickHouse/pull/37760) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix setting cast_ipv4_ipv6_default_on_conversion_error for internal cast function. Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#37761](https://github.com/ClickHouse/ClickHouse/pull/37761) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix toString error on DatatypeDate32. [#37775](https://github.com/ClickHouse/ClickHouse/pull/37775) ([LiuNeng](https://github.com/liuneng1994)).
|
||||
* The clickhouse-keeper setting `dead_session_check_period_ms` was transformed into microseconds (multiplied by 1000), which led to dead sessions only being cleaned up after several minutes (instead of 500ms). [#37824](https://github.com/ClickHouse/ClickHouse/pull/37824) ([Michael Lex](https://github.com/mlex)).
|
||||
* Fix possible "No more packets are available" for distributed queries (in case of `async_socket_for_remote`/`use_hedged_requests` is disabled). [#37826](https://github.com/ClickHouse/ClickHouse/pull/37826) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* (experimental WINDOW VIEW) Do not drop the inner target table when executing `ALTER TABLE … MODIFY QUERY` in WindowView. [#37879](https://github.com/ClickHouse/ClickHouse/pull/37879) ([vxider](https://github.com/Vxider)).
|
||||
* Fix directory ownership of coordination dir in clickhouse-keeper Docker image. Fixes [#37914](https://github.com/ClickHouse/ClickHouse/issues/37914). [#37915](https://github.com/ClickHouse/ClickHouse/pull/37915) ([James Maidment](https://github.com/jamesmaidment)).
|
||||
* Dictionaries fix custom query with update field and `{condition}`. Closes [#33746](https://github.com/ClickHouse/ClickHouse/issues/33746). [#37947](https://github.com/ClickHouse/ClickHouse/pull/37947) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix possible incorrect result of `SELECT ... WITH FILL` in the case when `ORDER BY` should be applied after `WITH FILL` result (e.g. for outer query). Incorrect result was caused by optimization for `ORDER BY` expressions ([#35623](https://github.com/ClickHouse/ClickHouse/issues/35623)). Closes [#37904](https://github.com/ClickHouse/ClickHouse/issues/37904). [#37959](https://github.com/ClickHouse/ClickHouse/pull/37959) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* (experimental WINDOW VIEW) Add missing default columns when pushing to the target table in WindowView, fix [#37815](https://github.com/ClickHouse/ClickHouse/issues/37815). [#37965](https://github.com/ClickHouse/ClickHouse/pull/37965) ([vxider](https://github.com/Vxider)).
|
||||
* Fixed too large stack frame that would cause compilation to fail. [#37996](https://github.com/ClickHouse/ClickHouse/pull/37996) ([Han Shukai](https://github.com/KinderRiven)).
|
||||
* When `enable_filesystem_query_cache_limit` is enabled, throw the exception `Reserved cache size exceeds the remaining cache size`. [#38004](https://github.com/ClickHouse/ClickHouse/pull/38004) ([xiedeyantu](https://github.com/xiedeyantu)).
|
||||
* Fix converting types for UNION queries (may produce LOGICAL_ERROR). [#34775](https://github.com/ClickHouse/ClickHouse/pull/34775) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix TTL merges possibly not being scheduled again if BackgroundExecutor is busy: `merges_with_ttl_counter` is increased in `selectPartsToMerge()`, the merge task is ignored if BackgroundExecutor is busy, and `merges_with_ttl_counter` is then never decreased. [#36387](https://github.com/ClickHouse/ClickHouse/pull/36387) ([lthaooo](https://github.com/lthaooo)).
|
||||
* Fix overridden settings value of `normalize_function_names`. [#36937](https://github.com/ClickHouse/ClickHouse/pull/36937) ([李扬](https://github.com/taiyang-li)).
|
||||
* Fix for exponential time decaying window functions. Now respecting boundaries of the window. [#36944](https://github.com/ClickHouse/ClickHouse/pull/36944) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* Fix possible heap-use-after-free error when reading `system.projection_parts` and `system.projection_parts_columns`. This fixes [#37184](https://github.com/ClickHouse/ClickHouse/issues/37184). [#37185](https://github.com/ClickHouse/ClickHouse/pull/37185) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed `DateTime64` fractional seconds behavior prior to Unix epoch. [#37697](https://github.com/ClickHouse/ClickHouse/pull/37697) ([Andrey Zvonov](https://github.com/zvonand)). [#37039](https://github.com/ClickHouse/ClickHouse/pull/37039) ([李扬](https://github.com/taiyang-li)).
|
||||
|
||||
|
||||
### <a id="225"></a> ClickHouse release 22.5, 2022-05-19
|
||||
|
||||
@ -172,7 +338,7 @@
|
||||
|
||||
#### Backward Incompatible Change
|
||||
|
||||
* Do not allow SETTINGS after FORMAT for INSERT queries (there is compatibility setting `parser_settings_after_format_compact` to accept such queries, but it is turned OFF by default). [#35883](https://github.com/ClickHouse/ClickHouse/pull/35883) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Do not allow SETTINGS after FORMAT for INSERT queries (there is compatibility setting `allow_settings_after_format_in_insert` to accept such queries, but it is turned OFF by default). [#35883](https://github.com/ClickHouse/ClickHouse/pull/35883) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Function `yandexConsistentHash` (consistent hashing algorithm by Konstantin "kostik" Oblakov) is renamed to `kostikConsistentHash`. The old name is left as an alias for compatibility. Although this change is backward compatible, we may remove the alias in subsequent releases, that's why it's recommended to update the usages of this function in your apps. [#35553](https://github.com/ClickHouse/ClickHouse/pull/35553) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### New Feature
|
||||
|
@ -223,11 +223,25 @@ if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE"
|
||||
OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
|
||||
OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
|
||||
set (OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT ON)
|
||||
else()
|
||||
set (OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT OFF)
|
||||
endif()
|
||||
# Provides faster linking and lower binary size.
|
||||
# Tradeoff is the inability to debug some source files with e.g. gdb
|
||||
# (empty stack frames and no local variables).
|
||||
option(OMIT_HEAVY_DEBUG_SYMBOLS
|
||||
"Do not generate debugger info for heavy modules (ClickHouse functions and dictionaries, some contrib)"
|
||||
${OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT})
|
||||
|
||||
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
|
||||
set(USE_DEBUG_HELPERS ON)
|
||||
endif()
|
||||
|
||||
option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS})
|
||||
|
||||
option(BUILD_STANDALONE_KEEPER "Build keeper as small standalone binary" OFF)
|
||||
if (NOT BUILD_STANDALONE_KEEPER)
|
||||
option(CREATE_KEEPER_SYMLINK "Create symlink for clickhouse-keeper to main server binary" ON)
|
||||
@ -244,16 +258,18 @@ endif ()
|
||||
# Add a section with the hash of the compiled machine code for integrity checks.
|
||||
# Only for official builds, because adding a section can be time consuming (rewrite of several GB).
|
||||
# And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary)
|
||||
if (OBJCOPY_PATH AND CLICKHOUSE_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE OR CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64.cmake$"))
|
||||
if (CLICKHOUSE_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE OR CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64.cmake$"))
|
||||
message(STATUS "Official build: A checksum hash will be added to the clickhouse executable")
|
||||
set (USE_BINARY_HASH 1 CACHE STRING "Calculate binary hash and store it in the separate section")
|
||||
else ()
|
||||
message(STATUS "No official build: A checksum hash will not be added to the clickhouse executable")
|
||||
endif ()
|
||||
|
||||
# Allows to build stripped binary in a separate directory
|
||||
if (OBJCOPY_PATH AND STRIP_PATH)
|
||||
option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF)
|
||||
if (INSTALL_STRIPPED_BINARIES)
|
||||
set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")
|
||||
endif()
|
||||
# Optionally split binaries and debug symbols.
|
||||
option(SPLIT_DEBUG_SYMBOLS "Split binaries and debug symbols" OFF)
|
||||
if (SPLIT_DEBUG_SYMBOLS)
|
||||
message(STATUS "Will split binaries and debug symbols")
|
||||
set(SPLITTED_DEBUG_SYMBOLS_DIR "stripped" CACHE STRING "A separate directory for stripped information")
|
||||
endif()
|
||||
|
||||
cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd
|
||||
@ -507,14 +523,14 @@ if (NOT ENABLE_JEMALLOC)
|
||||
message (WARNING "Non default allocator is disabled. This is not recommended for production builds.")
|
||||
endif ()
|
||||
|
||||
macro (add_executable target)
|
||||
macro (clickhouse_add_executable target)
|
||||
# invoke built-in add_executable
|
||||
# explicitly acquire and interpose malloc symbols by clickhouse_malloc
|
||||
# if GLIBC_COMPATIBILITY is ON and ENABLE_THINLTO is ON, then provide the memcpy symbol explicitly to neutralize ThinLTO's libcall generation.
|
||||
if (ARCH_AMD64 AND GLIBC_COMPATIBILITY AND ENABLE_THINLTO)
|
||||
_add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc> $<TARGET_OBJECTS:memcpy>)
|
||||
add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc> $<TARGET_OBJECTS:memcpy>)
|
||||
else ()
|
||||
_add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc>)
|
||||
add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc>)
|
||||
endif ()
|
||||
|
||||
get_target_property (type ${target} TYPE)
|
||||
|
@ -62,9 +62,10 @@ execute_process(COMMAND uname -m OUTPUT_VARIABLE ARCH)
|
||||
# By default, prefer clang on Linux
|
||||
# But note, that you still may change the compiler with -DCMAKE_C_COMPILER/-DCMAKE_CXX_COMPILER.
|
||||
if (OS MATCHES "Linux"
|
||||
# some build systems may use CC/CXX env variables
|
||||
AND "$ENV{CC}" STREQUAL ""
|
||||
AND "$ENV{CXX}" STREQUAL "")
|
||||
AND "$ENV{CXX}" STREQUAL ""
|
||||
AND NOT DEFINED CMAKE_C_COMPILER
|
||||
AND NOT DEFINED CMAKE_CXX_COMPILER)
|
||||
find_program(CLANG_PATH clang)
|
||||
if (CLANG_PATH)
|
||||
set(CMAKE_C_COMPILER "clang" CACHE INTERNAL "")
|
||||
@ -87,8 +88,7 @@ if (OS MATCHES "Linux"
|
||||
set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-aarch64.cmake" CACHE INTERNAL "")
|
||||
elseif (ARCH MATCHES "^(ppc64le.*|PPC64LE.*)")
|
||||
set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-ppc64le.cmake" CACHE INTERNAL "")
|
||||
else ()
|
||||
else ()
|
||||
message (FATAL_ERROR "Unsupported architecture: ${ARCH}")
|
||||
endif ()
|
||||
|
||||
endif()
|
||||
|
@ -14,6 +14,9 @@ ClickHouse® is an open-source column-oriented database management system that a
|
||||
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.
|
||||
* [Contacts](https://clickhouse.com/company/#contact) can help to get your questions answered if there are any.
|
||||
|
||||
## Upcoming Events
|
||||
## Upcoming events
|
||||
* [v22.7 Release Webinar](https://clickhouse.com/company/events/v22-7-release-webinar/) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap.
|
||||
* [ClickHouse Meetup at the Cloudflare office in London](https://www.meetup.com/clickhouse-london-user-group/events/286891586/) ClickHouse meetup at the Cloudflare office space in central London
|
||||
* [ClickHouse Meetup at the Metoda office in Munich](https://www.meetup.com/clickhouse-meetup-munich/events/286891667/) ClickHouse meetup at the Metoda office in Munich
|
||||
|
||||
|
||||
* [ClickHouse Meetup Amsterdam (in-person and online)](https://www.meetup.com/clickhouse-netherlands-user-group/events/286017044/) on June 8th, 2022
|
||||
|
51
SECURITY.md
51
SECURITY.md
@ -1,3 +1,4 @@
|
||||
|
||||
# Security Policy
|
||||
|
||||
## Security Announcements
|
||||
@ -7,35 +8,36 @@ Security fixes will be announced by posting them in the [security changelog](htt
|
||||
|
||||
The following versions of ClickHouse server are currently being supported with security updates:
|
||||
|
||||
| Version | Supported |
|
||||
| ------- | ------------------ |
|
||||
| 1.x | :x: |
|
||||
| 18.x | :x: |
|
||||
| 19.x | :x: |
|
||||
| 20.x | :x: |
|
||||
| 21.1 | :x: |
|
||||
| 21.2 | :x: |
|
||||
| 21.3 | :x: |
|
||||
| 21.4 | :x: |
|
||||
| 21.5 | :x: |
|
||||
| 21.6 | :x: |
|
||||
| 21.7 | :x: |
|
||||
| 21.8 | ✅ |
|
||||
| 21.9 | :x: |
|
||||
| 21.10 | :x: |
|
||||
| 21.11 | :x: |
|
||||
| 21.12 | :x: |
|
||||
| 22.1 | :x: |
|
||||
| 22.2 | :x: |
|
||||
| 22.3 | ✅ |
|
||||
| 22.4 | ✅ |
|
||||
| 22.5 | ✅ |
|
||||
| Version | Supported |
|
||||
|:-|:-|
|
||||
| 22.6 | ✔️ |
|
||||
| 22.5 | ✔️ |
|
||||
| 22.4 | ✔️ |
|
||||
| 22.3 | ✔️ |
|
||||
| 22.2 | ❌ |
|
||||
| 22.1 | ❌ |
|
||||
| 21.12 | ❌ |
|
||||
| 21.11 | ❌ |
|
||||
| 21.10 | ❌ |
|
||||
| 21.9 | ❌ |
|
||||
| 21.8 | ✔️ |
|
||||
| 21.7 | ❌ |
|
||||
| 21.6 | ❌ |
|
||||
| 21.5 | ❌ |
|
||||
| 21.4 | ❌ |
|
||||
| 21.3 | ❌ |
|
||||
| 21.2 | ❌ |
|
||||
| 21.1 | ❌ |
|
||||
| 20.* | ❌ |
|
||||
| 19.* | ❌ |
|
||||
| 18.* | ❌ |
|
||||
| 1.* | ❌ |
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
We're extremely grateful to security researchers and users who report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers.
|
||||
|
||||
To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com).
|
||||
To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com). We do not offer any financial rewards for reporting issues to us using this method. Alternatively, you can also submit your findings through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded for it as per the program scope and rules of engagement.
|
||||
|
||||
### When Should I Report a Vulnerability?
|
||||
|
||||
@ -57,4 +59,3 @@ As the security issue moves from triage, to identified fix, to release planning
|
||||
|
||||
A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect report date to disclosure date to be on the order of 7 days.
|
||||
|
||||
|
||||
|
@ -89,7 +89,7 @@ public:
|
||||
inline void returnObject(T && object_to_return)
|
||||
{
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(objects_mutex);
|
||||
std::lock_guard lock(objects_mutex);
|
||||
|
||||
objects.emplace_back(std::move(object_to_return));
|
||||
--borrowed_objects_size;
|
||||
@ -107,14 +107,14 @@ public:
|
||||
/// Allocated objects size by the pool. If allocatedObjectsSize == maxSize then pool is full.
|
||||
inline size_t allocatedObjectsSize() const
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(objects_mutex);
|
||||
std::lock_guard lock(objects_mutex);
|
||||
return allocated_objects_size;
|
||||
}
|
||||
|
||||
/// Returns allocatedObjectsSize == maxSize
|
||||
inline bool isFull() const
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(objects_mutex);
|
||||
std::lock_guard lock(objects_mutex);
|
||||
return allocated_objects_size == max_size;
|
||||
}
|
||||
|
||||
@ -122,7 +122,7 @@ public:
|
||||
/// Then client will wait during borrowObject function call.
|
||||
inline size_t borrowedObjectsSize() const
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(objects_mutex);
|
||||
std::lock_guard lock(objects_mutex);
|
||||
return borrowed_objects_size;
|
||||
}
|
||||
|
||||
|
@ -49,7 +49,7 @@ struct Decimal
|
||||
using NativeType = T;
|
||||
|
||||
constexpr Decimal() = default;
|
||||
constexpr Decimal(Decimal<T> &&) = default;
|
||||
constexpr Decimal(Decimal<T> &&) noexcept = default;
|
||||
constexpr Decimal(const Decimal<T> &) = default;
|
||||
|
||||
constexpr Decimal(const T & value_): value(value_) {}
|
||||
@ -57,7 +57,7 @@ struct Decimal
|
||||
template <typename U>
|
||||
constexpr Decimal(const Decimal<U> & x): value(x.value) {}
|
||||
|
||||
constexpr Decimal<T> & operator = (Decimal<T> &&) = default;
|
||||
constexpr Decimal<T> & operator=(Decimal<T> &&) noexcept = default;
|
||||
constexpr Decimal<T> & operator = (const Decimal<T> &) = default;
|
||||
|
||||
constexpr operator T () const { return value; }
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <replxx.hxx>
|
||||
|
||||
#include <base/types.h>
|
||||
#include <base/defines.h>
|
||||
|
||||
class LineReader
|
||||
{
|
||||
@ -20,8 +21,8 @@ public:
|
||||
void addWords(Words && new_words);
|
||||
|
||||
private:
|
||||
Words words;
|
||||
Words words_no_case;
|
||||
Words words TSA_GUARDED_BY(mutex);
|
||||
Words words_no_case TSA_GUARDED_BY(mutex);
|
||||
|
||||
std::mutex mutex;
|
||||
};
|
||||
@ -29,7 +30,7 @@ public:
|
||||
using Patterns = std::vector<const char *>;
|
||||
|
||||
LineReader(const String & history_file_path, bool multiline, Patterns extenders, Patterns delimiters);
|
||||
virtual ~LineReader() {}
|
||||
virtual ~LineReader() = default;
|
||||
|
||||
/// Reads the whole line until delimiter (in multiline mode) or until the last line without extender.
|
||||
/// If resulting line is empty, it means the user interrupted the input.
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
namespace Poco::Util
|
||||
{
|
||||
class LayeredConfiguration;
|
||||
class LayeredConfiguration; // NOLINT(cppcoreguidelines-virtual-class-destructor)
|
||||
}
|
||||
|
||||
/// Import extra command line arguments to configuration. These are command line arguments after --.
|
||||
|
@ -93,6 +93,7 @@
|
||||
# define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address")))
|
||||
# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
|
||||
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined")))
|
||||
# define DISABLE_SANITIZER_INSTRUMENTATION __attribute__((disable_sanitizer_instrumentation))
|
||||
#else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it.
|
||||
# define NO_SANITIZE_UNDEFINED
|
||||
# define NO_SANITIZE_ADDRESS
|
||||
@ -124,6 +125,39 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers.
|
||||
/// Feel free to extend, but please stay close to https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#mutexheader
|
||||
#if defined(__clang__)
# define TSA_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__))) /// data is protected by given capability
# define TSA_PT_GUARDED_BY(...) __attribute__((pt_guarded_by(__VA_ARGS__))) /// pointed-to data is protected by the given capability
# define TSA_REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__))) /// thread needs exclusive possession of given capability
# define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) /// thread needs shared possession of given capability
# define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) /// annotated lock must be locked after given lock
# define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function

/// Macros for suppressing TSA warnings for specific reads/writes (instead of suppressing it for the whole function)
/// Consider adding a comment before using these macros.
/// Both wrap the expression in an immediately-invoked lambda that has the analysis disabled
/// and return a reference to the expression's value.
# define TSA_SUPPRESS_WARNING_FOR_READ(x) [&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }()
# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) [&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> auto & { return (x); }()

/// This macro is useful when only one thread writes to a member
/// and you want to read this member from the same thread without locking a mutex.
/// It's safe (because no concurrent writes are possible), but TSA generates a warning.
/// (Seems like there's no way to verify it, but it makes sense to distinguish it from TSA_SUPPRESS_WARNING_FOR_READ for readability)
# define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x)

#else
/// Fallback for compilers other than Clang.
/// The annotation macros expand to nothing; every annotation defined above has a counterpart here.
# define TSA_GUARDED_BY(...)
# define TSA_PT_GUARDED_BY(...)
# define TSA_REQUIRES(...)
# define TSA_REQUIRES_SHARED(...)
# define TSA_ACQUIRED_AFTER(...)
# define TSA_NO_THREAD_SAFETY_ANALYSIS

/// These macros wrap an expression whose value is used by the caller,
/// so they must still evaluate to that expression instead of expanding to nothing.
# define TSA_SUPPRESS_WARNING_FOR_READ(x) (x)
# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) (x)
# define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x)
#endif
|
||||
|
||||
/// A template function for suppressing warnings about unused variables or function results.
|
||||
template <typename... Args>
|
||||
constexpr void UNUSED(Args &&... args [[maybe_unused]])
|
||||
|
@ -27,6 +27,6 @@ struct FreeingDeleter
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::unique_ptr<char, FreeingDeleter> DemangleResult;
|
||||
using DemangleResult = std::unique_ptr<char, FreeingDeleter>;
|
||||
|
||||
DemangleResult tryDemangle(const char * name);
|
||||
|
@ -9,7 +9,7 @@ std::string errnoToString(int code, int the_errno)
|
||||
char buf[buf_size];
|
||||
#ifndef _GNU_SOURCE
|
||||
int rc = strerror_r(the_errno, buf, buf_size);
|
||||
#ifdef __APPLE__
|
||||
#ifdef OS_DARWIN
|
||||
if (rc != 0 && rc != EINVAL)
|
||||
#else
|
||||
if (rc != 0)
|
||||
|
@ -16,7 +16,7 @@ uint64_t getAvailableMemoryAmountOrZero()
|
||||
{
|
||||
#if defined(_SC_PHYS_PAGES) // linux
|
||||
return getPageSize() * sysconf(_SC_PHYS_PAGES);
|
||||
#elif defined(__FreeBSD__)
|
||||
#elif defined(OS_FREEBSD)
|
||||
struct vmtotal vmt;
|
||||
size_t vmt_size = sizeof(vmt);
|
||||
if (sysctlbyname("vm.vmtotal", &vmt, &vmt_size, NULL, 0) == 0)
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
#include <base/defines.h>
|
||||
|
||||
#if defined(__linux__) && !defined(THREAD_SANITIZER) && !defined(USE_MUSL)
|
||||
#if defined(OS_LINUX) && !defined(THREAD_SANITIZER) && !defined(USE_MUSL)
|
||||
#define USE_PHDR_CACHE 1
|
||||
#endif
|
||||
|
||||
|
@ -23,10 +23,10 @@ public:
|
||||
constexpr StrongTypedef(): t() {}
|
||||
|
||||
constexpr StrongTypedef(const Self &) = default;
|
||||
constexpr StrongTypedef(Self &&) = default;
|
||||
constexpr StrongTypedef(Self &&) noexcept(std::is_nothrow_move_constructible_v<T>) = default;
|
||||
|
||||
Self & operator=(const Self &) = default;
|
||||
Self & operator=(Self &&) = default;
|
||||
Self & operator=(Self &&) noexcept(std::is_nothrow_move_assignable_v<T>)= default;
|
||||
|
||||
template <class Enable = typename std::is_copy_assignable<T>::type>
|
||||
Self & operator=(const T & rhs) { t = rhs; return *this;}
|
||||
|
@ -1,2 +1,2 @@
|
||||
add_executable (dump_variable dump_variable.cpp)
|
||||
clickhouse_add_executable (dump_variable dump_variable.cpp)
|
||||
target_link_libraries (dump_variable PRIVATE clickhouse_common_io)
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <time.h>
|
||||
#include <ctime>
|
||||
|
||||
#if defined (OS_DARWIN) || defined (OS_SUNOS)
|
||||
# define CLOCK_MONOTONIC_COARSE CLOCK_MONOTONIC
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <string.h>
|
||||
#include <cstring>
|
||||
#include <type_traits>
|
||||
|
||||
|
||||
|
@ -27,6 +27,8 @@
|
||||
#include <type_traits>
|
||||
#include <initializer_list>
|
||||
|
||||
// NOLINTBEGIN(*)
|
||||
|
||||
namespace wide
|
||||
{
|
||||
template <size_t Bits, typename Signed>
|
||||
@ -257,4 +259,7 @@ struct hash<wide::integer<Bits, Signed>>;
|
||||
|
||||
}
|
||||
|
||||
// NOLINTEND(*)
|
||||
|
||||
#include "wide_integer_impl.h"
|
||||
|
||||
|
@ -15,6 +15,8 @@
|
||||
#include <boost/multiprecision/cpp_bin_float.hpp>
|
||||
#include <boost/math/special_functions/fpclassify.hpp>
|
||||
|
||||
// NOLINTBEGIN(*)
|
||||
|
||||
/// Use same extended double for all platforms
|
||||
#if (LDBL_MANT_DIG == 64)
|
||||
#define CONSTEXPR_FROM_DOUBLE constexpr
|
||||
@ -1478,3 +1480,5 @@ struct hash<wide::integer<Bits, Signed>>
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
// NOLINTEND(*)
|
||||
|
@ -260,4 +260,35 @@ TRAP(mq_timedreceive)
|
||||
TRAP(wordexp)
|
||||
TRAP(wordfree)
|
||||
|
||||
/// C11 threading primitives are not supported by ThreadSanitizer.
|
||||
/// Also we should avoid using them for compatibility with old libc.
|
||||
TRAP(thrd_create)
|
||||
TRAP(thrd_equal)
|
||||
TRAP(thrd_current)
|
||||
TRAP(thrd_sleep)
|
||||
TRAP(thrd_yield)
|
||||
TRAP(thrd_exit)
|
||||
TRAP(thrd_detach)
|
||||
TRAP(thrd_join)
|
||||
|
||||
TRAP(mtx_init)
|
||||
TRAP(mtx_lock)
|
||||
TRAP(mtx_timedlock)
|
||||
TRAP(mtx_trylock)
|
||||
TRAP(mtx_unlock)
|
||||
TRAP(mtx_destroy)
|
||||
TRAP(call_once)
|
||||
|
||||
TRAP(cnd_init)
|
||||
TRAP(cnd_signal)
|
||||
TRAP(cnd_broadcast)
|
||||
TRAP(cnd_wait)
|
||||
TRAP(cnd_timedwait)
|
||||
TRAP(cnd_destroy)
|
||||
|
||||
TRAP(tss_create)
|
||||
TRAP(tss_get)
|
||||
TRAP(tss_set)
|
||||
TRAP(tss_delete)
|
||||
|
||||
#endif
|
||||
|
@ -90,6 +90,7 @@
|
||||
#define PCG_EMULATED_128BIT_MATH 1
|
||||
#endif
|
||||
|
||||
// NOLINTBEGIN(*)
|
||||
|
||||
namespace pcg_extras {
|
||||
|
||||
@ -552,4 +553,6 @@ std::ostream& operator<<(std::ostream& out, printable_typename<T>) {
|
||||
|
||||
} // namespace pcg_extras
|
||||
|
||||
// NOLINTEND(*)
|
||||
|
||||
#endif // PCG_EXTRAS_HPP_INCLUDED
|
||||
|
@ -113,6 +113,8 @@
|
||||
|
||||
#include "pcg_extras.hpp"
|
||||
|
||||
// NOLINTBEGIN(*)
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct PcgSerializer;
|
||||
@ -1777,4 +1779,6 @@ typedef pcg_engines::ext_oneseq_xsh_rs_64_32<14,32,true> pcg32_k16384_fast;
|
||||
#pragma warning(default:4146)
|
||||
#endif
|
||||
|
||||
// NOLINTEND(*)
|
||||
|
||||
#endif // PCG_RAND_HPP_INCLUDED
|
||||
|
@ -16,6 +16,8 @@
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
// NOLINTBEGIN(*)
|
||||
|
||||
/* Special width values */
|
||||
enum {
|
||||
widechar_nonprint = -1, // The character is not printable.
|
||||
@ -518,4 +520,6 @@ inline int widechar_wcwidth(wchar_t c) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// NOLINTEND(*)
|
||||
|
||||
#endif // WIDECHAR_WIDTH_H
|
||||
|
1
benchmark/README.md
Normal file
1
benchmark/README.md
Normal file
@ -0,0 +1 @@
|
||||
Benchmark is located in a separate repository: https://github.com/ClickHouse/ClickBench
|
@ -1,180 +0,0 @@
|
||||
#!/usr/bin/env bash
# Runs every query from a query file against a database through an `expect`
# script. Each query is executed TIMES times; the page cache is dropped before
# each query and the server can be restarted through its init.d script.

# Print the command-line help.
function usage()
{
    cat <<EOF
usage: $0 options

This script run benhmark for database

OPTIONS:
   -c   config file where some script variables are defined
   -n   table name

   -h   Show this message
   -t   how many times execute each query. default is '3'
   -q   query file
   -e   expect file
   -s   /etc/init.d/service
   -p   table name pattern to be replaced to name. default is 'hits_10m'
   -r   restart the server before each query (requires -s)
EOF
}

TIMES=3
table_name_pattern=hits_10m

# Plain ASCII quotes, and 'p:' included so that the documented -p option is accepted.
OPTSTRING="c:ht:n:q:e:s:p:r"

# First pass: only source the config file, so that options given explicitly
# on the command line override its values in the second pass.
while getopts "$OPTSTRING" OPTION
do
    case $OPTION in
        c)
            source "$OPTARG"
            ;;
        ?)
            ;;
    esac
done

OPTIND=1

# Second pass: handle all remaining options.
while getopts "$OPTSTRING" OPTION
do
    case $OPTION in
        h)
            usage
            exit 0
            ;;
        t)
            TIMES=$OPTARG
            ;;
        n)
            table_name=$OPTARG
            ;;
        q)
            test_file=$OPTARG
            ;;
        e)
            expect_file=$OPTARG
            ;;
        s)
            etc_init_d_service=$OPTARG
            ;;
        p)
            table_name_pattern=$OPTARG
            ;;
        c)
            # Already handled in the first pass.
            ;;
        r)
            restart_server_each_query=1
            ;;
        ?)
            usage
            exit 0
            ;;
    esac
done

if [[ ! -f $expect_file ]]; then
    echo "Not found: expect file"
    exit 1
fi

if [[ ! -f $test_file ]]; then
    echo "Not found: test file"
    exit 1
fi

# The service script is optional: without it the server is never restarted.
if [[ ! -f $etc_init_d_service ]]; then
    echo "Not found: /etc/init.d/service with path=$etc_init_d_service"
    use_service=0
else
    use_service=1
fi

if [[ "$table_name_pattern" == "" ]]; then
    echo "Empty table_name_pattern"
    exit 1
fi

if [[ "$table_name" == "" ]]; then
    echo "Empty table_name"
    exit 1
fi

# Execute every query passed as an argument TIMES times.
# Empty lines are skipped; lines matching the comment pattern are only echoed.
function execute()
{
    local query i server_status restart_timer restart_limit
    local comment_re='--.*'

    if [ -z "$TIMES" ]; then
        TIMES=1
    fi

    for query in "$@"; do
        if [[ $query == "" ]]; then
            continue
        fi

        if [[ $query =~ $comment_re ]]; then
            echo "$query"
            echo
            continue
        fi

        # Start every query with a cold page cache.
        sync
        sudo sh -c "echo 3 > /proc/sys/vm/drop_caches"

        if [[ "$restart_server_each_query" == "1" && "$use_service" == "1" ]]; then
            echo "restart server: $etc_init_d_service restart"
            sudo "$etc_init_d_service" restart
        fi

        for i in $(seq "$TIMES")
        do
            if [[ -f $etc_init_d_service && "$use_service" == "1" ]]; then
                sudo "$etc_init_d_service" status
                server_status=$?

                # Restart when either the service status or a probe through
                # the expect script (empty query) reports a failure.
                if ! expect -f "$expect_file" "" || [[ $server_status != "0" ]]; then
                    echo "restart server: $etc_init_d_service restart"
                    sudo "$etc_init_d_service" restart
                fi

                # Wait until the server accepts connections, restarting it
                # again after every restart_limit seconds of waiting.
                restart_timer=0
                restart_limit=60
                until expect -f "$expect_file" "" &> /dev/null; do
                    echo "waiting"
                    sleep 1
                    restart_timer=$((restart_timer + 1))
                    if (( restart_limit < restart_timer )); then
                        sudo "$etc_init_d_service" restart
                        restart_timer=0
                    fi
                done
            fi

            echo
            echo "times: $i"

            echo "query:" "$query"
            expect -f "$expect_file" "$query"
        done
    done
}

# Substitute the real table name for the pattern and load one query per line.
temp_test_file="temp_queries_$table_name"
sed "s/$table_name_pattern/$table_name/g" "$test_file" > "$temp_test_file"
mapfile -t test_queries < "$temp_test_file"

echo "start time: $(date)"
time execute "${test_queries[@]}"
echo "stop time: $(date)"
|
@ -1,22 +0,0 @@
|
||||
#!/usr/bin/env bash
# Runs each query from queries.sql TRIES times over HTTP with curl and prints
# one bracketed, comma-separated list of results per query.
# The first argument replaces the {table} placeholder (wrapped in double quotes).
# YT_PROXY, YT_TOKEN and YT_CLIQUE_ID are read from the environment.

QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3

# Kept in a variable and expanded unquoted below: a quoted right-hand side of
# =~ is matched as a literal string, which would disable the alternation.
FAILURE_RE='fail|Exception'

# -r keeps backslashes in the queries intact.
sed "s|{table}|\"${TABLE}\"|g" "$QUERIES_FILE" | while read -r query; do

    echo -n "["
    for i in $(seq 1 $TRIES); do
        # Retry until curl succeeds and the response does not report a failure.
        while true; do
            RES=$(command time -f %e -o /dev/stdout curl -sS -G \
                --data-urlencode "query=$query" \
                --data "default_format=Null&max_memory_usage=100000000000&max_memory_usage_for_all_queries=100000000000&max_concurrent_queries_for_user=100&database=*$YT_CLIQUE_ID" \
                --location-trusted -H "Authorization: OAuth $YT_TOKEN" \
                "$YT_PROXY.yt.yandex.net/query" 2>/dev/null)
            if [[ $? == 0 ]] && ! [[ $RES =~ $FAILURE_RE ]]; then
                break
            fi
        done

        # The retry loop is only left on success, so a result is always available here.
        echo -n "${RES}"
        [[ "$i" != "$TRIES" ]] && echo -n ", "
    done
    echo "],"
done
|
@ -1,29 +0,0 @@
|
||||
#!/usr/bin/env bash
# Runs each query from queries.sql TRIES times through the ClickHouse client
# and prints one bracketed, comma-separated list of results per query
# ("null" for a failed run). The first argument replaces the {table} placeholder.

QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3

# Prefer a ./clickhouse binary in the current directory over an installed client.
if [ -x ./clickhouse ]; then
    CLICKHOUSE_CLIENT="./clickhouse client"
elif command -v clickhouse-client >/dev/null 2>&1; then
    CLICKHOUSE_CLIENT="clickhouse-client"
else
    echo "clickhouse-client is not found"
    exit 1
fi

cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query
do
    # Flush dirty pages and drop the page cache before every query.
    sync
    echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null

    echo -n "["
    for ((i = 1; i <= TRIES; i++))
    do
        # CLICKHOUSE_CLIENT is deliberately unquoted: it may hold two words.
        if RES=$(${CLICKHOUSE_CLIENT} --time --format=Null --max_memory_usage=100G --query="$query" 2>&1)
        then
            echo -n "${RES}"
        else
            echo -n "null"
        fi

        # Separator between runs, but not after the last one.
        if (( i != TRIES )); then
            echo -n ", "
        fi
    done
    echo "],"
done
|
@ -1,19 +0,0 @@
|
||||
#!/usr/bin/env bash
# Runs each query from queries.sql TRIES times through ./yql and prints one
# bracketed, comma-separated list of timings per query.
# The first argument replaces the {table} placeholder (wrapped in double quotes).

QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3

cat "$QUERIES_FILE" | sed "s|{table}|\"${TABLE}\"|g" | while read query
do
    echo -n "["
    for ((i = 1; i <= TRIES; i++))
    do
        # Re-run the query until yql succeeds and the file written by `time -o time` can be read.
        until RES=$(command time -f %e -o time ./yql --clickhouse --syntax-version 1 -f empty <<< "USE chyt.hume; PRAGMA max_memory_usage = 100000000000; PRAGMA max_memory_usage_for_all_queries = 100000000000; $query" >/dev/null 2>&1 && cat time)
        do
            :
        done

        if [[ "$?" == "0" ]]; then
            echo -n "${RES}"
        else
            echo -n "null"
        fi

        # Separator between runs, but not after the last one.
        if (( i != TRIES )); then
            echo -n ", "
        fi
    done
    echo "],"
done
|
@ -1,43 +0,0 @@
|
||||
#!/usr/bin/env bash
# Runs each query from queries.sql TRIES times against a remote server, tagging
# every run with a unique query_id, then reads the durations of those runs
# back from system.query_log. The first argument replaces the {table} placeholder.

QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3

# Connection options; the "..." placeholders must be filled in before use.
PARAMS="--host ... --secure --password ..."

# Prefer a ./clickhouse binary in the current directory over an installed client.
if [ -x ./clickhouse ]; then
    CLICKHOUSE_CLIENT="./clickhouse client"
elif command -v clickhouse-client >/dev/null 2>&1; then
    CLICKHOUSE_CLIENT="clickhouse-client"
else
    echo "clickhouse-client is not found"
    exit 1
fi

# query_id layout: <prefix>_<query number>_<run number>; the final query parses it back.
QUERY_ID_PREFIX="benchmark_$RANDOM"
QUERY_NUM=1

cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
    for ((i = 1; i <= TRIES; i++)); do
        # CLICKHOUSE_CLIENT and PARAMS are deliberately unquoted: both hold several words.
        ${CLICKHOUSE_CLIENT} ${PARAMS} --query_id "${QUERY_ID_PREFIX}_${QUERY_NUM}_${i}" --format=Null --max_memory_usage=100G --query="$query"
        echo -n '.'
    done
    (( QUERY_NUM += 1 ))
    echo
done

# Pause before reading the results back from the query log.
sleep 10

${CLICKHOUSE_CLIENT} ${PARAMS} --query "
WITH extractGroups(query_id, '(\d+)_(\d+)\$') AS num_run, num_run[1]::UInt8 AS num, num_run[2]::UInt8 AS run
SELECT groupArrayInsertAt(query_duration_ms / 1000, (run - 1)::UInt8)::String || ','
FROM clusterAllReplicas(default, system.query_log)
WHERE event_date >= yesterday() AND type = 2 AND query_id LIKE '${QUERY_ID_PREFIX}%'
GROUP BY num ORDER BY num FORMAT TSV
"
|
@ -1,43 +0,0 @@
|
||||
SELECT count() FROM {table};
|
||||
SELECT count() FROM {table} WHERE AdvEngineID != 0;
|
||||
SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM {table} ;
|
||||
SELECT sum(UserID) FROM {table} ;
|
||||
SELECT uniq(UserID) FROM {table} ;
|
||||
SELECT uniq(SearchPhrase) FROM {table} ;
|
||||
SELECT min(EventDate), max(EventDate) FROM {table} ;
|
||||
SELECT AdvEngineID, count() FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count() DESC;
|
||||
SELECT RegionID, uniq(UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
|
||||
SELECT RegionID, sum(AdvEngineID), count() AS c, avg(ResolutionWidth), uniq(UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
|
||||
SELECT MobilePhoneModel, uniq(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
SELECT MobilePhone, MobilePhoneModel, uniq(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
SELECT SearchPhrase, count() AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT SearchPhrase, uniq(UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
SELECT SearchEngineID, SearchPhrase, count() AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT UserID, count() FROM {table} GROUP BY UserID ORDER BY count() DESC LIMIT 10;
|
||||
SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count() DESC LIMIT 10;
|
||||
SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
|
||||
SELECT UserID, toMinute(EventTime) AS m, SearchPhrase, count() FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count() DESC LIMIT 10;
|
||||
SELECT UserID FROM {table} WHERE UserID = 12345678901234567890;
|
||||
SELECT count() FROM {table} WHERE URL LIKE '%metrika%';
|
||||
SELECT SearchPhrase, any(URL), count() AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT SearchPhrase, any(URL), any(Title), count() AS c, uniq(UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
|
||||
SELECT CounterID, avg(length(URL)) AS l, count() AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count() AS c, any(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
|
||||
SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT URL, count() AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
|
||||
SELECT 1, URL, count() AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
|
||||
SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM {table} GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10;
|
||||
SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
|
||||
SELECT Title, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
|
||||
SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT TraficSourceID, SearchEngineID, AdvEngineID, ((SearchEngineID = 0 AND AdvEngineID = 0) ? Referer : '') AS Src, URL AS Dst, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT URLHash, EventDate, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = halfMD5('http://example.ru/') GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
|
||||
SELECT WindowClientWidth, WindowClientHeight, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND NOT DontCountHits AND URLHash = halfMD5('http://example.ru/') GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
|
||||
SELECT toStartOfMinute(EventTime) AS Minute, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;
|
@ -1,3 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
table=hits_10m; time clickhouse-client --max_bytes_before_external_sort=30000000000 --query="SELECT toInt64(WatchID), JavaEnable, Title, GoodEvent, (EventTime < toDateTime('1971-01-01 00:00:00') ? toDateTime('1971-01-01 00:00:01') : EventTime), (EventDate < toDate('1971-01-01') ? toDate('1971-01-01') : EventDate), CounterID, ClientIP, RegionID, toInt64(UserID), CounterClass, OS, UserAgent, URL, Referer, Refresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, (ClientEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : ClientEventTime), SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce, toInt64(FUniqID), OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, (LocalEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : LocalEventTime), Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, ParamCurrency, ParamCurrencyID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, toInt64(RefererHash), toInt64(URLHash), CLID FROM $table ORDER BY rand()" | corrector_utf8 > /opt/dumps/${table}_corrected.tsv
|
15906
benchmark/duckdb/log
15906
benchmark/duckdb/log
File diff suppressed because one or more lines are too long
@ -1,43 +0,0 @@
|
||||
SELECT count(*) FROM hits;
|
||||
SELECT count(*) FROM hits WHERE AdvEngineID != 0;
|
||||
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits;
|
||||
SELECT sum(UserID) FROM hits;
|
||||
SELECT COUNT(DISTINCT UserID) FROM hits;
|
||||
SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
|
||||
SELECT min(EventDate), max(EventDate) FROM hits;
|
||||
SELECT AdvEngineID, count(*) FROM hits WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
|
||||
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
|
||||
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
|
||||
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(MobilePhoneModel) > 0 GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(MobilePhoneModel) > 0 GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
SELECT SearchPhrase, count(*) AS c FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT UserID, count(*) FROM hits GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
|
||||
SELECT UserID, extract(minute FROM (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) AS m, SearchPhrase, count(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID FROM hits WHERE UserID = 12345678901234567890;
|
||||
SELECT count(*) FROM hits WHERE URL::TEXT LIKE '%metrika%';
|
||||
SELECT SearchPhrase, min(URL), count(*) AS c FROM hits WHERE URL::TEXT LIKE '%metrika%' AND octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title::TEXT LIKE '%Яндекс%' AND URL::TEXT NOT LIKE '%.yandex.%' AND octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT * FROM hits WHERE URL::TEXT LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY SearchPhrase LIMIT 10;
|
||||
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY EventTime, SearchPhrase LIMIT 10;
|
||||
SELECT CounterID, avg(octet_length(URL)) AS l, count(*) AS c FROM hits WHERE octet_length(URL) > 0 GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT regexp_replace(Referer::TEXT, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS key, avg(octet_length(Referer)) AS l, count(*) AS c, min(Referer) FROM hits WHERE octet_length(Referer) > 0 GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits;
|
||||
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT URL, count(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;
|
||||
SELECT 1, URL, count(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
|
||||
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
|
||||
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND octet_length(URL) > 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
|
||||
SELECT Title, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND octet_length(Title) > 0 GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
|
||||
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
|
||||
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
|
||||
SELECT DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) AS "Minute", count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) ORDER BY DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime)));
|
File diff suppressed because one or more lines are too long
@ -1,43 +0,0 @@
|
||||
Folder structure
|
||||
______________
|
||||
dump_dataset_from_ch.sh - bash script that dumps a dataset from Clickhouse
|
||||
schema.sql - schema for a Greenplum cluster to load dumped dataset in
|
||||
load_data_set.sql - the script that loads up a dumped dataset
|
||||
queries.sql - SQL statements used in the benchmark
|
||||
benchmark.sh - this piece of bash conducts a benchmark
|
||||
result_parser.py - script to parse benchmark.sh's output and produce python code to build a graph to compare up to 4 benchmark results.
|
||||
Requirements
|
||||
____________
|
||||
|
||||
Greenplum uses a separate server as a point of entry, so you need at least 2 servers to run a cluster: a master host and a segment host. 2 segment hosts and 56 segments (28 per host) were used while conducting the test.
|
||||
You have to put the segment hostnames in benchmark.sh.
|
||||
Greenplum quick installation instructions
|
||||
_________________________________________
|
||||
|
||||
Obtain a stable Greenplum version here (4.3.9.1 was used while conducting the benchmark):
|
||||
https://network.pivotal.io/products/pivotal-gpdb
|
||||
|
||||
and install it using this detailed guide:
|
||||
http://gpdb.docs.pivotal.io/4340/install_guide/install_guide.html
|
||||
|
||||
You should change gp_interconnect_type to 'tcp' if cluster members are connected via 1GB link or lower.
|
||||
There are some variables that have to be changed prior to the first benchmark run: gp_vmem_protect_limit and max_statement_mem, to allow each segment to use more virtual memory. Here are the commands to change these GUCs; they have to be executed as gpadmin on the master host:
|
||||
|
||||
gpconfig -c gp_interconnect_type -v tcp
|
||||
gpconfig -c gp_vmem_protect_limit -v 3000
|
||||
gpconfig -c max_statement_mem -v '4000MB'
|
||||
|
||||
How to prepare data
|
||||
-------------------
|
||||
|
||||
One can prepare datasets to run the benchmark on using the dump_dataset_from_ch.sh script from this repo. The script has to be run on a ClickHouse host. It takes a long time to get the dumps.
|
||||
|
||||
Upload the datasets to the Greenplum master. Then run schema.sql to prepare the schema and load_data_set.sql to load the data. This operation also takes a long time.
|
||||
|
||||
How to conduct the benchmark
|
||||
__________________________
|
||||
There is a benchmark.sh script that takes some arguments. Here is the syntax:
|
||||
|
||||
./benchmark.sh sql_statements_file tablename dbname orca_switch
|
||||
|
||||
If you don't know about the last one (orca_switch), just omit it to use the default value ("on").
|
@ -1,30 +0,0 @@
|
||||
#!/usr/bin/env bash
#
# Greenplum benchmark driver.
#
# Usage: ./benchmark.sh [sql_statements_file] tablename dbname [orca_switch]
#
# Reads one query per line from the statements file, replaces the "{table}"
# placeholder with the table name, and runs every query three times through
# psql: once with effective_cache_size='256MB' and twice with '50GB'.
# Around every run it prints "Timestamp_begin:"/"Timestamp_end:" markers, and
# it brackets each query with "####################" lines and the query text.

filename=${1-queries.sql}
table=$2
dbname=$3
orca=${4-on}

# Segment hosts whose page cache is dropped before every query.
# These are placeholders: put the real segment hostnames here.
host1=somehost
host2=somehost

# statement_mem used for every query that is not explicitly given a lower limit below.
default_mem='15GB'

# Flush dirty pages and drop the page cache on the host given as $1.
# "sync" runs first because drop_caches only frees clean (already written) pages.
drop_caches() {
    ssh -n "$1" 'sync; echo 3 | tee /proc/sys/vm/drop_caches' > /dev/null
}

# Run the current $query once with effective_cache_size set to $1.
# Uses the globals $orca, $mem, $query and $dbname. The psql input string is
# kept exactly as before: "\timing off", the SET statements, "\timing on",
# then the query itself.
run_query() {
    echo "Timestamp_begin:$(date)"
    echo "\\timing off \\\\set optimizer=$orca; set effective_cache_size='$1'; set statement_mem='$mem';\\timing on \\\\ $query;" | psql -p 5432 -h 'localhost' -o /dev/null -U gpadmin ${dbname:+"$dbname"}
    echo "Timestamp_end:$(date)"
}

# "read -r" keeps backslashes in the query text intact (plain "read" would
# strip them, e.g. from '\x..' escapes inside LIKE patterns).
sed "s/{table}/$table/g" "$filename" | while read -r query
do
    # Start every query from the default limit; previously a lowered limit
    # stayed in effect for all following queries.
    mem=$default_mem

    drop_caches "$host1"
    drop_caches "$host2"
    sleep 5

    # Queries matching these patterns run with a lower statement_mem.
    # grep prints the matching query, as the original egrep did.
    # All literal parentheses are escaped: the unescaped "count(1)" in the
    # third alternative was an ERE group and never matched the query text.
    printf '%s\n' "$query" | grep -E "SELECT UserID, date_trunc\('minute', EventTime\) AS m|SELECT Referer AS key, avg\(length\(Referer\)\) AS l|SELECT URL, count\(1\) AS c FROM.*GROUP BY URL|SELECT 1, URL, count\(1\) AS c FROM.*GROUP BY 1" && mem='10GB'
    printf '%s\n' "$query" | grep -E 'SELECT DISTINCT|GROUP BY UserID, SearchPhrase LIMIT 10|count\(DISTINCT UserID\) AS u' && mem='5GB'

    echo "####################"
    echo "$query"
    run_query '256MB'
    run_query '50GB'
    run_query '50GB'
    echo "$query"
    echo '####################'
done
|
@ -1,5 +0,0 @@
|
||||
#!/usr/bin/env bash

# Dumps the three "hits" tables from ClickHouse as CSV, one output file per table.
# Each file is written to the current directory and named exactly like the table
# (no extension).
# NOTE(review): load_data_set.sql reads '/data/<table>.dump', so the files
# presumably have to be renamed/moved before loading - confirm.
for table in hits_10m_single hits_100m_single hits_1000m_single; do
    # WatchID, UserID, FUniqID, RefererHash and URLHash are exported as round(x/2);
    # presumably so the values fit the signed bigint columns of the target schema - TODO confirm.
    clickhouse-client -q "SELECT (round(WatchID/2), JavaEnable, Title, GoodEvent, EventTime, EventDate, CounterID, ClientIP, RegionID,round(UserID/2), CounterClass, OS, UserAgent, URL, Referer, Refresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, ClientEventTime, SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce,round(FUniqID/2), OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, LocalEventTime, Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID,round(RefererHash/2),round(URLHash/2), CLID) FROM $table FORMAT CSV" > $table
done
|
@ -1,12 +0,0 @@
|
||||
COPY hits_all_10m FROM '/data/hits_10m_single.dump' CSV SEGMENT REJECT LIMIT 30 PERCENT;
|
||||
CREATE INDEX pk_counterid_eventdate_userid_10m ON hits_all_10m USING btree (counterid, eventdate, userid);
|
||||
CREATE INDEX idx_10m_counterid on hits_all_10m using btree (counterid); CREATE INDEX idx_10m_userid on hits_all_10m using btree (userid);
|
||||
ANALYZE hits_all_10m;
|
||||
COPY hits_all_100m from '/data/hits_100m_single.dump' CSV SEGMENT REJECT LIMIT 30 PERCENT;
|
||||
CREATE INDEX pk_counterid_eventdate_userid_100m ON hits_all_100m USING btree (counterid, eventdate, userid);
|
||||
CREATE INDEX idx_100m_counterid on hits_all_100m using btree (counterid); CREATE INDEX idx_100m_userid on hits_all_100m using btree (userid);
|
||||
ANALYZE hits_all_100m;
|
||||
COPY hits_all_1000m from '/data/hits_1000m_single.dump' CSV SEGMENT REJECT LIMIT 30 PERCENT;
|
||||
CREATE INDEX pk_counterid_eventdate_userid_1000m ON hits_all_1000m USING btree (counterid, eventdate, userid);
|
||||
CREATE INDEX idx_1000m_counterid on hits_all_1000m using btree (counterid); CREATE INDEX idx_1000m_userid on hits_all_1000m using btree (userid);
|
||||
ANALYZE hits_all_1000m;
|
@ -1,43 +0,0 @@
|
||||
SELECT count(1) FROM {table}
|
||||
SELECT count(1) FROM {table} WHERE AdvEngineID != 0
|
||||
SELECT sum(AdvEngineID), count(1), avg(ResolutionWidth) FROM {table}
|
||||
SELECT sum(UserID) FROM {table}
|
||||
SELECT count(UserID) FROM ( SELECT DISTINCT UserID FROM {table} ) AS d
|
||||
SELECT count(SearchPhrase) FROM ( SELECT DISTINCT SearchPhrase FROM {table} ) AS d
|
||||
SELECT min(EventDate), max(EventDate) FROM {table}
|
||||
SELECT AdvEngineID, count(1) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY 2 DESC
|
||||
SELECT RegionID, count(DISTINCT UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10
|
||||
SELECT RegionID, sum(AdvEngineID), count(1) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10
|
||||
SELECT MobilePhoneModel, count(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10
|
||||
SELECT MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10
|
||||
SELECT SearchPhrase, count(1) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10
|
||||
SELECT SearchPhrase, count(DISTINCT UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10
|
||||
SELECT SearchEngineID, SearchPhrase, count(1) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10
|
||||
SELECT UserID, count(1) FROM {table} GROUP BY UserID ORDER BY 2 DESC LIMIT 10
|
||||
SELECT UserID, SearchPhrase, count(1) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY 3 DESC LIMIT 10
|
||||
SELECT UserID, SearchPhrase, count(1) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10
|
||||
SELECT UserID, date_trunc('minute', EventTime) AS m, SearchPhrase, count(1) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(1) DESC LIMIT 10
|
||||
SELECT UserID FROM {table} WHERE UserID = 12345678901234567890
|
||||
SELECT count(1) FROM {table} WHERE URL LIKE '%metrika%'
|
||||
SELECT SearchPhrase, max(URL) as URL, count(1) AS c FROM {table} h WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10
|
||||
SELECT SearchPhrase, max(URL) as URL, min(Title) as Title, count(1) AS c, count(DISTINCT UserID) FROM {table} WHERE Title LIKE '%\xd0\xaf\xd0\xbd\xd0\xb4\xd0\xb5\xd0\xba\xd1\x81%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT count(1) FROM {table}
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10
|
||||
SELECT CounterID, avg(length(URL)) AS l, count(1) AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING count(1) > 100000 ORDER BY l DESC LIMIT 25
|
||||
SELECT Referer AS key, avg(length(Referer)) AS l, count(1) AS c, Referer FROM {table} WHERE Referer != '' GROUP BY key HAVING count(1) > 100000 ORDER BY l DESC LIMIT 25
|
||||
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table}
|
||||
SELECT SearchEngineID, ClientIP, count(1) AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10
|
||||
SELECT WatchID, ClientIP, count(1) AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10
|
||||
SELECT WatchID, ClientIP, count(1) AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10
|
||||
SELECT URL, count(1) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10
|
||||
SELECT 1, URL, count(1) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10
|
||||
SELECT ClientIP AS x, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(1) AS c FROM {table} GROUP BY x, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10
|
||||
SELECT URL, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND DontCountHits =0 AND Refresh = 0 AND URL <>'' GROUP BY URL ORDER BY PageViews DESC LIMIT 10
|
||||
SELECT Title, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate BETWEEN '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND DontCountHits=0 AND Refresh=0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10
|
||||
SELECT URL, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND Refresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT TraficSourceID, SearchEngineID, AdvEngineID, case when (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END Src, URL AS Dst, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND eventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND Refresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT URLHash, EventDate, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND eventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND Refresh =0 AND TraficSourceID IN (-1, 6) AND RefererHash = 7135345792483900000 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100
|
||||
SELECT WindowClientWidth, WindowClientHeight, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND eventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND Refresh =0 AND DontCountHits =0 AND URLHash = 7135345792483900000 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
|
||||
SELECT date_trunc('minute', EventTime) AS Minute, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND eventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND Refresh =0 AND DontCountHits =0 GROUP BY Minute ORDER BY Minute;
|
@ -1,150 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import sys
|
||||
import json
|
||||
|
||||
|
||||
def parse_block(block=[], options=[]):
    """Turn one benchmark output block into a flat result row.

    ``block`` is a list of text lines: the query text first, followed by
    timing lines whose second whitespace-separated token is the timing value.
    ``options`` must expose ``show_queries`` and ``show_first_timings``.

    Returns a list holding, in order: the stripped query (only when
    ``options.show_queries`` is truthy), then either the first timing alone
    (when ``options.show_first_timings`` is truthy) or all three timings.
    Timings are returned as strings, exactly as found in the input.
    The default arguments are never mutated.
    """
    query = block[0].strip()

    # Blocks longer than four lines carry the wanted timings on every other
    # line (1, 3, 5); shorter blocks carry them on consecutive lines (1, 2, 3).
    positions = (1, 3, 5) if len(block) > 4 else (1, 2, 3)
    timings = [block[pos].strip().split()[1] for pos in positions]

    parsed = [query] if options.show_queries else []
    if options.show_first_timings:
        parsed.append(timings[0])
    else:
        parsed.extend(timings)
    return parsed
|
||||
|
||||
|
||||
def read_stats_file(options, fname):
    """Collect parsed result rows from one benchmark output file.

    A line containing "SELECT" starts a new block; lines containing "Time:"
    are appended to the current block. A block is handed to ``parse_block``
    (together with ``options``) only when the next "SELECT" line arrives and
    the block holds more than one line, so a trailing block that is not
    followed by another "SELECT" line is not included in the result.

    Returns the list of rows produced by ``parse_block``.
    """
    rows = []
    current = []
    with open(fname) as stats:
        for line in stats:
            if "SELECT" in line:
                # Flush the previous block only if it gathered any timing lines.
                if len(current) > 1:
                    rows.append(parse_block(current, options))
                current = [line]
            elif "Time:" in line:
                current.append(line)
    return rows
|
||||
|
||||
|
||||
def compare_stats_files(options, arguments):
    """Build pyplot data series, one per result file.

    ``arguments[0]`` is skipped (it is the program name as returned by
    ``parse_args``); every following entry is a result file read with
    ``read_stats_file``.

    Returns a list with one ``[x_values, y_values, line_style]`` entry per
    file, where ``y_values`` are the first timing of every query as floats,
    ``x_values`` are the matching 0-based query indices and ``line_style`` is
    a pyplot colour letter followed by "^". Returns an empty list when no
    files are given or when the first file yields no rows.
    """
    pyplot_colors = ["y", "b", "g", "r"]

    file_output = [read_stats_file(options, fname) for fname in arguments[1:]]

    # Nothing to plot: no files at all (previously an IndexError) or an
    # empty first file (previously also an empty result).
    if not file_output or len(file_output[0]) == 0:
        return []

    # parse_block puts the query text in front of the timings when
    # show_queries is set, so the first timing then sits at index 1.
    first_timing_pos = 1 if options.show_queries else 0

    result = []
    for idx, data_set in enumerate(file_output):
        y_values = [float(row[first_timing_pos]) for row in data_set]
        # Cycle through the colours so more than four files do not overflow the list.
        line_style = pyplot_colors[idx % len(pyplot_colors)] + "^"
        result.append([list(range(len(y_values))), y_values, line_style])
    return result
|
||||
|
||||
|
||||
def parse_args():
    """Parse ``sys.argv`` with optparse.

    Recognised options: ``-q/--show-queries`` and ``-f/--show-first-timings``
    (both flags) and ``-c/--compare-mode`` (takes a value).

    Returns ``(options, arguments)``; ``arguments`` still contains the program
    name at index 0 because the whole of ``sys.argv`` is parsed. Prints the
    usage line and exits with status 1 when no positional argument follows
    the program name.
    """
    from optparse import OptionParser

    # (flags, keyword settings) for every option, in the order they are registered.
    option_specs = [
        (
            ("-q", "--show-queries"),
            {
                "help": "Show statements along with timings",
                "action": "store_true",
                "dest": "show_queries",
            },
        ),
        (
            ("-f", "--show-first-timings"),
            {
                "help": "Show only first tries timings",
                "action": "store_true",
                "dest": "show_first_timings",
            },
        ),
        (
            ("-c", "--compare-mode"),
            {
                "help": "Prepare output for pyplot comparing result files.",
                "action": "store",
                "dest": "compare_mode",
            },
        ),
    ]

    parser = OptionParser(usage="usage: %prog [options] [result_file_path]..")
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)

    options, arguments = parser.parse_args(sys.argv)
    if len(arguments) < 2:
        parser.print_usage()
        sys.exit(1)
    return (options, arguments)
|
||||
|
||||
|
||||
def gen_pyplot_code(options, arguments):
    """Print a matplotlib script that plots the series of every result file.

    The series come from ``compare_stats_files(options, arguments)``; each one
    becomes a ``plt.plot(...)`` call labelled "<index> try", with the index
    starting at 0. The generated script is written to standard output.
    """
    plot_calls = "".join(
        "\nplt.plot(%s, %s, '%s', label='%s try')"
        % (x_values, y_values, line_style, idx)
        for idx, (x_values, y_values, line_style) in enumerate(
            compare_stats_files(options, arguments)
        )
    )

    script_lines = (
        "import matplotlib.pyplot as plt",
        plot_calls,
        "plt.xlabel('Try number')",
        "plt.ylabel('Timing')",
        "plt.title('Benchmark query timings')",
        "plt.legend()",
        "plt.show()",
    )
    for script_line in script_lines:
        print(script_line)
|
||||
|
||||
|
||||
def gen_html_json(options, arguments):
    """Print the results of one result file as a JSON-like report.

    Reads ``arguments[1]`` with ``read_stats_file`` and writes to standard
    output a "{ ... }" object with fixed metadata fields ("system", "version",
    "data_size", "time", "comments") followed by a "result" list that holds
    one printed row per query.

    NOTE(review): the rows are printed with ``print(row)``, i.e. as Python
    list reprs with no commas between rows, so the "result" section is not
    strict JSON; this is kept unchanged here.
    """
    rows = read_stats_file(options, arguments[1])

    print("{")
    # Fixed: key and value used to be emitted as one unterminated string
    # ('"system: GreenPlum(x2),'), which broke the whole object.
    print('"system": "GreenPlum(x2)",')
    print('"version": "4.3.9.1",')
    print('"data_size": 10000000,')
    print('"time": "",')
    print('"comments": "",')
    print('"result":')
    print("[")
    for row in rows:
        print(row)
    print("]")
    print("}")
|
||||
|
||||
|
||||
def main():
    """Entry point: choose the output mode from the number of arguments.

    ``arguments`` includes the program name, so more than two entries means
    at least two result files were given; in that case pyplot comparison code
    is generated. With a single result file the JSON-like report is printed.
    """
    options, arguments = parse_args()
    render = gen_pyplot_code if len(arguments) > 2 else gen_html_json
    render(options, arguments)


if __name__ == "__main__":
    main()
|
@ -1,3 +0,0 @@
|
||||
CREATE TABLE hits_all_10m ( WatchID bigint, JavaEnable int, Title text, GoodEvent int, EventTime timestamp, EventDate timestamp, CounterID bigint, ClientIP bigint, RegionID bigint, UserID bigint, CounterClass int, OS int, UserAgent int, URL text, Referer text, Refresh int, RefererCategoryID int, RefererRegionID bigint, URLCategoryID int, URLRegionID bigint, ResolutionWidth int, ResolutionHeight int, ResolutionDepth int, FlashMajor int, FlashMinor int, FlashMinor2 text, NetMajor int, NetMinor int, UserAgentMajor int, CookieEnable int, JavascriptEnable int, IsMobile int, MobilePhone int, MobilePhoneModel text, Params text, IPNetworkID bigint, TraficSourceID int, SearchEngineID int, SearchPhrase text, AdvEngineID int, IsArtifical int, WindowClientWidth int, WindowClientHeight int, ClientTimeZone int, ClientEventTime timestamp, SilverlightVersion1 int, SilverlightVersion2 int, SilverlightVersion3 bigint, SilverlightVersion4 int, PageCharset text, CodeVersion bigint, IsLink int, IsDownload int, IsNotBounce int, FUniqID bigint, OriginalURL text, HID bigint, IsOldCounter int, IsEvent int, IsParameter int, DontCountHits int, WithHash int, HitColor varchar(3), LocalEventTime timestamp, Age int, Sex int, Income int, Interests int, Robotness int, RemoteIP bigint, WindowName int, OpenerName int, HistoryLength int, SocialNetwork text, SocialAction text, HTTPError int, SendTiming bigint, DNSTiming bigint, ConnectTiming bigint, ResponseStartTiming bigint, ResponseEndTiming bigint, FetchTiming bigint, SocialSourceNetworkID int, SocialSourcePage text, ParamPrice int, ParamOrderID text, OpenstatServiceName text, OpenstatCampaignID text, OpenstatAdID text, OpenstatSourceID text, UTMSource text, UTMMedium text, UTMCampaign text, UTMContent text, UTMTerm text, FromTag text, HasGCLID int, RefererHash bigint, URLHash bigint, CLID bigint) WITH (appendonly=true, orientation=column, compresstype=quicklz) DISTRIBUTED BY (userid) ;
|
||||
CREATE TABLE hits_all_100m ( WatchID bigint, JavaEnable int, Title text, GoodEvent int, EventTime timestamp, EventDate timestamp, CounterID bigint, ClientIP bigint, RegionID bigint, UserID bigint, CounterClass int, OS int, UserAgent int, URL text, Referer text, Refresh int, RefererCategoryID int, RefererRegionID bigint, URLCategoryID int, URLRegionID bigint, ResolutionWidth int, ResolutionHeight int, ResolutionDepth int, FlashMajor int, FlashMinor int, FlashMinor2 text, NetMajor int, NetMinor int, UserAgentMajor int, CookieEnable int, JavascriptEnable int, IsMobile int, MobilePhone int, MobilePhoneModel text, Params text, IPNetworkID bigint, TraficSourceID int, SearchEngineID int, SearchPhrase text, AdvEngineID int, IsArtifical int, WindowClientWidth int, WindowClientHeight int, ClientTimeZone int, ClientEventTime timestamp, SilverlightVersion1 int, SilverlightVersion2 int, SilverlightVersion3 bigint, SilverlightVersion4 int, PageCharset text, CodeVersion bigint, IsLink int, IsDownload int, IsNotBounce int, FUniqID bigint, OriginalURL text, HID bigint, IsOldCounter int, IsEvent int, IsParameter int, DontCountHits int, WithHash int, HitColor varchar(3), LocalEventTime timestamp, Age int, Sex int, Income int, Interests int, Robotness int, RemoteIP bigint, WindowName int, OpenerName int, HistoryLength int, SocialNetwork text, SocialAction text, HTTPError int, SendTiming bigint, DNSTiming bigint, ConnectTiming bigint, ResponseStartTiming bigint, ResponseEndTiming bigint, FetchTiming bigint, SocialSourceNetworkID int, SocialSourcePage text, ParamPrice int, ParamOrderID text, OpenstatServiceName text, OpenstatCampaignID text, OpenstatAdID text, OpenstatSourceID text, UTMSource text, UTMMedium text, UTMCampaign text, UTMContent text, UTMTerm text, FromTag text, HasGCLID int, RefererHash bigint, URLHash bigint, CLID bigint) WITH (appendonly=true, orientation=column, compresstype=quicklz) DISTRIBUTED BY (userid) ;
|
||||
CREATE TABLE hits_all_1000m ( WatchID bigint, JavaEnable int, Title text, GoodEvent int, EventTime timestamp, EventDate timestamp, CounterID bigint, ClientIP bigint, RegionID bigint, UserID bigint, CounterClass int, OS int, UserAgent int, URL text, Referer text, Refresh int, RefererCategoryID int, RefererRegionID bigint, URLCategoryID int, URLRegionID bigint, ResolutionWidth int, ResolutionHeight int, ResolutionDepth int, FlashMajor int, FlashMinor int, FlashMinor2 text, NetMajor int, NetMinor int, UserAgentMajor int, CookieEnable int, JavascriptEnable int, IsMobile int, MobilePhone int, MobilePhoneModel text, Params text, IPNetworkID bigint, TraficSourceID int, SearchEngineID int, SearchPhrase text, AdvEngineID int, IsArtifical int, WindowClientWidth int, WindowClientHeight int, ClientTimeZone int, ClientEventTime timestamp, SilverlightVersion1 int, SilverlightVersion2 int, SilverlightVersion3 bigint, SilverlightVersion4 int, PageCharset text, CodeVersion bigint, IsLink int, IsDownload int, IsNotBounce int, FUniqID bigint, OriginalURL text, HID bigint, IsOldCounter int, IsEvent int, IsParameter int, DontCountHits int, WithHash int, HitColor varchar(3), LocalEventTime timestamp, Age int, Sex int, Income int, Interests int, Robotness int, RemoteIP bigint, WindowName int, OpenerName int, HistoryLength int, SocialNetwork text, SocialAction text, HTTPError int, SendTiming bigint, DNSTiming bigint, ConnectTiming bigint, ResponseStartTiming bigint, ResponseEndTiming bigint, FetchTiming bigint, SocialSourceNetworkID int, SocialSourcePage text, ParamPrice int, ParamOrderID text, OpenstatServiceName text, OpenstatCampaignID text, OpenstatAdID text, OpenstatSourceID text, UTMSource text, UTMMedium text, UTMCampaign text, UTMContent text, UTMTerm text, FromTag text, HasGCLID int, RefererHash bigint, URLHash bigint, CLID bigint) WITH (appendonly=true, orientation=column,compresstype=quicklz) DISTRIBUTED BY (userid) ;
|
@ -1,189 +0,0 @@
|
||||
#!/bin/bash -e
#
# ClickHouse hardware benchmark.
#
# Usage: <script> [SCALE]      (SCALE defaults to 100)
#
# Downloads the ClickHouse binary for the current OS/architecture, the queries
# file and the "hits_${SCALE}m_obfuscated" dataset into ./clickhouse-benchmark-$SCALE,
# starts a local server, runs every query TRIES times with the OS page cache
# dropped before each query, and finally prints system information.
# Every query produces one line "[t1, t2, t3]," where a failed run is reported
# as "null".

# An empty or missing first argument selects the default scale.
SCALE=${1:-100}

TABLE="hits_${SCALE}m_obfuscated"
DATASET="${TABLE}_v1.tar.xz"
QUERIES_FILE="queries.sql"
TRIES=3

# Note: on older Ubuntu versions, 'axel' does not support IPv6. If you are using IPv6-only servers on very old Ubuntu, just don't install 'axel'.

FASTER_DOWNLOAD=wget
if command -v axel >/dev/null; then
    FASTER_DOWNLOAD=axel
else
    echo "It's recommended to install 'axel' for faster downloads."
fi

# Extra tar option; stays empty (and expands to nothing) when pixz is not available.
TAR_PARAMS=''
if command -v pixz >/dev/null; then
    TAR_PARAMS='-Ipixz'
else
    echo "It's recommended to install 'pixz' for faster decompression of the dataset."
fi

mkdir -p "clickhouse-benchmark-$SCALE"
pushd "clickhouse-benchmark-$SCALE"

OS=$(uname -s)
ARCH=$(uname -m)

# Map the OS/architecture pair to the directory name used on the builds server.
DIR=
case "${OS}/${ARCH}" in
    Linux/x86_64)                  DIR="amd64" ;;
    Linux/aarch64)                 DIR="aarch64" ;;
    Linux/powerpc64le)             DIR="powerpc64le" ;;
    FreeBSD/x86_64)                DIR="freebsd" ;;
    FreeBSD/aarch64)               DIR="freebsd-aarch64" ;;
    FreeBSD/powerpc64le)           DIR="freebsd-powerpc64le" ;;
    Darwin/x86_64)                 DIR="macos" ;;
    Darwin/aarch64 | Darwin/arm64) DIR="macos-aarch64" ;;
esac

if [ -z "${DIR}" ]
then
    echo "The '${OS}' operating system with the '${ARCH}' architecture is not supported."
    exit 1
fi

URL="https://builds.clickhouse.com/master/${DIR}/clickhouse"
echo
echo "Will download ${URL}"
echo
# The binary is made executable right here; the second chmod the script used to run was redundant.
curl -O "${URL}" && chmod a+x clickhouse || exit 1
echo
echo "Successfully downloaded the ClickHouse binary"

if [[ ! -f $QUERIES_FILE ]]; then
    wget "https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/$QUERIES_FILE"
fi

if [[ ! -d data ]]; then
    if [[ ! -f $DATASET ]]; then
        $FASTER_DOWNLOAD "https://datasets.clickhouse.com/hits/partitions/$DATASET"
    fi

    # $TAR_PARAMS is intentionally unquoted so that an empty value adds no argument.
    tar $TAR_PARAMS --strip-components=1 --directory=. -x -v -f "$DATASET"
fi

uptime

echo "Starting clickhouse-server"

./clickhouse server > server.log 2>&1 &
PID=$!

# Stop the background server and wait for it whenever the script exits.
function finish {
    kill $PID
    wait
}
trap finish EXIT

echo "Waiting for clickhouse-server to start"

# Poll once per second, for at most 30 seconds, until the table can be queried.
for i in {1..30}; do
    sleep 1
    ./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM $TABLE" 2>/dev/null && break || echo '.'
    if [[ $i == 30 ]]; then exit 1; fi
done

echo
echo "Will perform benchmark. Results:"
echo

# NOTE(review): "read" is kept without -r, as in the original, because the
# queries file is not visible here and may rely on backslash processing - confirm.
sed "s/{table}/${TABLE}/g" "$QUERIES_FILE" | while read query; do
    # Flush dirty pages, then drop the OS page cache so every query starts cold.
    sync
    if [ "${OS}" = "Darwin" ]
    then
        sudo purge > /dev/null
    else
        echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
    fi

    echo -n "["
    for i in $(seq 1 $TRIES); do
        # Test the client's own exit status. The previous "RES=$(... ||:)"
        # followed by a "$?" check always saw 0, so failed queries printed
        # their error text instead of "null".
        if RES=$(./clickhouse client --max_memory_usage 100G --time --format=Null --query="$query" 2>&1); then
            echo -n "${RES}"
        else
            echo -n "null"
        fi
        if [[ "$i" != "$TRIES" ]]; then
            echo -n ", "
        fi
    done
    echo "],"
done


echo
echo "Benchmark complete. System info:"
echo

if [ "${OS}" = "Darwin" ]
then
    echo '----Version, build id-----------'
    ./clickhouse local --query "SELECT format('Version: {}', version())"
    sw_vers | grep BuildVersion
    ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
    ./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
    echo '----CPU-------------------------'
    sysctl hw.model
    sysctl -a | grep -E 'hw.activecpu|hw.memsize|hw.byteorder|cachesize'
    echo '----Disk Free and Total--------'
    df -h .
    echo '----Memory Free and Total-------'
    vm_stat
    echo '----Physical Memory Amount------'
    ls -l /var/vm
    echo '--------------------------------'
else
    echo '----Version, build id-----------'
    ./clickhouse local --query "SELECT format('Version: {}, build id: {}', version(), buildId())"
    ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
    ./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
    echo '----CPU-------------------------'
    cat /proc/cpuinfo | grep -i -F 'model name' | uniq
    lscpu
    echo '----Block Devices---------------'
    lsblk
    echo '----Disk Free and Total--------'
    df -h .
    echo '----Memory Free and Total-------'
    free -h
    echo '----Physical Memory Amount------'
    cat /proc/meminfo | grep MemTotal
    echo '----RAID Info-------------------'
    cat /proc/mdstat
    #echo '----PCI-------------------------'
    #lspci
    #echo '----All Hardware Info-----------'
    #lshw
    echo '--------------------------------'
fi
echo
|
@ -1,4 +0,0 @@
|
||||
# Shell-style settings; presumably sourced by the Hive benchmark scripts - TODO confirm.
# NOTE(review): CONF_DIR is a hard-coded path inside one user's home directory;
# it has to be adjusted on any other machine.
CONF_DIR=/home/kartavyy/benchmark/hive
# Both files below are looked up inside CONF_DIR.
expect_file=$CONF_DIR/expect.tcl
test_file=$CONF_DIR/queries.sql
# Intentionally left empty here.
etc_init_d_service=
|
@ -1,9 +0,0 @@
|
||||
create table hits_10m_raw ( WatchID BIGINT, JavaEnable SMALLINT, Title STRING, GoodEvent SMALLINT, EventTime TIMESTAMP, EventDate TIMESTAMP, CounterID BIGINT, ClientIP BIGINT, RegionID BIGINT, UserID BIGINT, CounterClass TINYINT, OS SMALLINT, UserAgent SMALLINT, URL STRING, Referer STRING, Refresh TINYINT, RefererCategoryID INT, RefererRegionID BIGINT, URLCategoryID INT, URLRegionID BIGINT, ResolutionWidth INT, ResolutionHeight INT, ResolutionDepth SMALLINT, FlashMajor SMALLINT, FlashMinor SMALLINT, FlashMinor2 STRING, NetMajor SMALLINT, NetMinor SMALLINT, UserAgentMajor INT, UserAgentMinor STRING, CookieEnable SMALLINT, JavascriptEnable SMALLINT, IsMobile SMALLINT, MobilePhone SMALLINT, MobilePhoneModel STRING, Params STRING, IPNetworkID BIGINT, TraficSourceID SMALLINT, SearchEngineID INT, SearchPhrase STRING, AdvEngineID SMALLINT, IsArtifical SMALLINT, WindowClientWidth INT, WindowClientHeight INT, ClientTimeZone INT, ClientEventTime TIMESTAMP, SilverlightVersion1 SMALLINT, SilverlightVersion2 SMALLINT, SilverlightVersion3 BIGINT, SilverlightVersion4 INT, PageCharset STRING, CodeVersion BIGINT, IsLink SMALLINT, IsDownload SMALLINT, IsNotBounce SMALLINT, FUniqID BIGINT, OriginalURL STRING, HID BIGINT, IsOldCounter SMALLINT, IsEvent SMALLINT, IsParameter SMALLINT, DontCountHits SMALLINT, WithHash SMALLINT, HitColor STRING, LocalEventTime TIMESTAMP, Age SMALLINT, Sex SMALLINT, Income SMALLINT, Interests INT, Robotness SMALLINT, RemoteIP BIGINT, WindowName INT, OpenerName INT, HistoryLength SMALLINT, BrowserLanguage STRING, BrowserCountry STRING, SocialNetwork STRING, SocialAction STRING, HTTPError INT, SendTiming BIGINT, DNSTiming BIGINT, ConnectTiming BIGINT, ResponseStartTiming BIGINT, ResponseEndTiming BIGINT, FetchTiming BIGINT, SocialSourceNetworkID SMALLINT, SocialSourcePage STRING, ParamPrice BIGINT, ParamOrderID STRING, ParamCurrency STRING, ParamCurrencyID INT, OpenstatServiceName STRING, OpenstatCampaignID STRING, OpenstatAdID STRING, OpenstatSourceID 
STRING, UTMSource STRING, UTMMedium STRING, UTMCampaign STRING, UTMContent STRING, UTMTerm STRING, FromTag STRING, HasGCLID SMALLINT, RefererHash BIGINT, URLHash BIGINT, CLID BIGINT, UserIDHash BIGINT ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE;
|
||||
|
||||
load data local inpath '/opt/dump/dump_0.3/dump_hits_10m_meshed_utf8.tsv' overwrite into table hits_10m_raw;
|
||||
|
||||
create table hits_10m ( WatchID BIGINT, JavaEnable SMALLINT, Title STRING, GoodEvent SMALLINT, EventTime TIMESTAMP, EventDate TIMESTAMP, CounterID BIGINT, ClientIP BIGINT, RegionID BIGINT, UserID BIGINT, CounterClass TINYINT, OS SMALLINT, UserAgent SMALLINT, URL STRING, Referer STRING, Refresh TINYINT, RefererCategoryID INT, RefererRegionID BIGINT, URLCategoryID INT, URLRegionID BIGINT, ResolutionWidth INT, ResolutionHeight INT, ResolutionDepth SMALLINT, FlashMajor SMALLINT, FlashMinor SMALLINT, FlashMinor2 STRING, NetMajor SMALLINT, NetMinor SMALLINT, UserAgentMajor INT, UserAgentMinor STRING, CookieEnable SMALLINT, JavascriptEnable SMALLINT, IsMobile SMALLINT, MobilePhone SMALLINT, MobilePhoneModel STRING, Params STRING, IPNetworkID BIGINT, TraficSourceID SMALLINT, SearchEngineID INT, SearchPhrase STRING, AdvEngineID SMALLINT, IsArtifical SMALLINT, WindowClientWidth INT, WindowClientHeight INT, ClientTimeZone INT, ClientEventTime TIMESTAMP, SilverlightVersion1 SMALLINT, SilverlightVersion2 SMALLINT, SilverlightVersion3 BIGINT, SilverlightVersion4 INT, PageCharset STRING, CodeVersion BIGINT, IsLink SMALLINT, IsDownload SMALLINT, IsNotBounce SMALLINT, FUniqID BIGINT, OriginalURL STRING, HID BIGINT, IsOldCounter SMALLINT, IsEvent SMALLINT, IsParameter SMALLINT, DontCountHits SMALLINT, WithHash SMALLINT, HitColor STRING, LocalEventTime TIMESTAMP, Age SMALLINT, Sex SMALLINT, Income SMALLINT, Interests INT, Robotness SMALLINT, RemoteIP BIGINT, WindowName INT, OpenerName INT, HistoryLength SMALLINT, BrowserLanguage STRING, BrowserCountry STRING, SocialNetwork STRING, SocialAction STRING, HTTPError INT, SendTiming BIGINT, DNSTiming BIGINT, ConnectTiming BIGINT, ResponseStartTiming BIGINT, ResponseEndTiming BIGINT, FetchTiming BIGINT, SocialSourceNetworkID SMALLINT, SocialSourcePage STRING, ParamPrice BIGINT, ParamOrderID STRING, ParamCurrency STRING, ParamCurrencyID INT, OpenstatServiceName STRING, OpenstatCampaignID STRING, OpenstatAdID STRING, OpenstatSourceID STRING, 
UTMSource STRING, UTMMedium STRING, UTMCampaign STRING, UTMContent STRING, UTMTerm STRING, FromTag STRING, HasGCLID SMALLINT, RefererHash BIGINT, URLHash BIGINT, CLID BIGINT, UserIDHash BIGINT ) CLUSTERED BY (EventDate) SORTED BY(CounterID, EventDate, UserIDHash, EventTime) INTO 10 BUCKETS STORED AS ORC tblproperties("orc.compress"="ZLIB");
|
||||
|
||||
insert overwrite table hits_10m select * from hits_10m_raw;
|
||||
|
||||
--drop table hits_10m_raw;
|
@ -1,18 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
#!/bin/expect
|
||||
|
||||
# Set timeout
|
||||
set timeout 600
|
||||
|
||||
# Get arguments
|
||||
set query [lindex $argv 0]
|
||||
|
||||
spawn hive
|
||||
|
||||
expect "hive>"
|
||||
send "$query;\r"
|
||||
|
||||
expect "hive>"
|
||||
send "quit;\r"
|
||||
|
||||
expect eof
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,624 +0,0 @@
|
||||
start time: Вт. сент. 10 18:46:00 MSK 2013
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_15579@mturlrep13_201309101846_67163557.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT count(*) FROM hits_10m;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_16038@mturlrep13_201309101846_623079473.txt
|
||||
hive> SELECT count(*) FROM hits_10m;;
|
||||
Total MapReduce jobs = 1
|
||||
Launching Job 1 out of 1
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0036
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
|
||||
2013-09-10 18:46:20,061 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:46:27,089 Stage-1 map = 7%, reduce = 0%
|
||||
2013-09-10 18:46:33,113 Stage-1 map = 14%, reduce = 0%
|
||||
2013-09-10 18:46:36,127 Stage-1 map = 22%, reduce = 0%
|
||||
2013-09-10 18:46:39,143 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:40,149 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:41,156 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:42,162 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:43,168 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:44,174 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:45,179 Stage-1 map = 36%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:46,185 Stage-1 map = 36%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:47,191 Stage-1 map = 36%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:48,197 Stage-1 map = 43%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:49,205 Stage-1 map = 47%, reduce = 0%, Cumulative CPU 62.51 sec
|
||||
2013-09-10 18:46:50,211 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:51,217 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:52,222 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:53,227 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:54,233 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:55,238 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:56,244 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:57,250 Stage-1 map = 54%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:58,255 Stage-1 map = 57%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:59,261 Stage-1 map = 57%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:00,266 Stage-1 map = 57%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:01,272 Stage-1 map = 61%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:02,277 Stage-1 map = 61%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:03,282 Stage-1 map = 65%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:04,287 Stage-1 map = 65%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:05,305 Stage-1 map = 65%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:06,310 Stage-1 map = 69%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:07,316 Stage-1 map = 73%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:08,321 Stage-1 map = 73%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:09,326 Stage-1 map = 76%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:10,331 Stage-1 map = 80%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:11,336 Stage-1 map = 80%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:12,341 Stage-1 map = 84%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:13,346 Stage-1 map = 88%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:14,351 Stage-1 map = 88%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:15,356 Stage-1 map = 93%, reduce = 17%, Cumulative CPU 118.21 sec
|
||||
2013-09-10 18:47:16,372 Stage-1 map = 93%, reduce = 17%, Cumulative CPU 118.21 sec
|
||||
2013-09-10 18:47:17,379 Stage-1 map = 93%, reduce = 17%, Cumulative CPU 118.21 sec
|
||||
2013-09-10 18:47:18,384 Stage-1 map = 97%, reduce = 17%, Cumulative CPU 118.21 sec
|
||||
2013-09-10 18:47:19,388 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 162.76 sec
|
||||
2013-09-10 18:47:20,393 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 162.76 sec
|
||||
2013-09-10 18:47:21,397 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 162.76 sec
|
||||
2013-09-10 18:47:22,404 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 165.27 sec
|
||||
2013-09-10 18:47:23,410 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 165.27 sec
|
||||
2013-09-10 18:47:24,415 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 165.27 sec
|
||||
MapReduce Total cumulative CPU time: 2 minutes 45 seconds 270 msec
|
||||
Ended Job = job_201309101627_0036
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 165.27 sec HDFS Read: 1082943442 HDFS Write: 9 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 2 minutes 45 seconds 270 msec
|
||||
OK
|
||||
10000000
|
||||
Time taken: 74.228 seconds, Fetched: 1 row(s)
|
||||
hive> quit;
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_17475@mturlrep13_201309101847_1783698271.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT count(*) FROM hits_10m WHERE AdvEngineID != 0;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_17882@mturlrep13_201309101847_1295809350.txt
|
||||
hive> SELECT count(*) FROM hits_10m WHERE AdvEngineID != 0;;
|
||||
Total MapReduce jobs = 1
|
||||
Launching Job 1 out of 1
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0037
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
|
||||
2013-09-10 18:47:44,058 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:47:49,086 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.21 sec
|
||||
2013-09-10 18:47:50,093 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.21 sec
|
||||
2013-09-10 18:47:51,101 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.21 sec
|
||||
2013-09-10 18:47:52,107 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.21 sec
|
||||
2013-09-10 18:47:53,113 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.21 sec
|
||||
2013-09-10 18:47:54,119 Stage-1 map = 75%, reduce = 0%, Cumulative CPU 18.18 sec
|
||||
2013-09-10 18:47:55,125 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 23.81 sec
|
||||
2013-09-10 18:47:56,130 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 23.81 sec
|
||||
2013-09-10 18:47:57,138 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 25.64 sec
|
||||
2013-09-10 18:47:58,144 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 25.64 sec
|
||||
2013-09-10 18:47:59,150 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 25.64 sec
|
||||
MapReduce Total cumulative CPU time: 25 seconds 640 msec
|
||||
Ended Job = job_201309101627_0037
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 25.64 sec HDFS Read: 907716 HDFS Write: 7 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 25 seconds 640 msec
|
||||
OK
|
||||
171127
|
||||
Time taken: 25.153 seconds, Fetched: 1 row(s)
|
||||
hive> quit;
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_19147@mturlrep13_201309101848_1891179156.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_19567@mturlrep13_201309101848_690102300.txt
|
||||
hive> SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m;;
|
||||
Total MapReduce jobs = 1
|
||||
Launching Job 1 out of 1
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0038
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
|
||||
2013-09-10 18:48:18,837 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:48:25,865 Stage-1 map = 39%, reduce = 0%
|
||||
2013-09-10 18:48:26,875 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
|
||||
2013-09-10 18:48:27,882 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
|
||||
2013-09-10 18:48:28,889 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
|
||||
2013-09-10 18:48:29,895 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
|
||||
2013-09-10 18:48:30,901 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
|
||||
2013-09-10 18:48:31,907 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
|
||||
2013-09-10 18:48:32,914 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
|
||||
2013-09-10 18:48:33,920 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 34.59 sec
|
||||
2013-09-10 18:48:34,925 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 34.59 sec
|
||||
2013-09-10 18:48:35,930 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 34.59 sec
|
||||
2013-09-10 18:48:36,935 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 34.59 sec
|
||||
2013-09-10 18:48:37,940 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 34.59 sec
|
||||
2013-09-10 18:48:38,945 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 35.24 sec
|
||||
2013-09-10 18:48:39,952 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 36.63 sec
|
||||
2013-09-10 18:48:40,958 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 36.63 sec
|
||||
MapReduce Total cumulative CPU time: 36 seconds 630 msec
|
||||
Ended Job = job_201309101627_0038
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 36.63 sec HDFS Read: 8109219 HDFS Write: 30 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 36 seconds 630 msec
|
||||
OK
|
||||
Time taken: 31.961 seconds, Fetched: 1 row(s)
|
||||
hive> quit;
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_20898@mturlrep13_201309101848_327652001.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT sum(UserID) FROM hits_10m;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_21336@mturlrep13_201309101848_1975614127.txt
|
||||
hive> SELECT sum(UserID) FROM hits_10m;;
|
||||
Total MapReduce jobs = 1
|
||||
Launching Job 1 out of 1
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0039
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
|
||||
2013-09-10 18:49:00,561 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:49:07,617 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
|
||||
2013-09-10 18:49:08,626 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
|
||||
2013-09-10 18:49:09,634 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
|
||||
2013-09-10 18:49:10,639 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
|
||||
2013-09-10 18:49:11,646 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
|
||||
2013-09-10 18:49:12,652 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
|
||||
2013-09-10 18:49:13,658 Stage-1 map = 75%, reduce = 0%, Cumulative CPU 21.86 sec
|
||||
2013-09-10 18:49:14,664 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
|
||||
2013-09-10 18:49:15,670 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
|
||||
2013-09-10 18:49:16,675 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
|
||||
2013-09-10 18:49:17,680 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
|
||||
2013-09-10 18:49:18,685 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
|
||||
2013-09-10 18:49:19,690 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
|
||||
2013-09-10 18:49:20,697 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 32.07 sec
|
||||
2013-09-10 18:49:21,703 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 32.07 sec
|
||||
MapReduce Total cumulative CPU time: 32 seconds 70 msec
|
||||
Ended Job = job_201309101627_0039
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 32.07 sec HDFS Read: 57312623 HDFS Write: 21 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 32 seconds 70 msec
|
||||
OK
|
||||
-4662894107982093709
|
||||
Time taken: 30.94 seconds, Fetched: 1 row(s)
|
||||
hive> quit;
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_22560@mturlrep13_201309101849_2023198520.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT count(DISTINCT UserID) FROM hits_10m;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_22993@mturlrep13_201309101849_961728603.txt
|
||||
hive> SELECT count(DISTINCT UserID) FROM hits_10m;;
|
||||
Total MapReduce jobs = 1
|
||||
Launching Job 1 out of 1
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0040
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
|
||||
2013-09-10 18:49:41,232 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:49:48,264 Stage-1 map = 43%, reduce = 0%
|
||||
2013-09-10 18:49:51,283 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:49:52,291 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:49:53,298 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:49:54,304 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:49:55,310 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:49:56,317 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:49:57,332 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:49:58,337 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:49:59,342 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:50:00,348 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:50:01,353 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
|
||||
2013-09-10 18:50:02,360 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
|
||||
2013-09-10 18:50:03,365 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
|
||||
2013-09-10 18:50:04,369 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
|
||||
2013-09-10 18:50:05,375 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
|
||||
2013-09-10 18:50:06,379 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
|
||||
2013-09-10 18:50:07,385 Stage-1 map = 100%, reduce = 88%, Cumulative CPU 55.01 sec
|
||||
2013-09-10 18:50:08,391 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 62.95 sec
|
||||
2013-09-10 18:50:09,397 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 62.95 sec
|
||||
2013-09-10 18:50:10,402 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 62.95 sec
|
||||
MapReduce Total cumulative CPU time: 1 minutes 2 seconds 950 msec
|
||||
Ended Job = job_201309101627_0040
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 62.95 sec HDFS Read: 57312623 HDFS Write: 8 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 1 minutes 2 seconds 950 msec
|
||||
OK
|
||||
2037258
|
||||
Time taken: 38.84 seconds, Fetched: 1 row(s)
|
||||
hive> quit;
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_24634@mturlrep13_201309101850_840502487.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT count(DISTINCT SearchPhrase) FROM hits_10m;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_25401@mturlrep13_201309101850_84750246.txt
|
||||
hive> SELECT count(DISTINCT SearchPhrase) FROM hits_10m;;
|
||||
Total MapReduce jobs = 1
|
||||
Launching Job 1 out of 1
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0041
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
|
||||
2013-09-10 18:50:31,472 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:50:38,501 Stage-1 map = 43%, reduce = 0%
|
||||
2013-09-10 18:50:40,517 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
|
||||
2013-09-10 18:50:41,523 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
|
||||
2013-09-10 18:50:42,531 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
|
||||
2013-09-10 18:50:43,536 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
|
||||
2013-09-10 18:50:44,542 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
|
||||
2013-09-10 18:50:45,548 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
|
||||
2013-09-10 18:50:46,555 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
|
||||
2013-09-10 18:50:47,561 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 21.42 sec
|
||||
2013-09-10 18:50:48,566 Stage-1 map = 97%, reduce = 17%, Cumulative CPU 31.8 sec
|
||||
2013-09-10 18:50:49,571 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
|
||||
2013-09-10 18:50:50,576 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
|
||||
2013-09-10 18:50:51,581 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
|
||||
2013-09-10 18:50:52,587 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
|
||||
2013-09-10 18:50:53,592 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
|
||||
2013-09-10 18:50:54,597 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
|
||||
2013-09-10 18:50:55,602 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
|
||||
2013-09-10 18:50:56,607 Stage-1 map = 100%, reduce = 92%, Cumulative CPU 42.95 sec
|
||||
2013-09-10 18:50:57,615 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 50.6 sec
|
||||
2013-09-10 18:50:58,642 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 50.6 sec
|
||||
MapReduce Total cumulative CPU time: 50 seconds 600 msec
|
||||
Ended Job = job_201309101627_0041
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 50.6 sec HDFS Read: 27820105 HDFS Write: 8 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 50 seconds 600 msec
|
||||
OK
|
||||
1110413
|
||||
Time taken: 37.04 seconds, Fetched: 1 row(s)
|
||||
hive> quit;
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_26718@mturlrep13_201309101851_285967686.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT min(EventDate), max(EventDate) FROM hits_10m;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_27149@mturlrep13_201309101851_2135309314.txt
|
||||
hive> SELECT min(EventDate), max(EventDate) FROM hits_10m;;
|
||||
Total MapReduce jobs = 1
|
||||
Launching Job 1 out of 1
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0042
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
|
||||
2013-09-10 18:51:19,077 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:51:25,106 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
|
||||
2013-09-10 18:51:26,114 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
|
||||
2013-09-10 18:51:27,123 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
|
||||
2013-09-10 18:51:28,129 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
|
||||
2013-09-10 18:51:29,135 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
|
||||
2013-09-10 18:51:30,141 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
|
||||
2013-09-10 18:51:31,147 Stage-1 map = 75%, reduce = 0%, Cumulative CPU 20.4 sec
|
||||
2013-09-10 18:51:32,152 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 27.44 sec
|
||||
2013-09-10 18:51:33,158 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
|
||||
2013-09-10 18:51:34,163 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
|
||||
2013-09-10 18:51:35,168 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
|
||||
2013-09-10 18:51:36,173 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
|
||||
2013-09-10 18:51:37,179 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
|
||||
2013-09-10 18:51:38,184 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
|
||||
2013-09-10 18:51:39,192 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 29.39 sec
|
||||
2013-09-10 18:51:40,198 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 29.39 sec
|
||||
MapReduce Total cumulative CPU time: 29 seconds 390 msec
|
||||
Ended Job = job_201309101627_0042
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 29.39 sec HDFS Read: 597016 HDFS Write: 6 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 29 seconds 390 msec
|
||||
OK
|
||||
Time taken: 30.908 seconds, Fetched: 1 row(s)
|
||||
hive> quit;
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_28401@mturlrep13_201309101851_891001725.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT AdvEngineID, count(*) AS c FROM hits_10m WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY c DESC;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_28836@mturlrep13_201309101851_1054092389.txt
|
||||
hive> SELECT AdvEngineID, count(*) AS c FROM hits_10m WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY c DESC;;
|
||||
Total MapReduce jobs = 2
|
||||
Launching Job 1 out of 2
|
||||
Number of reduce tasks not specified. Estimated from input data size: 2
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0043
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 2
|
||||
2013-09-10 18:51:59,809 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:52:04,838 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.48 sec
|
||||
2013-09-10 18:52:05,847 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.48 sec
|
||||
2013-09-10 18:52:06,855 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.48 sec
|
||||
2013-09-10 18:52:07,861 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.48 sec
|
||||
2013-09-10 18:52:08,868 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.48 sec
|
||||
2013-09-10 18:52:09,875 Stage-1 map = 75%, reduce = 0%, Cumulative CPU 18.07 sec
|
||||
2013-09-10 18:52:10,881 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 23.92 sec
|
||||
2013-09-10 18:52:11,887 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 23.92 sec
|
||||
2013-09-10 18:52:12,894 Stage-1 map = 100%, reduce = 67%, Cumulative CPU 25.68 sec
|
||||
2013-09-10 18:52:13,901 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 27.53 sec
|
||||
2013-09-10 18:52:14,908 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 27.53 sec
|
||||
MapReduce Total cumulative CPU time: 27 seconds 530 msec
|
||||
Ended Job = job_201309101627_0043
|
||||
Launching Job 2 out of 2
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0044
|
||||
Hadoop job information for Stage-2: number of mappers: 1; number of reducers: 1
|
||||
2013-09-10 18:52:17,388 Stage-2 map = 0%, reduce = 0%
|
||||
2013-09-10 18:52:19,396 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
|
||||
2013-09-10 18:52:20,401 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
|
||||
2013-09-10 18:52:21,406 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
|
||||
2013-09-10 18:52:22,411 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
|
||||
2013-09-10 18:52:23,415 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
|
||||
2013-09-10 18:52:24,420 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
|
||||
2013-09-10 18:52:25,425 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
|
||||
2013-09-10 18:52:26,430 Stage-2 map = 100%, reduce = 33%, Cumulative CPU 0.75 sec
|
||||
2013-09-10 18:52:27,436 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 2.14 sec
|
||||
2013-09-10 18:52:28,442 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 2.14 sec
|
||||
2013-09-10 18:52:29,448 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 2.14 sec
|
||||
MapReduce Total cumulative CPU time: 2 seconds 140 msec
|
||||
Ended Job = job_201309101627_0044
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 2 Cumulative CPU: 27.53 sec HDFS Read: 907716 HDFS Write: 384 SUCCESS
|
||||
Job 1: Map: 1 Reduce: 1 Cumulative CPU: 2.14 sec HDFS Read: 1153 HDFS Write: 60 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 29 seconds 670 msec
|
||||
OK
|
||||
Time taken: 39.506 seconds, Fetched: 9 row(s)
|
||||
hive> quit;
|
||||
-- мощная фильтрация. После фильтрации почти ничего не остаётся, но делаем ещё агрегацию.;
|
||||
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_30667@mturlrep13_201309101852_966681525.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_31123@mturlrep13_201309101852_1252745596.txt
|
||||
hive> SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10;;
|
||||
Total MapReduce jobs = 2
|
||||
Launching Job 1 out of 2
|
||||
Number of reduce tasks not specified. Estimated from input data size: 2
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0045
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 2
|
||||
2013-09-10 18:52:49,457 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:52:56,485 Stage-1 map = 43%, reduce = 0%
|
||||
2013-09-10 18:52:59,503 Stage-1 map = 46%, reduce = 0%, Cumulative CPU 14.56 sec
|
||||
2013-09-10 18:53:00,511 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:01,519 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:02,526 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:03,533 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:04,539 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:05,545 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:06,550 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:07,557 Stage-1 map = 92%, reduce = 17%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:08,563 Stage-1 map = 92%, reduce = 17%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:09,569 Stage-1 map = 92%, reduce = 17%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:10,575 Stage-1 map = 97%, reduce = 17%, Cumulative CPU 44.01 sec
|
||||
2013-09-10 18:53:11,598 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 58.47 sec
|
||||
2013-09-10 18:53:12,604 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 58.47 sec
|
||||
2013-09-10 18:53:13,609 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 58.47 sec
|
||||
2013-09-10 18:53:14,615 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 58.47 sec
|
||||
2013-09-10 18:53:15,620 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 58.47 sec
|
||||
2013-09-10 18:53:16,627 Stage-1 map = 100%, reduce = 63%, Cumulative CPU 65.64 sec
|
||||
2013-09-10 18:53:17,634 Stage-1 map = 100%, reduce = 63%, Cumulative CPU 65.64 sec
|
||||
2013-09-10 18:53:18,640 Stage-1 map = 100%, reduce = 63%, Cumulative CPU 65.64 sec
|
||||
2013-09-10 18:53:19,646 Stage-1 map = 100%, reduce = 63%, Cumulative CPU 65.64 sec
|
||||
2013-09-10 18:53:20,653 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 71.27 sec
|
||||
2013-09-10 18:53:21,659 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 71.27 sec
|
||||
MapReduce Total cumulative CPU time: 1 minutes 11 seconds 270 msec
|
||||
Ended Job = job_201309101627_0045
|
||||
Launching Job 2 out of 2
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0046
|
||||
Hadoop job information for Stage-2: number of mappers: 1; number of reducers: 1
|
||||
2013-09-10 18:53:25,187 Stage-2 map = 0%, reduce = 0%
|
||||
2013-09-10 18:53:27,196 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
|
||||
2013-09-10 18:53:28,202 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
|
||||
2013-09-10 18:53:29,207 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
|
||||
2013-09-10 18:53:30,211 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
|
||||
2013-09-10 18:53:31,216 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
|
||||
2013-09-10 18:53:32,220 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
|
||||
2013-09-10 18:53:33,226 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
|
||||
2013-09-10 18:53:34,231 Stage-2 map = 100%, reduce = 33%, Cumulative CPU 1.42 sec
|
||||
2013-09-10 18:53:35,237 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 3.16 sec
|
||||
2013-09-10 18:53:36,243 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 3.16 sec
|
||||
MapReduce Total cumulative CPU time: 3 seconds 160 msec
|
||||
Ended Job = job_201309101627_0046
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 2 Cumulative CPU: 71.27 sec HDFS Read: 67340015 HDFS Write: 100142 SUCCESS
|
||||
Job 1: Map: 1 Reduce: 1 Cumulative CPU: 3.16 sec HDFS Read: 100911 HDFS Write: 96 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 1 minutes 14 seconds 430 msec
|
||||
OK
|
||||
Time taken: 56.439 seconds, Fetched: 10 row(s)
|
||||
hive> quit;
|
||||
-- агрегация, среднее количество ключей.;
|
||||
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_609@mturlrep13_201309101853_355533849.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m GROUP BY RegionID ORDER BY c DESC LIMIT 10;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_1183@mturlrep13_201309101853_289725544.txt
|
||||
hive> SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m GROUP BY RegionID ORDER BY c DESC LIMIT 10;;
|
||||
Total MapReduce jobs = 2
|
||||
Launching Job 1 out of 2
|
||||
Number of reduce tasks not specified. Estimated from input data size: 2
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0047
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 2
|
||||
2013-09-10 18:53:55,838 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:54:02,865 Stage-1 map = 29%, reduce = 0%
|
||||
2013-09-10 18:54:05,876 Stage-1 map = 43%, reduce = 0%
|
||||
2013-09-10 18:54:08,894 Stage-1 map = 46%, reduce = 0%, Cumulative CPU 16.8 sec
|
||||
2013-09-10 18:54:09,901 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:10,909 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:11,915 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:12,921 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:13,927 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:14,932 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:15,938 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:16,943 Stage-1 map = 80%, reduce = 17%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:17,949 Stage-1 map = 80%, reduce = 17%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:18,954 Stage-1 map = 80%, reduce = 17%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:19,959 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:20,964 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:21,970 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:22,975 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:23,980 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:24,986 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:25,991 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:26,997 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:28,002 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:29,008 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:30,014 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:31,021 Stage-1 map = 100%, reduce = 58%, Cumulative CPU 74.39 sec
|
||||
2013-09-10 18:54:32,027 Stage-1 map = 100%, reduce = 96%, Cumulative CPU 74.39 sec
|
||||
2013-09-10 18:54:33,033 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 84.05 sec
|
||||
2013-09-10 18:54:34,038 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 84.05 sec
|
||||
2013-09-10 18:54:35,044 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 84.05 sec
|
||||
MapReduce Total cumulative CPU time: 1 minutes 24 seconds 50 msec
|
||||
Ended Job = job_201309101627_0047
|
||||
Launching Job 2 out of 2
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0048
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,109 +0,0 @@
|
||||
SELECT count(*) FROM hits_10m;
|
||||
SELECT count(*) FROM hits_10m WHERE AdvEngineID != 0;
|
||||
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m;
|
||||
SELECT sum(UserID) FROM hits_10m;
|
||||
SELECT count(DISTINCT UserID) FROM hits_10m;
|
||||
SELECT count(DISTINCT SearchPhrase) FROM hits_10m;
|
||||
SELECT min(EventDate), max(EventDate) FROM hits_10m;
|
||||
|
||||
SELECT AdvEngineID, count(*) AS c FROM hits_10m WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY c DESC;
|
||||
-- мощная фильтрация. После фильтрации почти ничего не остаётся, но делаем ещё агрегацию.;
|
||||
|
||||
SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10;
|
||||
-- агрегация, среднее количество ключей.;
|
||||
|
||||
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m GROUP BY RegionID ORDER BY c DESC LIMIT 10;
|
||||
-- агрегация, среднее количество ключей, несколько агрегатных функций.;
|
||||
|
||||
SELECT MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
-- мощная фильтрация по строкам, затем агрегация по строкам.;
|
||||
|
||||
SELECT MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
-- мощная фильтрация по строкам, затем агрегация по паре из числа и строки.;
|
||||
|
||||
SELECT SearchPhrase, count(*) AS c FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
-- средняя фильтрация по строкам, затем агрегация по строкам, большое количество ключей.;
|
||||
|
||||
SELECT SearchPhrase, count(DISTINCT UserID) AS u FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
-- агрегация чуть сложнее.;
|
||||
|
||||
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
-- агрегация по числу и строке, большое количество ключей.;
|
||||
|
||||
SELECT UserID, count(*) AS c FROM hits_10m GROUP BY UserID ORDER BY c DESC LIMIT 10;
|
||||
-- агрегация по очень большому количеству ключей, может не хватить оперативки.;
|
||||
|
||||
SELECT UserID, SearchPhrase, count(*) AS c FROM hits_10m GROUP BY UserID, SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
-- ещё более сложная агрегация.;
|
||||
|
||||
SELECT UserID, SearchPhrase, count(*) AS c FROM hits_10m GROUP BY UserID, SearchPhrase LIMIT 10;
|
||||
-- то же самое, но без сортировки.;
|
||||
|
||||
SELECT UserID, minute(EventTime), SearchPhrase, count(*) AS c FROM hits_10m GROUP BY UserID, minute(EventTime), SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
-- ещё более сложная агрегация, не стоит выполнять на больших таблицах.;
|
||||
|
||||
SELECT UserID FROM hits_10m WHERE UserID = 12345678901234567890;
|
||||
-- мощная фильтрация по столбцу типа UInt64.;
|
||||
|
||||
SELECT count(*) AS c FROM hits_10m WHERE URL LIKE '%metrika%';
|
||||
-- фильтрация по поиску подстроки в строке.;
|
||||
|
||||
SELECT SearchPhrase, MAX(URL), count(*) AS c FROM hits_10m WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
-- вынимаем большие столбцы, фильтрация по строке.;
|
||||
|
||||
SELECT SearchPhrase, MAX(URL), MAX(Title), count(*) AS c, count(DISTINCT UserID) FROM hits_10m WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
-- чуть больше столбцы.;
|
||||
|
||||
SELECT * FROM hits_10m WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
|
||||
-- плохой запрос - вынимаем все столбцы.;
|
||||
|
||||
SELECT SearchPhrase, EventTime FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
|
||||
-- большая сортировка.;
|
||||
|
||||
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
|
||||
-- большая сортировка по строкам.;
|
||||
|
||||
SELECT SearchPhrase, EventTime FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
|
||||
-- большая сортировка по кортежу.;
|
||||
|
||||
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM hits_10m WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
-- считаем средние длины URL для крупных счётчиков.;
|
||||
|
||||
SELECT SUBSTRING(SUBSTRING(Referer, instr(Referer, '//') + 2), 1, if(0 < instr(SUBSTRING(Referer, instr(Referer, '//') + 2), '/') - 1, instr(SUBSTRING(Referer, instr(Referer, '//') + 2), '/' ) - 1, 0)), avg(length(Referer)) AS l, count(*) AS c, MAX(Referer) FROM hits_10m WHERE Referer != '' GROUP BY SUBSTRING(SUBSTRING(Referer, instr(Referer, '//') + 2), 1, if(0 < instr(SUBSTRING(Referer, instr(Referer, '//') + 2), '/') - 1, instr(SUBSTRING(Referer, instr(Referer, '//') + 2), '/' ) - 1, 0)) HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
-- то же самое, но с разбивкой по доменам.;
|
||||
|
||||
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_10m;
|
||||
-- много тупых агрегатных функций.;
|
||||
|
||||
SELECT SearchEngineID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
-- сложная агрегация, для больших таблиц может не хватить оперативки.;
|
||||
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
-- агрегация по двум полям, которая ничего не агрегирует. Для больших таблиц выполнить не получится.;
|
||||
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
-- то же самое, но ещё и без фильтрации.;
|
||||
|
||||
SELECT URL, count(*) AS c FROM hits_10m GROUP BY URL ORDER BY c DESC LIMIT 10;
|
||||
-- агрегация по URL.;
|
||||
|
||||
SELECT 1, URL, count(*) AS c FROM hits_10m GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
|
||||
-- агрегация по URL и числу.;
|
||||
|
||||
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits_10m GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
|
||||
|
||||
SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT DontCountHits != 0 AND NOT Refresh != 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
|
||||
|
||||
|
||||
SELECT Title, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT DontCountHits != 0 AND NOT Refresh != 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
|
||||
|
||||
|
||||
SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT Refresh != 0 AND IsLink != 0 AND NOT IsDownload != 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
|
||||
|
||||
SELECT TraficSourceID, SearchEngineID, AdvEngineID, URL, count(*) as c, if(SearchEngineID = 0 AND AdvEngineID = 0 , Referer, '') as src FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT Refresh != 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, URL, if(SearchEngineID = 0 AND AdvEngineID = 0 , Referer, '') ORDER BY c DESC LIMIT 1000;
|
||||
|
||||
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT Refresh != 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 6202628419148573758 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100000;
|
||||
|
||||
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT Refresh != 0 AND NOT DontCountHits != 0 AND URLHash = 6202628419148573758 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
|
||||
|
||||
SELECT unix_timestamp(EventTime) - SECOND(EventTime) AS m, count(*) FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-02') AND NOT Refresh != 0 AND NOT DontCountHits != 0 GROUP BY unix_timestamp(EventTime) - SECOND(EventTime) ORDER BY m;
|
@ -1,2 +0,0 @@
|
||||
# Run the Hive benchmark; $1 is forwarded as the -n value and names the log file.
|
||||
# Abort if the benchmark directory is missing instead of running from the wrong place.
|
||||
cd /home/kartavyy/benchmark || exit 1
|
||||
./benchmark.sh -c hive/conf.sh -n "$1" > "hive/log/log_$1"
|
@ -1,4 +0,0 @@
|
||||
# Settings for the MySQL benchmark: expect_file and test_file are located under CONF_DIR.
|
||||
# NOTE(review): presumably read by benchmark.sh via its -c option — confirm against that script.
|
||||
CONF_DIR=/home/kartavyy/benchmark/mysql
|
||||
expect_file=$CONF_DIR/expect.tcl
|
||||
test_file=$CONF_DIR/queries.sql
|
||||
etc_init_d_service=/etc/init.d/mysql
|
@ -1,7 +0,0 @@
|
||||
create table hits_10m( WatchID BIGINT, JavaEnable TINYINT UNSIGNED, Title VARCHAR(1024), GoodEvent SMALLINT, EventTime DATETIME, EventDate DATE, CounterID INTEGER UNSIGNED, ClientIP INTEGER UNSIGNED, RegionID INTEGER UNSIGNED, UserID BIGINT, CounterClass TINYINT, OS SMALLINT, UserAgent SMALLINT, URL VARCHAR(6072), Referer VARCHAR(2048), Refresh TINYINT, RefererCategoryID SMALLINT UNSIGNED, RefererRegionID INTEGER UNSIGNED, URLCategoryID SMALLINT UNSIGNED, URLRegionID INTEGER UNSIGNED, ResolutionWidth SMALLINT UNSIGNED, ResolutionHeight SMALLINT UNSIGNED, ResolutionDepth TINYINT UNSIGNED, FlashMajor TINYINT UNSIGNED, FlashMinor TINYINT UNSIGNED, FlashMinor2 VARCHAR(256), NetMajor TINYINT UNSIGNED, NetMinor TINYINT UNSIGNED, UserAgentMajor SMALLINT UNSIGNED, UserAgentMinor CHAR(2), CookieEnable TINYINT UNSIGNED, JavascriptEnable TINYINT UNSIGNED, IsMobile TINYINT UNSIGNED, MobilePhone TINYINT UNSIGNED, MobilePhoneModel VARCHAR(80), Params VARCHAR(2048), IPNetworkID INT UNSIGNED, TraficSourceID SMALLINT, SearchEngineID SMALLINT UNSIGNED, SearchPhrase VARCHAR(1024), AdvEngineID TINYINT UNSIGNED, IsArtifical TINYINT UNSIGNED, WindowClientWidth SMALLINT UNSIGNED, WindowClientHeight SMALLINT UNSIGNED, ClientTimeZone INTEGER, ClientEventTime DATETIME, SilverlightVersion1 TINYINT UNSIGNED, SilverlightVersion2 TINYINT UNSIGNED, SilverlightVersion3 INT UNSIGNED, SilverlightVersion4 SMALLINT UNSIGNED, PageCharset VARCHAR(80), CodeVersion INT UNSIGNED, IsLink TINYINT UNSIGNED, IsDownload TINYINT UNSIGNED, IsNotBounce TINYINT UNSIGNED, FUniqID BIGINT, OriginalURL VARCHAR(6072), HID INT UNSIGNED, IsOldCounter TINYINT UNSIGNED, IsEvent TINYINT UNSIGNED, IsParameter TINYINT UNSIGNED, DontCountHits TINYINT UNSIGNED, WithHash TINYINT UNSIGNED, HitColor CHAR(1), LocalEventTime DATETIME, Age TINYINT UNSIGNED, Sex TINYINT UNSIGNED, Income TINYINT UNSIGNED, Interests SMALLINT UNSIGNED, Robotness TINYINT UNSIGNED, RemoteIP INT UNSIGNED, WindowName INT, OpenerName INT, HistoryLength 
SMALLINT, BrowserLanguage CHAR(2), BrowserCountry CHAR(2), SocialNetwork VARCHAR(128), SocialAction VARCHAR(128), HTTPError SMALLINT UNSIGNED, SendTiming INT UNSIGNED, DNSTiming INT UNSIGNED, ConnectTiming INTEGER UNSIGNED, ResponseStartTiming INTEGER UNSIGNED, ResponseEndTiming INTEGER UNSIGNED, FetchTiming INTEGER UNSIGNED, SocialSourceNetworkID TINYINT UNSIGNED, SocialSourcePage VARCHAR(128), ParamPrice BIGINT, ParamOrderID VARCHAR(80), ParamCurrency CHAR(3), ParamCurrencyID SMALLINT UNSIGNED, OpenstatServiceName VARCHAR(80), OpenstatCampaignID VARCHAR(80), OpenstatAdID VARCHAR(80), OpenstatSourceID VARCHAR(80), UTMSource VARCHAR(256), UTMMedium VARCHAR(256), UTMCampaign VARCHAR(256), UTMContent VARCHAR(256), UTMTerm VARCHAR(256), FromTag VARCHAR(256), HasGCLID TINYINT UNSIGNED, RefererHash BIGINT, URLHash BIGINT, CLID INTEGER UNSIGNED, UserIDHash BIGINT UNSIGNED) ENGINE=MYISAM;
|
||||
|
||||
CREATE INDEX hits_10m_ind on hits_10m (CounterID, EventDate, UserIDHash, EventTime) using BTREE;
|
||||
|
||||
load data infile '/opt/dump/dump_0.3/dump_hits_10m_meshed_utf8.tsv' into table hits_10m FIELDS TERMINATED BY '\t' ESCAPED BY '\\' ;
|
||||
|
||||
|
@ -1,23 +0,0 @@
|
||||
#!/usr/bin/env expect
|
||||
# Expect script: takes one query as the first argument, sends it to an interactive mysql session, then quits.
|
||||
|
||||
# Set timeout
|
||||
set timeout 600
|
||||
|
||||
# Get arguments
|
||||
set query [lindex $argv 0]
|
||||
|
||||
spawn mysql -u root
|
||||
|
||||
expect "mysql>"
|
||||
send "use hits\r"
|
||||
|
||||
expect "mysql>"
|
||||
|
||||
send "$query\r"
|
||||
|
||||
expect "mysql>"
|
||||
|
||||
send "quit\r"
|
||||
|
||||
expect eof
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,111 +0,0 @@
|
||||
SELECT SQL_NO_CACHE count(*) FROM hits_10m;
|
||||
SELECT SQL_NO_CACHE count(*) FROM hits_10m WHERE AdvEngineID != 0;
|
||||
SELECT SQL_NO_CACHE sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m;
|
||||
SELECT SQL_NO_CACHE sum(UserID) FROM hits_10m;
|
||||
SELECT SQL_NO_CACHE count(DISTINCT UserID) FROM hits_10m;
|
||||
SELECT SQL_NO_CACHE count(DISTINCT SearchPhrase) FROM hits_10m;
|
||||
SELECT SQL_NO_CACHE min(EventDate), max(EventDate) FROM hits_10m;
|
||||
|
||||
SELECT SQL_NO_CACHE AdvEngineID, count(*) FROM hits_10m WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
|
||||
-- heavy filtering. Almost nothing remains after the filter, but we still aggregate.;
|
||||
|
||||
SELECT SQL_NO_CACHE RegionID, count(DISTINCT UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10;
|
||||
-- aggregation, medium number of keys.;
|
||||
|
||||
SELECT SQL_NO_CACHE RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m GROUP BY RegionID ORDER BY count(*) DESC LIMIT 10;
|
||||
-- aggregation, medium number of keys, several aggregate functions.;
|
||||
|
||||
SELECT SQL_NO_CACHE MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
-- heavy filtering on strings, then aggregation by strings.;
|
||||
|
||||
SELECT SQL_NO_CACHE MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
-- heavy filtering on strings, then aggregation by a pair of a number and a string.;
|
||||
|
||||
SELECT SQL_NO_CACHE SearchPhrase, count(*) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- moderate filtering on strings, then aggregation by strings, large number of keys.;
|
||||
|
||||
SELECT SQL_NO_CACHE SearchPhrase, count(DISTINCT UserID) AS u FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
-- slightly more complex aggregation.;
|
||||
|
||||
SELECT SQL_NO_CACHE SearchEngineID, SearchPhrase, count(*) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- aggregation by a number and a string, large number of keys.;
|
||||
|
||||
SELECT SQL_NO_CACHE UserID, count(*) FROM hits_10m GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
|
||||
-- aggregation by a very large number of keys, may run out of RAM.;
|
||||
|
||||
SELECT SQL_NO_CACHE UserID, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- even more complex aggregation.;
|
||||
|
||||
SELECT SQL_NO_CACHE UserID, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, SearchPhrase LIMIT 10;
|
||||
-- the same, but without sorting.;
|
||||
|
||||
SELECT SQL_NO_CACHE UserID, Minute(EventTime) AS m, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- even more complex aggregation, not worth running on large tables.;
|
||||
|
||||
SELECT SQL_NO_CACHE UserID FROM hits_10m WHERE UserID = 12345678901234567890;
|
||||
-- heavy filtering on a UInt64 column.;
|
||||
|
||||
SELECT SQL_NO_CACHE count(*) FROM hits_10m WHERE URL LIKE '%metrika%';
|
||||
-- filtering by substring search in a string.;
|
||||
|
||||
SELECT SQL_NO_CACHE SearchPhrase, MAX(URL), count(*) FROM hits_10m WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- fetching large columns, filtering by a string.;
|
||||
|
||||
SELECT SQL_NO_CACHE SearchPhrase, MAX(URL), MAX(Title), count(*) AS c, count(DISTINCT UserID) FROM hits_10m WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- slightly more columns.;
|
||||
|
||||
SELECT SQL_NO_CACHE * FROM hits_10m WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
|
||||
-- bad query - fetching all columns.;
|
||||
|
||||
SELECT SQL_NO_CACHE SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
|
||||
-- big sort.;
|
||||
|
||||
SELECT SQL_NO_CACHE SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
|
||||
-- big sort by strings.;
|
||||
|
||||
SELECT SQL_NO_CACHE SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
|
||||
-- big sort by a tuple.;
|
||||
|
||||
SELECT SQL_NO_CACHE CounterID, avg(length(URL)) AS l, count(*) FROM hits_10m WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
-- computing average URL lengths for large counters.;
|
||||
|
||||
SELECT SQL_NO_CACHE SUBSTRING(SUBSTRING(Referer, POSITION('//' IN Referer) + 2), 1, GREATEST(0, POSITION('/' IN SUBSTRING(Referer, POSITION('//' IN Referer) + 2)) - 1)) AS k, avg(length(Referer)) AS l, count(*) AS c, MAX(Referer) FROM hits_10m WHERE Referer != '' GROUP BY k HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
-- the same, but broken down by domain.;
|
||||
|
||||
SELECT SQL_NO_CACHE sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 
73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_10m;
|
||||
-- many dumb aggregate functions.;
|
||||
|
||||
SELECT SQL_NO_CACHE SearchEngineID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY count(*) DESC LIMIT 10;
|
||||
-- complex aggregation, may run out of RAM on large tables.;
|
||||
|
||||
SELECT SQL_NO_CACHE WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
|
||||
-- aggregation by two fields that aggregates nothing. Cannot be run on large tables.;
|
||||
|
||||
SELECT SQL_NO_CACHE WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
|
||||
-- the same, but also without filtering.;
|
||||
|
||||
SELECT SQL_NO_CACHE URL, count(*) FROM hits_10m GROUP BY URL ORDER BY count(*) DESC LIMIT 10;
|
||||
-- aggregation by URL.;
|
||||
|
||||
SELECT SQL_NO_CACHE 1, URL, count(*) FROM hits_10m GROUP BY 1, URL ORDER BY count(*) DESC LIMIT 10;
|
||||
-- aggregation by URL and a number.;
|
||||
|
||||
SELECT SQL_NO_CACHE ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) FROM hits_10m GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY count(*) DESC LIMIT 10;
|
||||
|
||||
SELECT SQL_NO_CACHE URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
|
||||
|
||||
|
||||
SELECT SQL_NO_CACHE Title, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
|
||||
|
||||
|
||||
SELECT SQL_NO_CACHE URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
|
||||
|
||||
|
||||
SELECT SQL_NO_CACHE TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN SearchEngineID = 0 AND AdvEngineID = 0 THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
|
||||
|
||||
|
||||
SELECT SQL_NO_CACHE URLHash, EventDate, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = 6202628419148573758 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100000;
|
||||
|
||||
SELECT SQL_NO_CACHE WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = 6202628419148573758 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
|
||||
|
||||
SELECT SQL_NO_CACHE EventTime - INTERVAL SECOND(EventTime) SECOND AS Minute, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;
|
@ -1,5 +0,0 @@
|
||||
# Settings for the Infobright benchmark run
# (presumably sourced by the benchmark driver - confirm against the caller).

# Directory holding the Infobright benchmark files.
CONF_DIR="/home/kartavyy/benchmark/infobright"

# Expect wrapper and query list, both located under CONF_DIR.
expect_file="${CONF_DIR}/expect.tcl"
test_file="${CONF_DIR}/queries.sql"

# Init script of the Infobright MySQL service.
etc_init_d_service="/etc/init.d/mysqld-ib"
|
||||
|
@ -1,111 +0,0 @@
|
||||
create table hits_10m
|
||||
(
|
||||
WatchID BIGINT,
|
||||
JavaEnable SMALLINT,
|
||||
Title VARCHAR(1400),
|
||||
GoodEvent SMALLINT,
|
||||
EventTime TIMESTAMP,
|
||||
EventDate DATE,
|
||||
CounterID BIGINT,
|
||||
ClientIP BIGINT,
|
||||
RegionID BIGINT,
|
||||
UserID BIGINT,
|
||||
CounterClass TINYINT,
|
||||
OS SMALLINT,
|
||||
UserAgent SMALLINT,
|
||||
URL VARCHAR(7800),
|
||||
Referer VARCHAR(3125),
|
||||
Refresh TINYINT,
|
||||
RefererCategoryID INT,
|
||||
RefererRegionID BIGINT,
|
||||
URLCategoryID INT,
|
||||
URLRegionID BIGINT,
|
||||
ResolutionWidth INT,
|
||||
ResolutionHeight INT,
|
||||
ResolutionDepth SMALLINT,
|
||||
FlashMajor SMALLINT,
|
||||
FlashMinor SMALLINT,
|
||||
FlashMinor2 VARCHAR(256),
|
||||
NetMajor SMALLINT,
|
||||
NetMinor SMALLINT,
|
||||
UserAgentMajor INT,
|
||||
UserAgentMinor CHAR(2),
|
||||
CookieEnable SMALLINT,
|
||||
JavascriptEnable SMALLINT,
|
||||
IsMobile SMALLINT,
|
||||
MobilePhone SMALLINT,
|
||||
MobilePhoneModel VARCHAR(80),
|
||||
Params VARCHAR(2925),
|
||||
IPNetworkID BIGINT,
|
||||
TraficSourceID SMALLINT,
|
||||
SearchEngineID INT,
|
||||
SearchPhrase VARCHAR(2008),
|
||||
AdvEngineID SMALLINT,
|
||||
IsArtifical SMALLINT,
|
||||
WindowClientWidth INT,
|
||||
WindowClientHeight INT,
|
||||
ClientTimeZone INTEGER,
|
||||
ClientEventTime TIMESTAMP,
|
||||
SilverlightVersion1 SMALLINT,
|
||||
SilverlightVersion2 SMALLINT,
|
||||
SilverlightVersion3 BIGINT,
|
||||
SilverlightVersion4 INT,
|
||||
PageCharset VARCHAR(80),
|
||||
CodeVersion BIGINT,
|
||||
IsLink SMALLINT,
|
||||
IsDownload SMALLINT,
|
||||
IsNotBounce SMALLINT,
|
||||
FUniqID BIGINT,
|
||||
OriginalURL VARCHAR(8181),
|
||||
HID BIGINT,
|
||||
IsOldCounter SMALLINT,
|
||||
IsEvent SMALLINT,
|
||||
IsParameter SMALLINT,
|
||||
DontCountHits SMALLINT,
|
||||
WithHash SMALLINT,
|
||||
HitColor CHAR(1),
|
||||
LocalEventTime TIMESTAMP,
|
||||
Age SMALLINT,
|
||||
Sex SMALLINT,
|
||||
Income SMALLINT,
|
||||
Interests INT,
|
||||
Robotness SMALLINT,
|
||||
RemoteIP BIGINT,
|
||||
WindowName INT,
|
||||
OpenerName INT,
|
||||
HistoryLength SMALLINT,
|
||||
BrowserLanguage CHAR(2),
|
||||
BrowserCountry CHAR(2),
|
||||
SocialNetwork VARCHAR(128),
|
||||
SocialAction VARCHAR(128),
|
||||
HTTPError INT,
|
||||
SendTiming BIGINT,
|
||||
DNSTiming BIGINT,
|
||||
ConnectTiming BIGINT,
|
||||
ResponseStartTiming BIGINT,
|
||||
ResponseEndTiming BIGINT,
|
||||
FetchTiming BIGINT,
|
||||
SocialSourceNetworkID SMALLINT,
|
||||
SocialSourcePage VARCHAR(256),
|
||||
ParamPrice BIGINT,
|
||||
ParamOrderID VARCHAR(80),
|
||||
ParamCurrency CHAR(3),
|
||||
ParamCurrencyID INT,
|
||||
OpenstatServiceName VARCHAR(80),
|
||||
OpenstatCampaignID VARCHAR(512),
|
||||
OpenstatAdID VARCHAR(80),
|
||||
OpenstatSourceID VARCHAR(256),
|
||||
UTMSource VARCHAR(256),
|
||||
UTMMedium VARCHAR(256),
|
||||
UTMCampaign VARCHAR(407),
|
||||
UTMContent VARCHAR(256),
|
||||
UTMTerm VARCHAR(437),
|
||||
FromTag VARCHAR(428),
|
||||
HasGCLID SMALLINT,
|
||||
RefererHash BIGINT,
|
||||
URLHash BIGINT,
|
||||
CLID BIGINT,
|
||||
UserIDHash BIGINT
|
||||
);
|
||||
|
||||
LOAD DATA INFILE '/opt/dump/dump_0.3/dump_hits_10m_meshed_utf8.tsv' INTO TABLE hits_10m FIELDS TERMINATED BY '\t' ESCAPED BY '\\' ENCLOSED BY "NULL";
|
@ -1,18 +0,0 @@
|
||||
#!/usr/bin/env expect
# Run a single SQL statement through the interactive `mysql-ib` client.
#
# Usage: <this script> "<query>"
#
# The statement is taken from the first command-line argument and sent to a
# `mysql-ib -u root -D hits` session, which is closed once the prompt comes
# back.
#
# The file used to start with a bash shebang followed by an expect one; only
# the first line counts, so the Tcl below would have been handed to bash.

# Wait up to 600 seconds for each expected pattern.
set timeout 600

# The query to run is the first command-line argument.
set query [lindex $argv 0]

spawn mysql-ib -u root -D hits

expect "mysql>"
# `--` makes send treat the next argument as data even when it begins with
# `-` (the query files contain lines that start with `--`).
send -- "$query\r"

# The prompt reappearing means the statement has finished.
expect "mysql>"
send -- "quit\r"

expect eof
|
File diff suppressed because it is too large
Load Diff
@ -1,113 +0,0 @@
|
||||
-- set GLOBAL max_length_for_sort_data = 8388608;
|
||||
|
||||
SELECT count(*) FROM hits_10m;
|
||||
SELECT count(*) FROM hits_10m WHERE AdvEngineID != 0;
|
||||
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m;
|
||||
SELECT sum(UserID) FROM hits_10m;
|
||||
SELECT count(DISTINCT UserID) FROM hits_10m;
|
||||
SELECT count(DISTINCT SearchPhrase) FROM hits_10m;
|
||||
SELECT min(EventDate), max(EventDate) FROM hits_10m;
|
||||
|
||||
SELECT AdvEngineID, count(*) FROM hits_10m WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
|
||||
-- heavy filtering. Almost nothing remains after the filter, but we still aggregate.;
|
||||
|
||||
SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10;
|
||||
-- aggregation, medium number of keys.;
|
||||
|
||||
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m GROUP BY RegionID ORDER BY count(*) DESC LIMIT 10;
|
||||
-- aggregation, medium number of keys, several aggregate functions.;
|
||||
|
||||
SELECT MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
-- heavy filtering on strings, then aggregation by strings.;
|
||||
|
||||
SELECT MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
-- heavy filtering on strings, then aggregation by a pair of a number and a string.;
|
||||
|
||||
SELECT SearchPhrase, count(*) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- moderate filtering on strings, then aggregation by strings, large number of keys.;
|
||||
|
||||
SELECT SearchPhrase, count(DISTINCT UserID) AS u FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
-- slightly more complex aggregation.;
|
||||
|
||||
SELECT SearchEngineID, SearchPhrase, count(*) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- aggregation by a number and a string, large number of keys.;
|
||||
|
||||
SELECT UserID, count(*) FROM hits_10m GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
|
||||
-- aggregation by a very large number of keys, may run out of RAM.;
|
||||
|
||||
SELECT UserID, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- even more complex aggregation.;
|
||||
|
||||
SELECT UserID, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, SearchPhrase LIMIT 10;
|
||||
-- the same, but without sorting.;
|
||||
|
||||
SELECT UserID, Minute(EventTime) AS m, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- even more complex aggregation, not worth running on large tables.;
|
||||
|
||||
SELECT UserID FROM hits_10m WHERE UserID = 123456789;
|
||||
-- heavy filtering on a UInt64 column.;
|
||||
|
||||
SELECT count(*) FROM hits_10m WHERE URL LIKE '%metrika%';
|
||||
-- filtering by substring search in a string.;
|
||||
|
||||
SELECT SearchPhrase, MAX(URL), count(*) FROM hits_10m WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- fetching large columns, filtering by a string.;
|
||||
|
||||
SELECT SearchPhrase, MAX(URL), MAX(Title), count(*) AS c, count(DISTINCT UserID) FROM hits_10m WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
-- slightly more columns.;
|
||||
|
||||
SELECT * FROM hits_10m WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
|
||||
-- bad query - fetching all columns.;
|
||||
|
||||
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
|
||||
-- big sort.;
|
||||
|
||||
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
|
||||
-- big sort by strings.;
|
||||
|
||||
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
|
||||
-- big sort by a tuple.;
|
||||
|
||||
SELECT CounterID, avg(length(URL)) AS l, count(*) FROM hits_10m WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
-- computing average URL lengths for large counters.;
|
||||
|
||||
SELECT SUBSTRING(SUBSTRING(Referer, POSITION('//' IN Referer) + 2), 1, GREATEST(0, POSITION('/' IN SUBSTRING(Referer, POSITION('//' IN Referer) + 2)) - 1)) AS k, avg(length(Referer)) AS l, count(*) AS c, MAX(Referer) FROM hits_10m WHERE Referer != '' GROUP BY k HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
-- the same, but broken down by domain.;
|
||||
|
||||
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_10m;
|
||||
-- many dumb aggregate functions.;
|
||||
|
||||
SELECT SearchEngineID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY count(*) DESC LIMIT 10;
|
||||
-- complex aggregation, may run out of RAM on large tables.;
|
||||
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
|
||||
-- aggregation by two fields that aggregates nothing. Cannot be run on large tables.;
|
||||
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
|
||||
-- the same, but also without filtering.;
|
||||
|
||||
SELECT URL, count(*) FROM hits_10m GROUP BY URL ORDER BY count(*) DESC LIMIT 10;
|
||||
-- aggregation by URL.;
|
||||
|
||||
SELECT 1, URL, count(*) FROM hits_10m GROUP BY 1, URL ORDER BY count(*) DESC LIMIT 10;
|
||||
-- aggregation by URL and a number.;
|
||||
|
||||
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) FROM hits_10m GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY count(*) DESC LIMIT 10;
|
||||
|
||||
SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
|
||||
|
||||
|
||||
SELECT Title, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
|
||||
|
||||
|
||||
SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
|
||||
|
||||
|
||||
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN SearchEngineID = 0 AND AdvEngineID = 0 THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
|
||||
|
||||
|
||||
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = 6202628419148573758 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100000;
|
||||
|
||||
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = 6202628419148573758 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
|
||||
|
||||
SELECT EventTime - INTERVAL SECOND(EventTime) SECOND AS Minute, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;
|
@ -1,20 +0,0 @@
|
||||
#!/usr/bin/env bash
# Benchmark driver: runs every line of $QUERIES_FILE as a query against the
# server at 127.0.0.1:3306 (database "test") and prints one
# "[t1, t2, t3]," line per query.
#
# Usage: <this script> TABLE
#   TABLE replaces every "{table}" placeholder found in the queries.
#
# Each timing is the decimal number taken from the client output line that
# contains " set " (e.g. "1 row in set (0.12 sec)"); "null" is printed when
# no such number is found, for instance when the query fails.

QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3

# `read -r` keeps backslashes in the SQL intact; the `|| [[ -n ... ]]` part
# still processes a final line that has no trailing newline.
sed "s/{table}/${TABLE}/g" "$QUERIES_FILE" | while read -r query || [[ -n "$query" ]]; do
    # Flush dirty pages and drop the OS caches once before this query's runs.
    sync
    echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null

    echo -n "["
    for i in $(seq 1 "$TRIES"); do
        # The status of the assignment is that of the last grep in the
        # pipeline: non-zero when no timing could be extracted.
        if RES=$(mysql -u root -h 127.0.0.1 -P 3306 --database=test -t -vvv -e "$query" 2>&1 | grep ' set ' | grep -oP '\d+\.\d+'); then
            echo -n "$RES"
        else
            echo -n "null"
        fi

        # Separator between tries, but not after the last one.
        [[ "$i" != "$TRIES" ]] && echo -n ", "
    done
    echo "],"
done
|
@ -1,141 +0,0 @@
|
||||
Note: column store in MemSQL was introduced in Feb 2014.
|
||||
|
||||
http://www.memsql.com/download/
|
||||
http://docs.memsql.com/docs/latest/setup/setup_onprem.html
|
||||
wget http://download.memsql.com/8d9f4c4d99a547baa40ba097b171bd15/memsql-3.2.x86_64.deb
|
||||
scp memsql-3.2.x86_64.deb example05e:~
|
||||
ssh example05e
|
||||
sudo dpkg -i memsql-3.2.x86_64.deb
|
||||
|
||||
sudo mkdir /opt/memsql-data/
|
||||
sudo cp -r /var/lib/memsql/data/* /opt/memsql-data/
|
||||
sudo rm -rf /var/lib/memsql/data
|
||||
sudo ln -s /opt/memsql-data /var/lib/memsql/data
|
||||
sudo chown -R memsql /opt/memsql-data
|
||||
sudo chown -R memsql /var/lib/memsql/data
|
||||
|
||||
sudo service memsql start
|
||||
mysql -u root -h 127.0.0.1 -P 3306 --prompt="memsql> "
|
||||
|
||||
CREATE DATABASE test;
|
||||
USE test;
|
||||
|
||||
CREATE TABLE hits_10m
|
||||
(
|
||||
WatchID BIGINT,
|
||||
JavaEnable SMALLINT,
|
||||
Title VARCHAR(1400),
|
||||
GoodEvent SMALLINT,
|
||||
EventTime TIMESTAMP,
|
||||
EventDate DATE,
|
||||
CounterID BIGINT,
|
||||
ClientIP BIGINT,
|
||||
RegionID BIGINT,
|
||||
UserID BIGINT,
|
||||
CounterClass TINYINT,
|
||||
OS SMALLINT,
|
||||
UserAgent SMALLINT,
|
||||
URL VARCHAR(7800),
|
||||
Referer VARCHAR(3125),
|
||||
Refresh TINYINT,
|
||||
RefererCategoryID INT,
|
||||
RefererRegionID BIGINT,
|
||||
URLCategoryID INT,
|
||||
URLRegionID BIGINT,
|
||||
ResolutionWidth INT,
|
||||
ResolutionHeight INT,
|
||||
ResolutionDepth SMALLINT,
|
||||
FlashMajor SMALLINT,
|
||||
FlashMinor SMALLINT,
|
||||
FlashMinor2 VARCHAR(256),
|
||||
NetMajor SMALLINT,
|
||||
NetMinor SMALLINT,
|
||||
UserAgentMajor INT,
|
||||
UserAgentMinor CHAR(2),
|
||||
CookieEnable SMALLINT,
|
||||
JavascriptEnable SMALLINT,
|
||||
IsMobile SMALLINT,
|
||||
MobilePhone SMALLINT,
|
||||
MobilePhoneModel VARCHAR(80),
|
||||
Params VARCHAR(2925),
|
||||
IPNetworkID BIGINT,
|
||||
TraficSourceID SMALLINT,
|
||||
SearchEngineID INT,
|
||||
SearchPhrase VARCHAR(2008),
|
||||
AdvEngineID SMALLINT,
|
||||
IsArtifical SMALLINT,
|
||||
WindowClientWidth INT,
|
||||
WindowClientHeight INT,
|
||||
ClientTimeZone INTEGER,
|
||||
ClientEventTime TIMESTAMP,
|
||||
SilverlightVersion1 SMALLINT,
|
||||
SilverlightVersion2 SMALLINT,
|
||||
SilverlightVersion3 BIGINT,
|
||||
SilverlightVersion4 INT,
|
||||
PageCharset VARCHAR(80),
|
||||
CodeVersion BIGINT,
|
||||
IsLink SMALLINT,
|
||||
IsDownload SMALLINT,
|
||||
IsNotBounce SMALLINT,
|
||||
FUniqID BIGINT,
|
||||
OriginalURL VARCHAR(8181),
|
||||
HID BIGINT,
|
||||
IsOldCounter SMALLINT,
|
||||
IsEvent SMALLINT,
|
||||
IsParameter SMALLINT,
|
||||
DontCountHits SMALLINT,
|
||||
WithHash SMALLINT,
|
||||
HitColor CHAR(1),
|
||||
LocalEventTime TIMESTAMP,
|
||||
Age SMALLINT,
|
||||
Sex SMALLINT,
|
||||
Income SMALLINT,
|
||||
Interests INT,
|
||||
Robotness SMALLINT,
|
||||
RemoteIP BIGINT,
|
||||
WindowName INT,
|
||||
OpenerName INT,
|
||||
HistoryLength SMALLINT,
|
||||
BrowserLanguage CHAR(2),
|
||||
BrowserCountry CHAR(2),
|
||||
SocialNetwork VARCHAR(128),
|
||||
SocialAction VARCHAR(128),
|
||||
HTTPError INT,
|
||||
SendTiming BIGINT,
|
||||
DNSTiming BIGINT,
|
||||
ConnectTiming BIGINT,
|
||||
ResponseStartTiming BIGINT,
|
||||
ResponseEndTiming BIGINT,
|
||||
FetchTiming BIGINT,
|
||||
SocialSourceNetworkID SMALLINT,
|
||||
SocialSourcePage VARCHAR(256),
|
||||
ParamPrice BIGINT,
|
||||
ParamOrderID VARCHAR(80),
|
||||
ParamCurrency CHAR(3),
|
||||
ParamCurrencyID INT,
|
||||
OpenstatServiceName VARCHAR(80),
|
||||
OpenstatCampaignID VARCHAR(512),
|
||||
OpenstatAdID VARCHAR(80),
|
||||
OpenstatSourceID VARCHAR(256),
|
||||
UTMSource VARCHAR(256),
|
||||
UTMMedium VARCHAR(256),
|
||||
UTMCampaign VARCHAR(407),
|
||||
UTMContent VARCHAR(256),
|
||||
UTMTerm VARCHAR(437),
|
||||
FromTag VARCHAR(428),
|
||||
HasGCLID SMALLINT,
|
||||
RefererHash BIGINT,
|
||||
URLHash BIGINT,
|
||||
CLID BIGINT,
|
||||
INDEX ColumnStoreIndex USING CLUSTERED COLUMNSTORE (CounterID, EventDate, UserID, EventTime)
|
||||
);
|
||||
|
||||
Table creation takes about 15 seconds.
|
||||
|
||||
LOAD DATA INFILE '/opt/dumps/hits_10m_corrected.tsv' INTO TABLE hits_10m;
|
||||
|
||||
Load time: 12 min 24.51 sec
|
||||
|
||||
13422 rows/sec.
|
||||
|
||||
data size: 1 613 773 528 bytes.
|
@ -1,43 +0,0 @@
|
||||
SELECT count(*) FROM hits_10m;
|
||||
SELECT count(*) FROM hits_10m WHERE AdvEngineID != 0;
|
||||
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m;
|
||||
SELECT sum(UserID) FROM hits_10m;
|
||||
SELECT count(DISTINCT UserID) FROM hits_10m;
|
||||
SELECT count(DISTINCT SearchPhrase) FROM hits_10m;
|
||||
SELECT min(EventDate), max(EventDate) FROM hits_10m;
|
||||
SELECT AdvEngineID, count(*) FROM hits_10m WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
|
||||
SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10;
|
||||
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m GROUP BY RegionID ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
SELECT MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
SELECT SearchPhrase, count(*) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT SearchPhrase, count(DISTINCT UserID) AS u FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
SELECT SearchEngineID, SearchPhrase, count(*) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID, count(*) FROM hits_10m GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, SearchPhrase LIMIT 10;
|
||||
SELECT UserID, Minute(EventTime) AS m, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID FROM hits_10m WHERE UserID = 123456789;
|
||||
SELECT count(*) FROM hits_10m WHERE URL LIKE '%metrika%';
|
||||
SELECT SearchPhrase, MAX(URL), count(*) FROM hits_10m WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT SearchPhrase, MAX(URL), MAX(Title), count(*) AS c, count(DISTINCT UserID) FROM hits_10m WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT * FROM hits_10m WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
|
||||
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
|
||||
SELECT CounterID, avg(length(URL)) AS l, count(*) FROM hits_10m WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT SUBSTRING(SUBSTRING(Referer, POSITION('//' IN Referer) + 2), 1, GREATEST(0, POSITION('/' IN SUBSTRING(Referer, POSITION('//' IN Referer) + 2)) - 1)) AS k, avg(length(Referer)) AS l, count(*) AS c, MAX(Referer) FROM hits_10m WHERE Referer != '' GROUP BY k HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_10m;
|
||||
SELECT SearchEngineID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT URL, count(*) FROM hits_10m GROUP BY URL ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT 1, URL, count(*) FROM hits_10m GROUP BY 1, URL ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) FROM hits_10m GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
|
||||
SELECT Title, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
|
||||
SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN SearchEngineID = 0 AND AdvEngineID = 0 THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = 6202628419148573758 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100000;
|
||||
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = 6202628419148573758 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
|
||||
SELECT EventTime - INTERVAL SECOND(EventTime) SECOND AS Minute, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;
|
File diff suppressed because it is too large
Load Diff
@ -1,10 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
grep -v -P '^#' queries.sql | sed -e 's/{table}/hits/' | while read query; do
|
||||
echo 3 | sudo tee /proc/sys/vm/drop_caches
|
||||
|
||||
echo "$query";
|
||||
for i in {1..3}; do
|
||||
./send-query "$query" 2>&1 | grep -P '\d+ tuple|clk: |unknown|overflow|error';
|
||||
done;
|
||||
done;
|
@ -1,356 +0,0 @@
|
||||
Go to https://www.monetdb.org/
|
||||
|
||||
Download now.
|
||||
Latest binary releases.
|
||||
Ubuntu & Debian.
|
||||
|
||||
https://www.monetdb.org/downloads/deb/
|
||||
|
||||
Go to the server where you want to install MonetDB.
|
||||
```
|
||||
$ sudo mcedit /etc/apt/sources.list.d/monetdb.list
|
||||
```
|
||||
Write:
|
||||
```
|
||||
deb https://dev.monetdb.org/downloads/deb/ bionic monetdb
|
||||
```
|
||||
|
||||
```
|
||||
$ wget --output-document=- https://www.monetdb.org/downloads/MonetDB-GPG-KEY | sudo apt-key add -
|
||||
|
||||
$ sudo apt update
|
||||
$ sudo apt install monetdb5-sql monetdb-client
|
||||
|
||||
$ sudo systemctl enable monetdbd
|
||||
$ sudo systemctl start monetdbd
|
||||
$ sudo usermod -a -G monetdb $USER
|
||||
```
|
||||
|
||||
Log out and log back in to your server.
|
||||
|
||||
Tutorial:
|
||||
https://www.monetdb.org/Documentation/UserGuide/Tutorial
|
||||
|
||||
Creating the database:
|
||||
|
||||
```
|
||||
$ sudo mkdir /opt/monetdb
|
||||
$ sudo chmod 777 /opt/monetdb
|
||||
$ monetdbd create /opt/monetdb
|
||||
|
||||
$ monetdbd start /opt/monetdb
|
||||
cannot remove socket files
|
||||
```
|
||||
|
||||
Now you have to stop MonetDB, copy the contents of `/var/monetdb5` to `/opt/monetdb`, and replace `/var/monetdb5` with a symlink to `/opt/monetdb`. This is necessary because I don't have free space in `/var`, and creating the database directly in `/opt` did not succeed.
|
||||
|
||||
Start MonetDB again.
|
||||
|
||||
```
|
||||
$ sudo systemctl start monetdbd
|
||||
```
|
||||
|
||||
```
|
||||
$ monetdb create test
|
||||
created database in maintenance mode: test
|
||||
|
||||
$ monetdb release test
|
||||
taken database out of maintenance mode: test
|
||||
```
|
||||
|
||||
Run client:
|
||||
```
|
||||
$ mclient -u monetdb -d test
|
||||
```
|
||||
|
||||
Type password: monetdb
|
||||
|
||||
```
|
||||
CREATE TABLE hits
|
||||
(
|
||||
"WatchID" BIGINT,
|
||||
"JavaEnable" TINYINT,
|
||||
"Title" TEXT,
|
||||
"GoodEvent" SMALLINT,
|
||||
"EventTime" TIMESTAMP,
|
||||
"EventDate" Date,
|
||||
"CounterID" INTEGER,
|
||||
"ClientIP" INTEGER,
|
||||
"RegionID" INTEGER,
|
||||
"UserID" BIGINT,
|
||||
"CounterClass" TINYINT,
|
||||
"OS" TINYINT,
|
||||
"UserAgent" TINYINT,
|
||||
"URL" TEXT,
|
||||
"Referer" TEXT,
|
||||
"Refresh" TINYINT,
|
||||
"RefererCategoryID" SMALLINT,
|
||||
"RefererRegionID" INTEGER,
|
||||
"URLCategoryID" SMALLINT,
|
||||
"URLRegionID" INTEGER,
|
||||
"ResolutionWidth" SMALLINT,
|
||||
"ResolutionHeight" SMALLINT,
|
||||
"ResolutionDepth" TINYINT,
|
||||
"FlashMajor" TINYINT,
|
||||
"FlashMinor" TINYINT,
|
||||
"FlashMinor2" TEXT,
|
||||
"NetMajor" TINYINT,
|
||||
"NetMinor" TINYINT,
|
||||
"UserAgentMajor" SMALLINT,
|
||||
"UserAgentMinor" TEXT,
|
||||
"CookieEnable" TINYINT,
|
||||
"JavascriptEnable" TINYINT,
|
||||
"IsMobile" TINYINT,
|
||||
"MobilePhone" TINYINT,
|
||||
"MobilePhoneModel" TEXT,
|
||||
"Params" TEXT,
|
||||
"IPNetworkID" INTEGER,
|
||||
"TraficSourceID" TINYINT,
|
||||
"SearchEngineID" SMALLINT,
|
||||
"SearchPhrase" TEXT,
|
||||
"AdvEngineID" TINYINT,
|
||||
"IsArtifical" TINYINT,
|
||||
"WindowClientWidth" SMALLINT,
|
||||
"WindowClientHeight" SMALLINT,
|
||||
"ClientTimeZone" SMALLINT,
|
||||
"ClientEventTime" TIMESTAMP,
|
||||
"SilverlightVersion1" TINYINT,
|
||||
"SilverlightVersion2" TINYINT,
|
||||
"SilverlightVersion3" INTEGER,
|
||||
"SilverlightVersion4" SMALLINT,
|
||||
"PageCharset" TEXT,
|
||||
"CodeVersion" INTEGER,
|
||||
"IsLink" TINYINT,
|
||||
"IsDownload" TINYINT,
|
||||
"IsNotBounce" TINYINT,
|
||||
"FUniqID" BIGINT,
|
||||
"OriginalURL" TEXT,
|
||||
"HID" INTEGER,
|
||||
"IsOldCounter" TINYINT,
|
||||
"IsEvent" TINYINT,
|
||||
"IsParameter" TINYINT,
|
||||
"DontCountHits" TINYINT,
|
||||
"WithHash" TINYINT,
|
||||
"HitColor" TEXT,
|
||||
"LocalEventTime" TIMESTAMP,
|
||||
"Age" TINYINT,
|
||||
"Sex" TINYINT,
|
||||
"Income" TINYINT,
|
||||
"Interests" SMALLINT,
|
||||
"Robotness" TINYINT,
|
||||
"RemoteIP" INTEGER,
|
||||
"WindowName" INTEGER,
|
||||
"OpenerName" INTEGER,
|
||||
"HistoryLength" SMALLINT,
|
||||
"BrowserLanguage" TEXT,
|
||||
"BrowserCountry" TEXT,
|
||||
"SocialNetwork" TEXT,
|
||||
"SocialAction" TEXT,
|
||||
"HTTPError" SMALLINT,
|
||||
"SendTiming" INTEGER,
|
||||
"DNSTiming" INTEGER,
|
||||
"ConnectTiming" INTEGER,
|
||||
"ResponseStartTiming" INTEGER,
|
||||
"ResponseEndTiming" INTEGER,
|
||||
"FetchTiming" INTEGER,
|
||||
"SocialSourceNetworkID" TINYINT,
|
||||
"SocialSourcePage" TEXT,
|
||||
"ParamPrice" BIGINT,
|
||||
"ParamOrderID" TEXT,
|
||||
"ParamCurrency" TEXT,
|
||||
"ParamCurrencyID" SMALLINT,
|
||||
"OpenstatServiceName" TEXT,
|
||||
"OpenstatCampaignID" TEXT,
|
||||
"OpenstatAdID" TEXT,
|
||||
"OpenstatSourceID" TEXT,
|
||||
"UTMSource" TEXT,
|
||||
"UTMMedium" TEXT,
|
||||
"UTMCampaign" TEXT,
|
||||
"UTMContent" TEXT,
|
||||
"UTMTerm" TEXT,
|
||||
"FromTag" TEXT,
|
||||
"HasGCLID" TINYINT,
|
||||
"RefererHash" BIGINT,
|
||||
"URLHash" BIGINT,
|
||||
"CLID" INTEGER
|
||||
);
|
||||
```
|
||||
|
||||
# How to prepare data
|
||||
|
||||
Download the 100-million-row dataset from the link below and insert it into ClickHouse:
|
||||
https://clickhouse.com/docs/en/getting-started/example-datasets/metrica/
|
||||
|
||||
Create the dataset from ClickHouse:
|
||||
|
||||
```
|
||||
SELECT
|
||||
toInt64(WatchID) = -9223372036854775808 ? -9223372036854775807 : toInt64(WatchID),
|
||||
toInt8(JavaEnable) = -128 ? -127 : toInt8(JavaEnable),
|
||||
toValidUTF8(toString(Title)),
|
||||
toInt16(GoodEvent) = -32768 ? -32767 : toInt16(GoodEvent),
|
||||
EventTime,
|
||||
EventDate,
|
||||
toInt32(CounterID) = -2147483648 ? -2147483647 : toInt32(CounterID),
|
||||
toInt32(ClientIP) = -2147483648 ? -2147483647 : toInt32(ClientIP),
|
||||
toInt32(RegionID) = -2147483648 ? -2147483647 : toInt32(RegionID),
|
||||
toInt64(UserID) = -9223372036854775808 ? -9223372036854775807 : toInt64(UserID),
|
||||
toInt8(CounterClass) = -128 ? -127 : toInt8(CounterClass),
|
||||
toInt8(OS) = -128 ? -127 : toInt8(OS),
|
||||
toInt8(UserAgent) = -128 ? -127 : toInt8(UserAgent),
|
||||
toValidUTF8(toString(URL)),
|
||||
toValidUTF8(toString(Referer)),
|
||||
toInt8(Refresh) = -128 ? -127 : toInt8(Refresh),
|
||||
toInt16(RefererCategoryID) = -32768 ? -32767 : toInt16(RefererCategoryID),
|
||||
toInt32(RefererRegionID) = -2147483648 ? -2147483647 : toInt32(RefererRegionID),
|
||||
toInt16(URLCategoryID) = -32768 ? -32767 : toInt16(URLCategoryID),
|
||||
toInt32(URLRegionID) = -2147483648 ? -2147483647 : toInt32(URLRegionID),
|
||||
toInt16(ResolutionWidth) = -32768 ? -32767 : toInt16(ResolutionWidth),
|
||||
toInt16(ResolutionHeight) = -32768 ? -32767 : toInt16(ResolutionHeight),
|
||||
toInt8(ResolutionDepth) = -128 ? -127 : toInt8(ResolutionDepth),
|
||||
toInt8(FlashMajor) = -128 ? -127 : toInt8(FlashMajor),
|
||||
toInt8(FlashMinor) = -128 ? -127 : toInt8(FlashMinor),
|
||||
toValidUTF8(toString(FlashMinor2)),
|
||||
toInt8(NetMajor) = -128 ? -127 : toInt8(NetMajor),
|
||||
toInt8(NetMinor) = -128 ? -127 : toInt8(NetMinor),
|
||||
toInt16(UserAgentMajor) = -32768 ? -32767 : toInt16(UserAgentMajor),
|
||||
toValidUTF8(toString(UserAgentMinor)),
|
||||
toInt8(CookieEnable) = -128 ? -127 : toInt8(CookieEnable),
|
||||
toInt8(JavascriptEnable) = -128 ? -127 : toInt8(JavascriptEnable),
|
||||
toInt8(IsMobile) = -128 ? -127 : toInt8(IsMobile),
|
||||
toInt8(MobilePhone) = -128 ? -127 : toInt8(MobilePhone),
|
||||
toValidUTF8(toString(MobilePhoneModel)),
|
||||
toValidUTF8(toString(Params)),
|
||||
toInt32(IPNetworkID) = -2147483648 ? -2147483647 : toInt32(IPNetworkID),
|
||||
toInt8(TraficSourceID) = -128 ? -127 : toInt8(TraficSourceID),
|
||||
toInt16(SearchEngineID) = -32768 ? -32767 : toInt16(SearchEngineID),
|
||||
toValidUTF8(toString(SearchPhrase)),
|
||||
toInt8(AdvEngineID) = -128 ? -127 : toInt8(AdvEngineID),
|
||||
toInt8(IsArtifical) = -128 ? -127 : toInt8(IsArtifical),
|
||||
toInt16(WindowClientWidth) = -32768 ? -32767 : toInt16(WindowClientWidth),
|
||||
toInt16(WindowClientHeight) = -32768 ? -32767 : toInt16(WindowClientHeight),
|
||||
toInt16(ClientTimeZone) = -32768 ? -32767 : toInt16(ClientTimeZone),
|
||||
ClientEventTime,
|
||||
toInt8(SilverlightVersion1) = -128 ? -127 : toInt8(SilverlightVersion1),
|
||||
toInt8(SilverlightVersion2) = -128 ? -127 : toInt8(SilverlightVersion2),
|
||||
toInt32(SilverlightVersion3) = -2147483648 ? -2147483647 : toInt32(SilverlightVersion3),
|
||||
toInt16(SilverlightVersion4) = -32768 ? -32767 : toInt16(SilverlightVersion4),
|
||||
toValidUTF8(toString(PageCharset)),
|
||||
toInt32(CodeVersion) = -2147483648 ? -2147483647 : toInt32(CodeVersion),
|
||||
toInt8(IsLink) = -128 ? -127 : toInt8(IsLink),
|
||||
toInt8(IsDownload) = -128 ? -127 : toInt8(IsDownload),
|
||||
toInt8(IsNotBounce) = -128 ? -127 : toInt8(IsNotBounce),
|
||||
toInt64(FUniqID) = -9223372036854775808 ? -9223372036854775807 : toInt64(FUniqID),
|
||||
toValidUTF8(toString(OriginalURL)),
|
||||
toInt32(HID) = -2147483648 ? -2147483647 : toInt32(HID),
|
||||
toInt8(IsOldCounter) = -128 ? -127 : toInt8(IsOldCounter),
|
||||
toInt8(IsEvent) = -128 ? -127 : toInt8(IsEvent),
|
||||
toInt8(IsParameter) = -128 ? -127 : toInt8(IsParameter),
|
||||
toInt8(DontCountHits) = -128 ? -127 : toInt8(DontCountHits),
|
||||
toInt8(WithHash) = -128 ? -127 : toInt8(WithHash),
|
||||
toValidUTF8(toString(HitColor)),
|
||||
LocalEventTime,
|
||||
toInt8(Age) = -128 ? -127 : toInt8(Age),
|
||||
toInt8(Sex) = -128 ? -127 : toInt8(Sex),
|
||||
toInt8(Income) = -128 ? -127 : toInt8(Income),
|
||||
toInt16(Interests) = -32768 ? -32767 : toInt16(Interests),
|
||||
toInt8(Robotness) = -128 ? -127 : toInt8(Robotness),
|
||||
toInt32(RemoteIP) = -2147483648 ? -2147483647 : toInt32(RemoteIP),
|
||||
toInt32(WindowName) = -2147483648 ? -2147483647 : toInt32(WindowName),
|
||||
toInt32(OpenerName) = -2147483648 ? -2147483647 : toInt32(OpenerName),
|
||||
toInt16(HistoryLength) = -32768 ? -32767 : toInt16(HistoryLength),
|
||||
toValidUTF8(toString(BrowserLanguage)),
|
||||
toValidUTF8(toString(BrowserCountry)),
|
||||
toValidUTF8(toString(SocialNetwork)),
|
||||
toValidUTF8(toString(SocialAction)),
|
||||
toInt16(HTTPError) = -32768 ? -32767 : toInt16(HTTPError),
|
||||
toInt32(SendTiming) = -2147483648 ? -2147483647 : toInt32(SendTiming),
|
||||
toInt32(DNSTiming) = -2147483648 ? -2147483647 : toInt32(DNSTiming),
|
||||
toInt32(ConnectTiming) = -2147483648 ? -2147483647 : toInt32(ConnectTiming),
|
||||
toInt32(ResponseStartTiming) = -2147483648 ? -2147483647 : toInt32(ResponseStartTiming),
|
||||
toInt32(ResponseEndTiming) = -2147483648 ? -2147483647 : toInt32(ResponseEndTiming),
|
||||
toInt32(FetchTiming) = -2147483648 ? -2147483647 : toInt32(FetchTiming),
|
||||
toInt8(SocialSourceNetworkID) = -128 ? -127 : toInt8(SocialSourceNetworkID),
|
||||
toValidUTF8(toString(SocialSourcePage)),
|
||||
toInt64(ParamPrice) = -9223372036854775808 ? -9223372036854775807 : toInt64(ParamPrice),
|
||||
toValidUTF8(toString(ParamOrderID)),
|
||||
toValidUTF8(toString(ParamCurrency)),
|
||||
toInt16(ParamCurrencyID) = -32768 ? -32767 : toInt16(ParamCurrencyID),
|
||||
toValidUTF8(toString(OpenstatServiceName)),
|
||||
toValidUTF8(toString(OpenstatCampaignID)),
|
||||
toValidUTF8(toString(OpenstatAdID)),
|
||||
toValidUTF8(toString(OpenstatSourceID)),
|
||||
toValidUTF8(toString(UTMSource)),
|
||||
toValidUTF8(toString(UTMMedium)),
|
||||
toValidUTF8(toString(UTMCampaign)),
|
||||
toValidUTF8(toString(UTMContent)),
|
||||
toValidUTF8(toString(UTMTerm)),
|
||||
toValidUTF8(toString(FromTag)),
|
||||
toInt8(HasGCLID) = -128 ? -127 : toInt8(HasGCLID),
|
||||
toInt64(RefererHash) = -9223372036854775808 ? -9223372036854775807 : toInt64(RefererHash),
|
||||
toInt64(URLHash) = -9223372036854775808 ? -9223372036854775807 : toInt64(URLHash),
|
||||
toInt32(CLID) = -2147483648 ? -2147483647 : toInt32(CLID)
|
||||
FROM hits_100m_obfuscated
|
||||
INTO OUTFILE '/home/milovidov/example_datasets/hits_100m_obfuscated_monetdb.tsv'
|
||||
FORMAT TSV;
|
||||
```
|
||||
|
||||
Note that MonetDB does not support the most negative value of each integer type (for example, -128 for TINYINT), so we have to convert such values by adding one.
|
||||
This makes it impossible to store all values of 64-bit identifiers in BIGINT.
|
||||
Maybe it's a trick to optimize NULLs?
|
||||
|
||||
Upload the data:
|
||||
|
||||
```
|
||||
$ mclient -u monetdb -d test
|
||||
```
|
||||
|
||||
Type password: monetdb
|
||||
|
||||
```
|
||||
COPY INTO hits FROM '/home/milovidov/example_datasets/hits_100m_obfuscated_monetdb.tsv' USING DELIMITERS '\t';
|
||||
```
|
||||
|
||||
It takes 28 minutes 02 seconds on a server (Linux Ubuntu, Xeon E5-2560v2, 32 logical CPU, 128 GiB RAM, 8xHDD RAID-5, 40 TB).
|
||||
It is roughly 60 000 rows per second.
|
||||
|
||||
Validate the data:
|
||||
|
||||
```
|
||||
SELECT count(*) FROM hits;
|
||||
```
|
||||
|
||||
Create an index:
|
||||
|
||||
```
|
||||
CREATE INDEX hits_idx ON hits ("CounterID", "EventDate");
|
||||
```
|
||||
|
||||
(it takes 5 seconds)
|
||||
|
||||
Run the benchmark:
|
||||
|
||||
```
|
||||
./benchmark.sh | tee log.txt
|
||||
```
|
||||
|
||||
You can find the log in the `log.txt` file.
|
||||
|
||||
Postprocess data:
|
||||
|
||||
```
|
||||
grep clk log.txt | tr -d '\r' | awk '{ if ($3 == "ms") { print $2 / 1000; } else if ($3 == "sec") { print $2 } else { print } }'
|
||||
```
|
||||
|
||||
Then manually convert the timings reported in minutes ("min") to seconds and save the result to `tmp.txt`.
|
||||
Then convert the result to JSON format:
|
||||
|
||||
```
|
||||
awk '{
|
||||
if (i % 3 == 0) { a = $1 }
|
||||
else if (i % 3 == 1) { b = $1 }
|
||||
else if (i % 3 == 2) { c = $1; print "[" a ", " b ", " c "]," };
|
||||
++i; }' < tmp.txt
|
||||
```
|
||||
|
||||
Then paste the result into `/website/benchmark/dbms/results/005_monetdb.json` in the repository.
|
@ -1,341 +0,0 @@
|
||||
3
|
||||
SELECT count(*) FROM hits;
|
||||
1 tuple
|
||||
clk: 1.262 ms
|
||||
1 tuple
|
||||
clk: 1.420 ms
|
||||
1 tuple
|
||||
clk: 1.190 ms
|
||||
3
|
||||
SELECT count(*) FROM hits WHERE "AdvEngineID" <> 0;
|
||||
1 tuple
|
||||
clk: 1.530 sec
|
||||
1 tuple
|
||||
clk: 1.489 sec
|
||||
1 tuple
|
||||
clk: 1.490 sec
|
||||
3
|
||||
SELECT sum("AdvEngineID"), count(*), avg("ResolutionWidth") FROM hits;
|
||||
1 tuple
|
||||
clk: 597.512 ms
|
||||
1 tuple
|
||||
clk: 579.383 ms
|
||||
1 tuple
|
||||
clk: 598.220 ms
|
||||
3
|
||||
SELECT sum("UserID") FROM hits;
|
||||
overflow in calculation.
|
||||
clk: 568.003 ms
|
||||
overflow in calculation.
|
||||
clk: 554.572 ms
|
||||
overflow in calculation.
|
||||
clk: 552.076 ms
|
||||
3
|
||||
SELECT COUNT(DISTINCT "UserID") FROM hits;
|
||||
1 tuple
|
||||
clk: 6.688 sec
|
||||
1 tuple
|
||||
clk: 6.689 sec
|
||||
1 tuple
|
||||
clk: 6.652 sec
|
||||
3
|
||||
SELECT COUNT(DISTINCT "SearchPhrase") FROM hits;
|
||||
1 tuple
|
||||
clk: 15.702 sec
|
||||
1 tuple
|
||||
clk: 17.189 sec
|
||||
1 tuple
|
||||
clk: 15.514 sec
|
||||
3
|
||||
SELECT min("EventDate"), max("EventDate") FROM hits;
|
||||
1 tuple
|
||||
clk: 697.770 ms
|
||||
1 tuple
|
||||
clk: 711.870 ms
|
||||
1 tuple
|
||||
clk: 697.177 ms
|
||||
3
|
||||
SELECT "AdvEngineID", count(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY count(*) DESC;
|
||||
18 tuples
|
||||
clk: 1.536 sec
|
||||
18 tuples
|
||||
clk: 1.505 sec
|
||||
18 tuples
|
||||
clk: 1.492 sec
|
||||
3
|
||||
SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" ORDER BY u DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 9.965 sec
|
||||
10 tuples
|
||||
clk: 10.106 sec
|
||||
10 tuples
|
||||
clk: 10.136 sec
|
||||
3
|
||||
SELECT "RegionID", sum("AdvEngineID"), count(*) AS c, avg("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 8.329 sec
|
||||
10 tuples
|
||||
clk: 8.601 sec
|
||||
10 tuples
|
||||
clk: 8.039 sec
|
||||
3
|
||||
SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 3.385 sec
|
||||
10 tuples
|
||||
clk: 3.321 sec
|
||||
10 tuples
|
||||
clk: 3.326 sec
|
||||
3
|
||||
SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 3.510 sec
|
||||
10 tuples
|
||||
clk: 3.431 sec
|
||||
10 tuples
|
||||
clk: 3.382 sec
|
||||
3
|
||||
SELECT "SearchPhrase", count(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 10.891 sec
|
||||
10 tuples
|
||||
clk: 11.483 sec
|
||||
10 tuples
|
||||
clk: 10.352 sec
|
||||
3
|
||||
SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 15.711 sec
|
||||
10 tuples
|
||||
clk: 15.444 sec
|
||||
10 tuples
|
||||
clk: 15.503 sec
|
||||
3
|
||||
SELECT "SearchEngineID", "SearchPhrase", count(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 11.433 sec
|
||||
10 tuples
|
||||
clk: 11.399 sec
|
||||
10 tuples
|
||||
clk: 11.285 sec
|
||||
3
|
||||
SELECT "UserID", count(*) FROM hits GROUP BY "UserID" ORDER BY count(*) DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 7.184 sec
|
||||
10 tuples
|
||||
clk: 7.015 sec
|
||||
10 tuples
|
||||
clk: 6.849 sec
|
||||
3
|
||||
SELECT "UserID", "SearchPhrase", count(*) FROM hits GROUP BY "UserID", "SearchPhrase" ORDER BY count(*) DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 29.096 sec
|
||||
10 tuples
|
||||
clk: 28.328 sec
|
||||
10 tuples
|
||||
clk: 29.247 sec
|
||||
3
|
||||
SELECT "UserID", "SearchPhrase", count(*) FROM hits GROUP BY "UserID", "SearchPhrase" LIMIT 10;
|
||||
10 tuples
|
||||
clk: 29.457 sec
|
||||
10 tuples
|
||||
clk: 29.364 sec
|
||||
10 tuples
|
||||
clk: 29.269 sec
|
||||
3
|
||||
SELECT "UserID", extract(minute FROM "EventTime") AS m, "SearchPhrase", count(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER BY count(*) DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 47.141 sec
|
||||
10 tuples
|
||||
clk: 46.495 sec
|
||||
10 tuples
|
||||
clk: 46.472 sec
|
||||
3
|
||||
SELECT "UserID" FROM hits WHERE "UserID" = -6101065172474983726;
|
||||
0 tuples
|
||||
clk: 783.332 ms
|
||||
0 tuples
|
||||
clk: 771.157 ms
|
||||
0 tuples
|
||||
clk: 783.082 ms
|
||||
3
|
||||
SELECT count(*) FROM hits WHERE "URL" LIKE '%metrika%';
|
||||
1 tuple
|
||||
clk: 3.963 sec
|
||||
1 tuple
|
||||
clk: 3.930 sec
|
||||
1 tuple
|
||||
clk: 3.964 sec
|
||||
3
|
||||
SELECT "SearchPhrase", min("URL"), count(*) AS c FROM hits WHERE "URL" LIKE '%metrika%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 3.925 sec
|
||||
10 tuples
|
||||
clk: 3.817 sec
|
||||
10 tuples
|
||||
clk: 3.802 sec
|
||||
3
|
||||
SELECT "SearchPhrase", min("URL"), min("Title"), count(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Яндекс%' AND "URL" NOT LIKE '%.yandex.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 6.067 sec
|
||||
10 tuples
|
||||
clk: 6.120 sec
|
||||
10 tuples
|
||||
clk: 6.012 sec
|
||||
3
|
||||
SELECT * FROM hits WHERE "URL" LIKE '%metrika%' ORDER BY "EventTime" LIMIT 10;
|
||||
10 tuples !87 columns dropped, 29 fields truncated!
|
||||
clk: 4.251 sec
|
||||
10 tuples !87 columns dropped, 29 fields truncated!
|
||||
clk: 4.190 sec
|
||||
10 tuples !87 columns dropped, 29 fields truncated!
|
||||
clk: 4.379 sec
|
||||
3
|
||||
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime" LIMIT 10;
|
||||
10 tuples
|
||||
clk: 6.699 sec
|
||||
10 tuples
|
||||
clk: 6.718 sec
|
||||
10 tuples
|
||||
clk: 6.802 sec
|
||||
3
|
||||
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10;
|
||||
10 tuples
|
||||
clk: 6.887 sec
|
||||
10 tuples
|
||||
clk: 6.838 sec
|
||||
10 tuples
|
||||
clk: 6.844 sec
|
||||
3
|
||||
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime", "SearchPhrase" LIMIT 10;
|
||||
10 tuples
|
||||
clk: 6.806 sec
|
||||
10 tuples
|
||||
clk: 6.878 sec
|
||||
10 tuples
|
||||
clk: 6.807 sec
|
||||
3
|
||||
SELECT "CounterID", avg(length("URL")) AS l, count(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
25 tuples
|
||||
clk: 1:01 min
|
||||
25 tuples
|
||||
clk: 55.553 sec
|
||||
25 tuples
|
||||
clk: 56.188 sec
|
||||
3
|
||||
SELECT sys.getdomain("Referer") AS key, avg(length("Referer")) AS l, count(*) AS c, min("Referer") FROM hits WHERE "Referer" <> '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
clk: 1:00 min
|
||||
clk: 1:00 min
|
||||
clk: 1:00 min
|
||||
3
|
||||
SELECT sum("ResolutionWidth"), sum("ResolutionWidth" + 1), sum("ResolutionWidth" + 2), sum("ResolutionWidth" + 3), sum("ResolutionWidth" + 4), sum("ResolutionWidth" + 5), sum("ResolutionWidth" + 6), sum("ResolutionWidth" + 7), sum("ResolutionWidth" + 8), sum("ResolutionWidth" + 9), sum("ResolutionWidth" + 10), sum("ResolutionWidth" + 11), sum("ResolutionWidth" + 12), sum("ResolutionWidth" + 13), sum("ResolutionWidth" + 14), sum("ResolutionWidth" + 15), sum("ResolutionWidth" + 16), sum("ResolutionWidth" + 17), sum("ResolutionWidth" + 18), sum("ResolutionWidth" + 19), sum("ResolutionWidth" + 20), sum("ResolutionWidth" + 21), sum("ResolutionWidth" + 22), sum("ResolutionWidth" + 23), sum("ResolutionWidth" + 24), sum("ResolutionWidth" + 25), sum("ResolutionWidth" + 26), sum("ResolutionWidth" + 27), sum("ResolutionWidth" + 28), sum("ResolutionWidth" + 29), sum("ResolutionWidth" + 30), sum("ResolutionWidth" + 31), sum("ResolutionWidth" + 32), sum("ResolutionWidth" + 33), sum("ResolutionWidth" + 34), sum("ResolutionWidth" + 35), sum("ResolutionWidth" + 36), sum("ResolutionWidth" + 37), sum("ResolutionWidth" + 38), sum("ResolutionWidth" + 39), sum("ResolutionWidth" + 40), sum("ResolutionWidth" + 41), sum("ResolutionWidth" + 42), sum("ResolutionWidth" + 43), sum("ResolutionWidth" + 44), sum("ResolutionWidth" + 45), sum("ResolutionWidth" + 46), sum("ResolutionWidth" + 47), sum("ResolutionWidth" + 48), sum("ResolutionWidth" + 49), sum("ResolutionWidth" + 50), sum("ResolutionWidth" + 51), sum("ResolutionWidth" + 52), sum("ResolutionWidth" + 53), sum("ResolutionWidth" + 54), sum("ResolutionWidth" + 55), sum("ResolutionWidth" + 56), sum("ResolutionWidth" + 57), sum("ResolutionWidth" + 58), sum("ResolutionWidth" + 59), sum("ResolutionWidth" + 60), sum("ResolutionWidth" + 61), sum("ResolutionWidth" + 62), sum("ResolutionWidth" + 63), sum("ResolutionWidth" + 64), sum("ResolutionWidth" + 65), sum("ResolutionWidth" + 66), sum("ResolutionWidth" + 67), sum("ResolutionWidth" + 68), 
sum("ResolutionWidth" + 69), sum("ResolutionWidth" + 70), sum("ResolutionWidth" + 71), sum("ResolutionWidth" + 72), sum("ResolutionWidth" + 73), sum("ResolutionWidth" + 74), sum("ResolutionWidth" + 75), sum("ResolutionWidth" + 76), sum("ResolutionWidth" + 77), sum("ResolutionWidth" + 78), sum("ResolutionWidth" + 79), sum("ResolutionWidth" + 80), sum("ResolutionWidth" + 81), sum("ResolutionWidth" + 82), sum("ResolutionWidth" + 83), sum("ResolutionWidth" + 84), sum("ResolutionWidth" + 85), sum("ResolutionWidth" + 86), sum("ResolutionWidth" + 87), sum("ResolutionWidth" + 88), sum("ResolutionWidth" + 89) FROM hits;
|
||||
1 tuple !77 columns dropped!
|
||||
clk: 6.221 sec
|
||||
1 tuple !77 columns dropped!
|
||||
clk: 6.170 sec
|
||||
1 tuple !77 columns dropped!
|
||||
clk: 6.382 sec
|
||||
3
|
||||
SELECT "SearchEngineID", "ClientIP", count(*) AS c, sum("Refresh"), avg("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 5.684 sec
|
||||
10 tuples
|
||||
clk: 5.585 sec
|
||||
10 tuples
|
||||
clk: 5.463 sec
|
||||
3
|
||||
SELECT "WatchID", "ClientIP", count(*) AS c, sum("Refresh"), avg("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 6.281 sec
|
||||
10 tuples
|
||||
clk: 6.574 sec
|
||||
10 tuples
|
||||
clk: 6.243 sec
|
||||
3
|
||||
SELECT "WatchID", "ClientIP", count(*) AS c, sum("Refresh"), avg("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 44.641 sec
|
||||
10 tuples
|
||||
clk: 41.904 sec
|
||||
10 tuples
|
||||
clk: 43.218 sec
|
||||
3
|
||||
SELECT "URL", count(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 1:24 min
|
||||
10 tuples
|
||||
clk: 1:31 min
|
||||
10 tuples
|
||||
clk: 1:24 min
|
||||
3
|
||||
SELECT 1, "URL", count(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 1:24 min
|
||||
10 tuples
|
||||
clk: 1:25 min
|
||||
10 tuples
|
||||
clk: 1:24 min
|
||||
3
|
||||
SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, count(*) AS c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 26.438 sec
|
||||
10 tuples
|
||||
clk: 26.033 sec
|
||||
10 tuples
|
||||
clk: 26.147 sec
|
||||
3
|
||||
SELECT "URL", count(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "Refresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY "PageViews" DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 4.825 sec
|
||||
10 tuples
|
||||
clk: 4.618 sec
|
||||
10 tuples
|
||||
clk: 4.623 sec
|
||||
3
|
||||
SELECT "Title", count(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "Refresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY "PageViews" DESC LIMIT 10;
|
||||
10 tuples
|
||||
clk: 4.380 sec
|
||||
10 tuples
|
||||
clk: 4.418 sec
|
||||
10 tuples
|
||||
clk: 4.413 sec
|
||||
3
|
||||
SELECT "URL", count(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "Refresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY "PageViews" DESC LIMIT 1000;
|
||||
1000 tuples
|
||||
clk: 4.259 sec
|
||||
1000 tuples
|
||||
clk: 4.195 sec
|
||||
1000 tuples
|
||||
clk: 4.195 sec
|
||||
3
|
||||
SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, count(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "Refresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END, "URL" ORDER BY "PageViews" DESC LIMIT 1000;
|
||||
1000 tuples
|
||||
clk: 3.233 sec
|
||||
1000 tuples
|
||||
clk: 3.180 sec
|
||||
1000 tuples
|
||||
clk: 3.181 sec
|
||||
3
|
||||
SELECT "URLHash", "EventDate", count(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "Refresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 686716256552154761 GROUP BY "URLHash", "EventDate" ORDER BY "PageViews" DESC LIMIT 100;
|
||||
0 tuples
|
||||
clk: 2.656 sec
|
||||
0 tuples
|
||||
clk: 2.557 sec
|
||||
0 tuples
|
||||
clk: 2.561 sec
|
||||
3
|
||||
SELECT "WindowClientWidth", "WindowClientHeight", count(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "Refresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 686716256552154761 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY "PageViews" DESC LIMIT 10000;
|
||||
0 tuples
|
||||
clk: 4.161 sec
|
||||
0 tuples
|
||||
clk: 4.243 sec
|
||||
0 tuples
|
||||
clk: 4.166 sec
|
||||
3
|
||||
SELECT DATE_TRUNC('minute', "EventTime") AS "Minute", count(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-02' AND "Refresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', "EventTime") ORDER BY DATE_TRUNC('minute', "EventTime");
|
||||
0 tuples
|
||||
clk: 4.199 sec
|
||||
0 tuples
|
||||
clk: 4.211 sec
|
||||
0 tuples
|
||||
clk: 4.190 sec
|
@ -1,43 +0,0 @@
|
||||
SELECT count(*) FROM {table};
|
||||
SELECT count(*) FROM {table} WHERE "AdvEngineID" <> 0;
|
||||
SELECT sum("AdvEngineID"), count(*), avg("ResolutionWidth") FROM {table};
|
||||
SELECT sum("UserID") FROM {table};
|
||||
SELECT COUNT(DISTINCT "UserID") FROM {table};
|
||||
SELECT COUNT(DISTINCT "SearchPhrase") FROM {table};
|
||||
SELECT min("EventDate"), max("EventDate") FROM {table};
|
||||
SELECT "AdvEngineID", count(*) FROM {table} WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY count(*) DESC;
|
||||
SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM {table} GROUP BY "RegionID" ORDER BY u DESC LIMIT 10;
|
||||
SELECT "RegionID", sum("AdvEngineID"), count(*) AS c, avg("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM {table} GROUP BY "RegionID" ORDER BY c DESC LIMIT 10;
|
||||
SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM {table} WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
|
||||
SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM {table} WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
|
||||
SELECT "SearchPhrase", count(*) AS c FROM {table} WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
|
||||
SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM {table} WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10;
|
||||
SELECT "SearchEngineID", "SearchPhrase", count(*) AS c FROM {table} WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC LIMIT 10;
|
||||
SELECT "UserID", count(*) FROM {table} GROUP BY "UserID" ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT "UserID", "SearchPhrase", count(*) FROM {table} GROUP BY "UserID", "SearchPhrase" ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT "UserID", "SearchPhrase", count(*) FROM {table} GROUP BY "UserID", "SearchPhrase" LIMIT 10;
|
||||
SELECT "UserID", extract(minute FROM "EventTime") AS m, "SearchPhrase", count(*) FROM {table} GROUP BY "UserID", m, "SearchPhrase" ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT "UserID" FROM {table} WHERE "UserID" = -6101065172474983726;
|
||||
SELECT count(*) FROM {table} WHERE "URL" LIKE '%metrika%';
|
||||
SELECT "SearchPhrase", min("URL"), count(*) AS c FROM {table} WHERE "URL" LIKE '%metrika%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
|
||||
SELECT "SearchPhrase", min("URL"), min("Title"), count(*) AS c, COUNT(DISTINCT "UserID") FROM {table} WHERE "Title" LIKE '%Яндекс%' AND "URL" NOT LIKE '%.yandex.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
|
||||
SELECT * FROM {table} WHERE "URL" LIKE '%metrika%' ORDER BY "EventTime" LIMIT 10;
|
||||
SELECT "SearchPhrase" FROM {table} WHERE "SearchPhrase" <> '' ORDER BY "EventTime" LIMIT 10;
|
||||
SELECT "SearchPhrase" FROM {table} WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10;
|
||||
SELECT "SearchPhrase" FROM {table} WHERE "SearchPhrase" <> '' ORDER BY "EventTime", "SearchPhrase" LIMIT 10;
|
||||
SELECT "CounterID", avg(length("URL")) AS l, count(*) AS c FROM {table} WHERE "URL" <> '' GROUP BY "CounterID" HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT sys.getdomain("Referer") AS key, avg(length("Referer")) AS l, count(*) AS c, min("Referer") FROM {table} WHERE "Referer" <> '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT sum("ResolutionWidth"), sum("ResolutionWidth" + 1), sum("ResolutionWidth" + 2), sum("ResolutionWidth" + 3), sum("ResolutionWidth" + 4), sum("ResolutionWidth" + 5), sum("ResolutionWidth" + 6), sum("ResolutionWidth" + 7), sum("ResolutionWidth" + 8), sum("ResolutionWidth" + 9), sum("ResolutionWidth" + 10), sum("ResolutionWidth" + 11), sum("ResolutionWidth" + 12), sum("ResolutionWidth" + 13), sum("ResolutionWidth" + 14), sum("ResolutionWidth" + 15), sum("ResolutionWidth" + 16), sum("ResolutionWidth" + 17), sum("ResolutionWidth" + 18), sum("ResolutionWidth" + 19), sum("ResolutionWidth" + 20), sum("ResolutionWidth" + 21), sum("ResolutionWidth" + 22), sum("ResolutionWidth" + 23), sum("ResolutionWidth" + 24), sum("ResolutionWidth" + 25), sum("ResolutionWidth" + 26), sum("ResolutionWidth" + 27), sum("ResolutionWidth" + 28), sum("ResolutionWidth" + 29), sum("ResolutionWidth" + 30), sum("ResolutionWidth" + 31), sum("ResolutionWidth" + 32), sum("ResolutionWidth" + 33), sum("ResolutionWidth" + 34), sum("ResolutionWidth" + 35), sum("ResolutionWidth" + 36), sum("ResolutionWidth" + 37), sum("ResolutionWidth" + 38), sum("ResolutionWidth" + 39), sum("ResolutionWidth" + 40), sum("ResolutionWidth" + 41), sum("ResolutionWidth" + 42), sum("ResolutionWidth" + 43), sum("ResolutionWidth" + 44), sum("ResolutionWidth" + 45), sum("ResolutionWidth" + 46), sum("ResolutionWidth" + 47), sum("ResolutionWidth" + 48), sum("ResolutionWidth" + 49), sum("ResolutionWidth" + 50), sum("ResolutionWidth" + 51), sum("ResolutionWidth" + 52), sum("ResolutionWidth" + 53), sum("ResolutionWidth" + 54), sum("ResolutionWidth" + 55), sum("ResolutionWidth" + 56), sum("ResolutionWidth" + 57), sum("ResolutionWidth" + 58), sum("ResolutionWidth" + 59), sum("ResolutionWidth" + 60), sum("ResolutionWidth" + 61), sum("ResolutionWidth" + 62), sum("ResolutionWidth" + 63), sum("ResolutionWidth" + 64), sum("ResolutionWidth" + 65), sum("ResolutionWidth" + 66), sum("ResolutionWidth" + 67), sum("ResolutionWidth" + 68), 
sum("ResolutionWidth" + 69), sum("ResolutionWidth" + 70), sum("ResolutionWidth" + 71), sum("ResolutionWidth" + 72), sum("ResolutionWidth" + 73), sum("ResolutionWidth" + 74), sum("ResolutionWidth" + 75), sum("ResolutionWidth" + 76), sum("ResolutionWidth" + 77), sum("ResolutionWidth" + 78), sum("ResolutionWidth" + 79), sum("ResolutionWidth" + 80), sum("ResolutionWidth" + 81), sum("ResolutionWidth" + 82), sum("ResolutionWidth" + 83), sum("ResolutionWidth" + 84), sum("ResolutionWidth" + 85), sum("ResolutionWidth" + 86), sum("ResolutionWidth" + 87), sum("ResolutionWidth" + 88), sum("ResolutionWidth" + 89) FROM {table};
|
||||
SELECT "SearchEngineID", "ClientIP", count(*) AS c, sum("Refresh"), avg("ResolutionWidth") FROM {table} WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10;
|
||||
SELECT "WatchID", "ClientIP", count(*) AS c, sum("Refresh"), avg("ResolutionWidth") FROM {table} WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
|
||||
SELECT "WatchID", "ClientIP", count(*) AS c, sum("Refresh"), avg("ResolutionWidth") FROM {table} GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
|
||||
SELECT "URL", count(*) AS c FROM {table} GROUP BY "URL" ORDER BY c DESC LIMIT 10;
|
||||
SELECT 1, "URL", count(*) AS c FROM {table} GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10;
|
||||
SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, count(*) AS c FROM {table} GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10;
|
||||
SELECT "URL", count(*) AS "PageViews" FROM {table} WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "Refresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY "PageViews" DESC LIMIT 10;
|
||||
SELECT "Title", count(*) AS "PageViews" FROM {table} WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "Refresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY "PageViews" DESC LIMIT 10;
|
||||
SELECT "URL", count(*) AS "PageViews" FROM {table} WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "Refresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY "PageViews" DESC LIMIT 1000;
|
||||
SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, count(*) AS "PageViews" FROM {table} WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "Refresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END, "URL" ORDER BY "PageViews" DESC LIMIT 1000;
|
||||
SELECT "URLHash", "EventDate", count(*) AS "PageViews" FROM {table} WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "Refresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 686716256552154761 GROUP BY "URLHash", "EventDate" ORDER BY "PageViews" DESC LIMIT 100;
|
||||
SELECT "WindowClientWidth", "WindowClientHeight", count(*) AS "PageViews" FROM {table} WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "Refresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 686716256552154761 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY "PageViews" DESC LIMIT 10000;
|
||||
SELECT DATE_TRUNC('minute', "EventTime") AS "Minute", count(*) AS "PageViews" FROM {table} WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-02' AND "Refresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', "EventTime") ORDER BY DATE_TRUNC('minute', "EventTime");
|
@ -1,19 +0,0 @@
|
||||
#!/usr/bin/expect
|
||||
|
||||
# Set timeout
|
||||
set timeout 600
|
||||
|
||||
# Get arguments
|
||||
set query [lindex $argv 0]
|
||||
|
||||
spawn mclient -u monetdb -d test --timer=clock
|
||||
expect "password:"
|
||||
send "monetdb\r"
|
||||
|
||||
expect "sql>"
|
||||
send "$query;\r"
|
||||
|
||||
expect "sql>"
|
||||
send "\\q\r"
|
||||
|
||||
expect eof
|
File diff suppressed because it is too large
Load Diff
@ -1,17 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
grep -v -P '^#' queries.sql | sed -e 's/{table}/hits/' | while read query; do
|
||||
|
||||
echo 3 | sudo tee /proc/sys/vm/drop_caches
|
||||
sudo systemctl restart omnisci_server
|
||||
for i in {1..1000}; do
|
||||
/opt/omnisci/bin/omnisql -t -p HyperInteractive <<< "SELECT 1;" 2>&1 | grep -q '1 rows returned' && break;
|
||||
sleep 0.1;
|
||||
done
|
||||
sleep 10;
|
||||
|
||||
echo "$query";
|
||||
for i in {1..3}; do
|
||||
/opt/omnisci/bin/omnisql -t -p HyperInteractive <<< "$query" 2>&1 | grep -P 'Exception:|Execution time:';
|
||||
done;
|
||||
done;
|
@ -1,332 +0,0 @@
|
||||
# Instructions for running the OmniSci benchmark on the web-analytics dataset
|
||||
|
||||
OmniSci (formerly named "MapD") is an open-source (open-core) in-memory analytical DBMS with support for GPU processing.
|
||||
It can also run on a CPU without a GPU. It can show competitive performance on simple queries (for example, a simple aggregation on a single column).
|
||||
|
||||
# How to install
|
||||
|
||||
https://docs.omnisci.com/installation-and-configuration/installation/installing-on-ubuntu
|
||||
|
||||
# Caveats
|
||||
|
||||
- Dataset (at least needed columns) must fit in memory.
|
||||
- It does not support data compression (only dictionary encoding for strings).
|
||||
- First query execution is very slow because uncompressed data is read from disk.
|
||||
- It does not support indexes for quick range queries.
|
||||
- It does not support NOT NULL for data types.
|
||||
- It does not support BLOB.
|
||||
- No support for UNSIGNED data types (this is OK according to the SQL standard).
|
||||
- Lack of string processing functions.
|
||||
- Strings are limited to 32767 bytes.
|
||||
- GROUP BY on text data type is supported only if it has dictionary encoding.
|
||||
`Exception: Cannot group by string columns which are not dictionary encoded`
|
||||
- Some aggregate functions are not supported for strings at all.
|
||||
`Aggregate on TEXT is not supported yet.`
|
||||
- Sometimes I hit a bug where a query runs in an infinite loop and does not finish (after a retry it finishes successfully).
|
||||
- One query took hours to execute, even with retries.
|
||||
- Sorting is slow and, with default settings, is disabled for large result sets.
|
||||
`Exception: Sorting the result would be too slow`
|
||||
`Cast from dictionary-encoded string to none-encoded would be slow`
|
||||
- There is an approximate count-distinct function, but its precision is not documented.
|
||||
|
||||
To enable sorting of large resultsets, see:
|
||||
https://stackoverflow.com/questions/62977734/omnissci-sorting-the-result-would-be-too-slow
|
||||
|
||||
The list of known issues is here:
|
||||
https://github.com/omnisci/omniscidb/issues?q=is%3Aissue+author%3Aalexey-milovidov
|
||||
|
||||
# How to prepare data
|
||||
|
||||
Download the 100-million-row dataset from here and insert it into ClickHouse:
|
||||
https://clickhouse.com/docs/en/getting-started/example-datasets/metrica/
|
||||
|
||||
Convert the CREATE TABLE query:
|
||||
|
||||
```
|
||||
clickhouse-client --query "SHOW CREATE TABLE hits_100m" --format TSVRaw |
|
||||
tr '`' '"' |
|
||||
sed -r -e '
|
||||
s/U?Int64/BIGINT/;
|
||||
s/U?Int32/INTEGER/;
|
||||
s/U?Int16/SMALLINT/;
|
||||
s/U?Int8/TINYINT/;
|
||||
s/DateTime/TIMESTAMP ENCODING FIXED(32)/;
|
||||
s/ Date/ DATE ENCODING DAYS(16)/;
|
||||
s/FixedString\(2\)/TEXT ENCODING DICT(16)/;
|
||||
s/FixedString\(3\)/TEXT ENCODING DICT/;
|
||||
s/FixedString\([0-9]+\)/TEXT ENCODING DICT/;
|
||||
s/String/TEXT ENCODING DICT/;'
|
||||
```
|
||||
Then cut out the `ENGINE` part.
|
||||
|
||||
The resulting CREATE TABLE query:
|
||||
```
|
||||
CREATE TABLE hits
|
||||
(
|
||||
"WatchID" BIGINT,
|
||||
"JavaEnable" TINYINT,
|
||||
"Title" TEXT ENCODING DICT,
|
||||
"GoodEvent" SMALLINT,
|
||||
"EventTime" TIMESTAMP ENCODING FIXED(32),
|
||||
"EventDate" DATE ENCODING DAYS(16),
|
||||
"CounterID" INTEGER,
|
||||
"ClientIP" INTEGER,
|
||||
"RegionID" INTEGER,
|
||||
"UserID" BIGINT,
|
||||
"CounterClass" TINYINT,
|
||||
"OS" TINYINT,
|
||||
"UserAgent" TINYINT,
|
||||
"URL" TEXT ENCODING DICT,
|
||||
"Referer" TEXT ENCODING DICT,
|
||||
"Refresh" TINYINT,
|
||||
"RefererCategoryID" SMALLINT,
|
||||
"RefererRegionID" INTEGER,
|
||||
"URLCategoryID" SMALLINT,
|
||||
"URLRegionID" INTEGER,
|
||||
"ResolutionWidth" SMALLINT,
|
||||
"ResolutionHeight" SMALLINT,
|
||||
"ResolutionDepth" TINYINT,
|
||||
"FlashMajor" TINYINT,
|
||||
"FlashMinor" TINYINT,
|
||||
"FlashMinor2" TEXT ENCODING DICT,
|
||||
"NetMajor" TINYINT,
|
||||
"NetMinor" TINYINT,
|
||||
"UserAgentMajor" SMALLINT,
|
||||
"UserAgentMinor" TEXT ENCODING DICT(16),
|
||||
"CookieEnable" TINYINT,
|
||||
"JavascriptEnable" TINYINT,
|
||||
"IsMobile" TINYINT,
|
||||
"MobilePhone" TINYINT,
|
||||
"MobilePhoneModel" TEXT ENCODING DICT,
|
||||
"Params" TEXT ENCODING DICT,
|
||||
"IPNetworkID" INTEGER,
|
||||
"TraficSourceID" TINYINT,
|
||||
"SearchEngineID" SMALLINT,
|
||||
"SearchPhrase" TEXT ENCODING DICT,
|
||||
"AdvEngineID" TINYINT,
|
||||
"IsArtifical" TINYINT,
|
||||
"WindowClientWidth" SMALLINT,
|
||||
"WindowClientHeight" SMALLINT,
|
||||
"ClientTimeZone" SMALLINT,
|
||||
"ClientEventTime" TIMESTAMP ENCODING FIXED(32),
|
||||
"SilverlightVersion1" TINYINT,
|
||||
"SilverlightVersion2" TINYINT,
|
||||
"SilverlightVersion3" INTEGER,
|
||||
"SilverlightVersion4" SMALLINT,
|
||||
"PageCharset" TEXT ENCODING DICT,
|
||||
"CodeVersion" INTEGER,
|
||||
"IsLink" TINYINT,
|
||||
"IsDownload" TINYINT,
|
||||
"IsNotBounce" TINYINT,
|
||||
"FUniqID" BIGINT,
|
||||
"OriginalURL" TEXT ENCODING DICT,
|
||||
"HID" INTEGER,
|
||||
"IsOldCounter" TINYINT,
|
||||
"IsEvent" TINYINT,
|
||||
"IsParameter" TINYINT,
|
||||
"DontCountHits" TINYINT,
|
||||
"WithHash" TINYINT,
|
||||
"HitColor" TEXT ENCODING DICT(8),
|
||||
"LocalEventTime" TIMESTAMP ENCODING FIXED(32),
|
||||
"Age" TINYINT,
|
||||
"Sex" TINYINT,
|
||||
"Income" TINYINT,
|
||||
"Interests" SMALLINT,
|
||||
"Robotness" TINYINT,
|
||||
"RemoteIP" INTEGER,
|
||||
"WindowName" INTEGER,
|
||||
"OpenerName" INTEGER,
|
||||
"HistoryLength" SMALLINT,
|
||||
"BrowserLanguage" TEXT ENCODING DICT(16),
|
||||
"BrowserCountry" TEXT ENCODING DICT(16),
|
||||
"SocialNetwork" TEXT ENCODING DICT,
|
||||
"SocialAction" TEXT ENCODING DICT,
|
||||
"HTTPError" SMALLINT,
|
||||
"SendTiming" INTEGER,
|
||||
"DNSTiming" INTEGER,
|
||||
"ConnectTiming" INTEGER,
|
||||
"ResponseStartTiming" INTEGER,
|
||||
"ResponseEndTiming" INTEGER,
|
||||
"FetchTiming" INTEGER,
|
||||
"SocialSourceNetworkID" TINYINT,
|
||||
"SocialSourcePage" TEXT ENCODING DICT,
|
||||
"ParamPrice" BIGINT,
|
||||
"ParamOrderID" TEXT ENCODING DICT,
|
||||
"ParamCurrency" TEXT ENCODING DICT,
|
||||
"ParamCurrencyID" SMALLINT,
|
||||
"OpenstatServiceName" TEXT ENCODING DICT,
|
||||
"OpenstatCampaignID" TEXT ENCODING DICT,
|
||||
"OpenstatAdID" TEXT ENCODING DICT,
|
||||
"OpenstatSourceID" TEXT ENCODING DICT,
|
||||
"UTMSource" TEXT ENCODING DICT,
|
||||
"UTMMedium" TEXT ENCODING DICT,
|
||||
"UTMCampaign" TEXT ENCODING DICT,
|
||||
"UTMContent" TEXT ENCODING DICT,
|
||||
"UTMTerm" TEXT ENCODING DICT,
|
||||
"FromTag" TEXT ENCODING DICT,
|
||||
"HasGCLID" TINYINT,
|
||||
"RefererHash" BIGINT,
|
||||
"URLHash" BIGINT,
|
||||
"CLID" INTEGER
|
||||
);
|
||||
```
|
||||
|
||||
Convert the dataset, prepare the list of fields for SELECT:
|
||||
|
||||
```
|
||||
clickhouse-client --query "SHOW CREATE TABLE hits_100m" --format TSVRaw |
|
||||
tr '`' '"' |
|
||||
sed -r -e '
|
||||
s/"(\w+)" U?Int([0-9]+)/toInt\2(\1)/;
|
||||
s/"(\w+)" (Fixed)?String(\([0-9]+\))?/toValidUTF8(toString(\1))/;
|
||||
s/"(\w+)" \w+/\1/'
|
||||
```
|
||||
|
||||
The resulting SELECT query for data preparation:
|
||||
|
||||
```
|
||||
SELECT
|
||||
toInt64(WatchID),
|
||||
toInt8(JavaEnable),
|
||||
toValidUTF8(toString(Title)),
|
||||
toInt16(GoodEvent),
|
||||
EventTime,
|
||||
EventDate,
|
||||
toInt32(CounterID),
|
||||
toInt32(ClientIP),
|
||||
toInt32(RegionID),
|
||||
toInt64(UserID),
|
||||
toInt8(CounterClass),
|
||||
toInt8(OS),
|
||||
toInt8(UserAgent),
|
||||
toValidUTF8(toString(URL)),
|
||||
toValidUTF8(toString(Referer)),
|
||||
toInt8(Refresh),
|
||||
toInt16(RefererCategoryID),
|
||||
toInt32(RefererRegionID),
|
||||
toInt16(URLCategoryID),
|
||||
toInt32(URLRegionID),
|
||||
toInt16(ResolutionWidth),
|
||||
toInt16(ResolutionHeight),
|
||||
toInt8(ResolutionDepth),
|
||||
toInt8(FlashMajor),
|
||||
toInt8(FlashMinor),
|
||||
toValidUTF8(toString(FlashMinor2)),
|
||||
toInt8(NetMajor),
|
||||
toInt8(NetMinor),
|
||||
toInt16(UserAgentMajor),
|
||||
toValidUTF8(toString(UserAgentMinor)),
|
||||
toInt8(CookieEnable),
|
||||
toInt8(JavascriptEnable),
|
||||
toInt8(IsMobile),
|
||||
toInt8(MobilePhone),
|
||||
toValidUTF8(toString(MobilePhoneModel)),
|
||||
toValidUTF8(toString(Params)),
|
||||
toInt32(IPNetworkID),
|
||||
toInt8(TraficSourceID),
|
||||
toInt16(SearchEngineID),
|
||||
toValidUTF8(toString(SearchPhrase)),
|
||||
toInt8(AdvEngineID),
|
||||
toInt8(IsArtifical),
|
||||
toInt16(WindowClientWidth),
|
||||
toInt16(WindowClientHeight),
|
||||
toInt16(ClientTimeZone),
|
||||
ClientEventTime,
|
||||
toInt8(SilverlightVersion1),
|
||||
toInt8(SilverlightVersion2),
|
||||
toInt32(SilverlightVersion3),
|
||||
toInt16(SilverlightVersion4),
|
||||
toValidUTF8(toString(PageCharset)),
|
||||
toInt32(CodeVersion),
|
||||
toInt8(IsLink),
|
||||
toInt8(IsDownload),
|
||||
toInt8(IsNotBounce),
|
||||
toInt64(FUniqID),
|
||||
toValidUTF8(toString(OriginalURL)),
|
||||
toInt32(HID),
|
||||
toInt8(IsOldCounter),
|
||||
toInt8(IsEvent),
|
||||
toInt8(IsParameter),
|
||||
toInt8(DontCountHits),
|
||||
toInt8(WithHash),
|
||||
toValidUTF8(toString(HitColor)),
|
||||
LocalEventTime,
|
||||
toInt8(Age),
|
||||
toInt8(Sex),
|
||||
toInt8(Income),
|
||||
toInt16(Interests),
|
||||
toInt8(Robotness),
|
||||
toInt32(RemoteIP),
|
||||
toInt32(WindowName),
|
||||
toInt32(OpenerName),
|
||||
toInt16(HistoryLength),
|
||||
toValidUTF8(toString(BrowserLanguage)),
|
||||
toValidUTF8(toString(BrowserCountry)),
|
||||
toValidUTF8(toString(SocialNetwork)),
|
||||
toValidUTF8(toString(SocialAction)),
|
||||
toInt16(HTTPError),
|
||||
toInt32(SendTiming),
|
||||
toInt32(DNSTiming),
|
||||
toInt32(ConnectTiming),
|
||||
toInt32(ResponseStartTiming),
|
||||
toInt32(ResponseEndTiming),
|
||||
toInt32(FetchTiming),
|
||||
toInt8(SocialSourceNetworkID),
|
||||
toValidUTF8(toString(SocialSourcePage)),
|
||||
toInt64(ParamPrice),
|
||||
toValidUTF8(toString(ParamOrderID)),
|
||||
toValidUTF8(toString(ParamCurrency)),
|
||||
toInt16(ParamCurrencyID),
|
||||
toValidUTF8(toString(OpenstatServiceName)),
|
||||
toValidUTF8(toString(OpenstatCampaignID)),
|
||||
toValidUTF8(toString(OpenstatAdID)),
|
||||
toValidUTF8(toString(OpenstatSourceID)),
|
||||
toValidUTF8(toString(UTMSource)),
|
||||
toValidUTF8(toString(UTMMedium)),
|
||||
toValidUTF8(toString(UTMCampaign)),
|
||||
toValidUTF8(toString(UTMContent)),
|
||||
toValidUTF8(toString(UTMTerm)),
|
||||
toValidUTF8(toString(FromTag)),
|
||||
toInt8(HasGCLID),
|
||||
toInt64(RefererHash),
|
||||
toInt64(URLHash),
|
||||
toInt32(CLID)
|
||||
FROM hits_100m_obfuscated
|
||||
INTO OUTFILE '/home/milovidov/example_datasets/hits_100m_obfuscated.csv'
|
||||
FORMAT CSV;
|
||||
```
|
||||
|
||||
Upload data to OmniSci:
|
||||
```
|
||||
/opt/omnisci/bin/omnisql -t -p HyperInteractive
|
||||
```
|
||||
Run the CREATE TABLE statement, then run:
|
||||
```
|
||||
COPY hits FROM '/home/milovidov/example_datasets/hits_100m_obfuscated.csv' WITH (HEADER = 'false');
|
||||
```
|
||||
|
||||
Data loading took
|
||||
```
|
||||
336639 ms
|
||||
```
|
||||
on a server (Linux Ubuntu, Xeon E5-2650v2, 32 logical CPU, 128 GiB RAM, 8xHDD RAID-5, 40 TB).
|
||||
|
||||
Run benchmark:
|
||||
|
||||
```
|
||||
./benchmark.sh
|
||||
```
|
||||
|
||||
Prepare the result to paste into JSON:
|
||||
|
||||
```
|
||||
grep -oP 'Total time: \d+' log.txt |
|
||||
grep -oP '\d+' |
|
||||
awk '{
|
||||
if (i % 3 == 0) { a = $1 }
|
||||
else if (i % 3 == 1) { b = $1 }
|
||||
else if (i % 3 == 2) { c = $1; print "[" a / 1000 ", " b / 1000 ", " c / 1000 "]," };
|
||||
++i; }'
|
||||
```
|
||||
|
||||
Then fill in `[null, null, null]` for missing runs.
|
@ -1,210 +0,0 @@
|
||||
3
|
||||
SELECT count(*) FROM hits;
|
||||
Execution time: 23471 ms, Total time: 23471 ms
|
||||
Execution time: 42 ms, Total time: 43 ms
|
||||
Execution time: 35 ms, Total time: 35 ms
|
||||
3
|
||||
SELECT count(*) FROM hits WHERE AdvEngineID != 0;
|
||||
Execution time: 17328 ms, Total time: 17329 ms
|
||||
Execution time: 58 ms, Total time: 59 ms
|
||||
Execution time: 57 ms, Total time: 59 ms
|
||||
3
|
||||
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits;
|
||||
Execution time: 17309 ms, Total time: 17310 ms
|
||||
Execution time: 115 ms, Total time: 115 ms
|
||||
Execution time: 129 ms, Total time: 130 ms
|
||||
3
|
||||
SELECT sum(UserID) FROM hits;
|
||||
Execution time: 26091 ms, Total time: 26091 ms
|
||||
Execution time: 88 ms, Total time: 89 ms
|
||||
Execution time: 71 ms, Total time: 72 ms
|
||||
3
|
||||
SELECT APPROX_COUNT_DISTINCT(UserID) FROM hits;
|
||||
Execution time: 21720 ms, Total time: 21720 ms
|
||||
Execution time: 364 ms, Total time: 364 ms
|
||||
Execution time: 344 ms, Total time: 345 ms
|
||||
3
|
||||
SELECT APPROX_COUNT_DISTINCT(SearchPhrase) FROM hits;
|
||||
Execution time: 19314 ms, Total time: 19315 ms
|
||||
Execution time: 385 ms, Total time: 386 ms
|
||||
Execution time: 382 ms, Total time: 382 ms
|
||||
3
|
||||
SELECT min(EventDate), max(EventDate) FROM hits;
|
||||
Execution time: 19431 ms, Total time: 19432 ms
|
||||
Execution time: 130 ms, Total time: 131 ms
|
||||
Execution time: 147 ms, Total time: 148 ms
|
||||
3
|
||||
SELECT AdvEngineID, count(*) FROM hits WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
|
||||
Execution time: 20660 ms, Total time: 20661 ms
|
||||
Execution time: 63 ms, Total time: 64 ms
|
||||
Execution time: 88 ms, Total time: 89 ms
|
||||
3
|
||||
SELECT RegionID, APPROX_COUNT_DISTINCT(UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
|
||||
Execution time: 21364 ms, Total time: 21472 ms
|
||||
Execution time: 1387 ms, Total time: 1504 ms
|
||||
Execution time: 1443 ms, Total time: 1505 ms
|
||||
3
|
||||
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), APPROX_COUNT_DISTINCT(UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
|
||||
Execution time: 22205 ms, Total time: 22285 ms
|
||||
Execution time: 1590 ms, Total time: 1655 ms
|
||||
Execution time: 1591 ms, Total time: 1658 ms
|
||||
3
|
||||
SELECT MobilePhoneModel, APPROX_COUNT_DISTINCT(UserID) AS u FROM hits WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
Execution time: 22343 ms, Total time: 22344 ms
|
||||
Execution time: 122 ms, Total time: 123 ms
|
||||
Execution time: 117 ms, Total time: 118 ms
|
||||
3
|
||||
SELECT MobilePhone, MobilePhoneModel, APPROX_COUNT_DISTINCT(UserID) AS u FROM hits WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
Execution time: 21681 ms, Total time: 21695 ms
|
||||
Execution time: 299 ms, Total time: 310 ms
|
||||
Execution time: 275 ms, Total time: 292 ms
|
||||
3
|
||||
SELECT SearchPhrase, count(*) AS c FROM hits WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
Execution time: 23346 ms, Total time: 23360 ms
|
||||
Execution time: 613 ms, Total time: 631 ms
|
||||
Execution time: 606 ms, Total time: 624 ms
|
||||
3
|
||||
SELECT SearchPhrase, APPROX_COUNT_DISTINCT(UserID) AS u FROM hits WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
Execution time: 66014 ms, Total time: 68618 ms
|
||||
Execution time: 44309 ms, Total time: 47296 ms
|
||||
Execution time: 44019 ms, Total time: 46866 ms
|
||||
3
|
||||
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
Execution time: 25853 ms, Total time: 25984 ms
|
||||
Execution time: 2590 ms, Total time: 2728 ms
|
||||
Execution time: 2652 ms, Total time: 2789 ms
|
||||
3
|
||||
SELECT UserID, count(*) FROM hits GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
|
||||
Execution time: 26581 ms, Total time: 26953 ms
|
||||
Execution time: 5843 ms, Total time: 6158 ms
|
||||
Execution time: 5970 ms, Total time: 6286 ms
|
||||
3
|
||||
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
Execution time: 33007 ms, Total time: 33581 ms
|
||||
Execution time: 9943 ms, Total time: 10509 ms
|
||||
Execution time: 9470 ms, Total time: 10047 ms
|
||||
3
|
||||
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
|
||||
Execution time: 39009 ms, Total time: 39575 ms
|
||||
Execution time: 8151 ms, Total time: 8785 ms
|
||||
Execution time: 8037 ms, Total time: 8665 ms
|
||||
3
|
||||
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
Execution time: 56207 ms, Total time: 57764 ms
|
||||
Execution time: 26653 ms, Total time: 28199 ms
|
||||
Execution time: 25614 ms, Total time: 27336 ms
|
||||
3
|
||||
SELECT UserID FROM hits WHERE UserID = -6101065172474983726;
|
||||
Execution time: 18975 ms, Total time: 18976 ms
|
||||
Execution time: 136 ms, Total time: 136 ms
|
||||
Execution time: 136 ms, Total time: 136 ms
|
||||
3
|
||||
SELECT count(*) FROM hits WHERE URL LIKE '%metrika%';
|
||||
Execution time: 32444 ms, Total time: 32445 ms
|
||||
Execution time: 125 ms, Total time: 126 ms
|
||||
Execution time: 134 ms, Total time: 136 ms
|
||||
3
|
||||
SELECT SearchPhrase, min(URL), count(*) AS c FROM hits WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
Exception: Aggregate on TEXT is not supported yet.
|
||||
Exception: Aggregate on TEXT is not supported yet.
|
||||
Exception: Aggregate on TEXT is not supported yet.
|
||||
3
|
||||
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, APPROX_COUNT_DISTINCT(UserID) FROM hits WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
Exception: Aggregate on TEXT is not supported yet.
|
||||
Exception: Aggregate on TEXT is not supported yet.
|
||||
Exception: Aggregate on TEXT is not supported yet.
|
||||
3
|
||||
SELECT * FROM hits WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
|
||||
Execution time: 96163 ms, Total time: 96166 ms
|
||||
Execution time: 312 ms, Total time: 314 ms
|
||||
Execution time: 303 ms, Total time: 305 ms
|
||||
3
|
||||
SELECT SearchPhrase FROM hits WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
|
||||
Execution time: 27493 ms, Total time: 27494 ms
|
||||
Execution time: 216 ms, Total time: 216 ms
|
||||
Execution time: 221 ms, Total time: 222 ms
|
||||
3
|
||||
SELECT SearchPhrase FROM hits WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
|
||||
Execution time: 38230 ms, Total time: 38308 ms
|
||||
Execution time: 17175 ms, Total time: 17256 ms
|
||||
Execution time: 17225 ms, Total time: 17310 ms
|
||||
3
|
||||
SELECT SearchPhrase FROM hits WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
|
||||
Execution time: 115614 ms, Total time: 115714 ms
|
||||
Execution time: 95944 ms, Total time: 96041 ms
|
||||
Execution time: 94274 ms, Total time: 94383 ms
|
||||
3
|
||||
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM hits WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25;
|
||||
Execution time: 31775 ms, Total time: 31779 ms
|
||||
Execution time: 2643 ms, Total time: 2647 ms
|
||||
Execution time: 2933 ms, Total time: 2937 ms
|
||||
3
|
||||
SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM hits WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25;
|
||||
Exception: Exception occurred: org.apache.calcite.runtime.CalciteContextException: From line 1, column 8 to line 1, column 36: No match found for function signature domainWithoutWWW(<CHARACTER>)
|
||||
Exception: Exception occurred: org.apache.calcite.runtime.CalciteContextException: From line 1, column 8 to line 1, column 36: No match found for function signature domainWithoutWWW(<CHARACTER>)
|
||||
Exception: Exception occurred: org.apache.calcite.runtime.CalciteContextException: From line 1, column 8 to line 1, column 36: No match found for function signature domainWithoutWWW(<CHARACTER>)
|
||||
3
|
||||
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits;
|
||||
Execution time: 28853 ms, Total time: 28854 ms
|
||||
Execution time: 5654 ms, Total time: 5655 ms
|
||||
Execution time: 5579 ms, Total time: 5581 ms
|
||||
3
|
||||
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("Refresh"), avg(ResolutionWidth) FROM hits WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
Execution time: 31694 ms, Total time: 31925 ms
|
||||
Execution time: 3872 ms, Total time: 4142 ms
|
||||
Execution time: 3928 ms, Total time: 4162 ms
|
||||
3
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum("Refresh"), avg(ResolutionWidth) FROM hits WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
Execution time: 43690 ms, Total time: 44297 ms
|
||||
Execution time: 8221 ms, Total time: 8825 ms
|
||||
Execution time: 8115 ms, Total time: 8711 ms
|
||||
3
|
||||
SELECT URL, count(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;
|
||||
Execution time: 29669 ms, Total time: 29715 ms
|
||||
Execution time: 1623 ms, Total time: 1669 ms
|
||||
Execution time: 1534 ms, Total time: 1586 ms
|
||||
3
|
||||
SELECT 1, URL, count(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
|
||||
Execution time: 34860 ms, Total time: 35201 ms
|
||||
Execution time: 7075 ms, Total time: 7414 ms
|
||||
Execution time: 7164 ms, Total time: 7567 ms
|
||||
3
|
||||
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
|
||||
Execution time: 26467 ms, Total time: 26724 ms
|
||||
Execution time: 5740 ms, Total time: 6026 ms
|
||||
Execution time: 5667 ms, Total time: 5920 ms
|
||||
3
|
||||
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "Refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
|
||||
Execution time: 31899 ms, Total time: 31908 ms
|
||||
Execution time: 1141 ms, Total time: 1154 ms
|
||||
Execution time: 1155 ms, Total time: 1168 ms
|
||||
3
|
||||
SELECT Title, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "Refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
|
||||
Execution time: 27991 ms, Total time: 27997 ms
|
||||
Execution time: 719 ms, Total time: 724 ms
|
||||
Execution time: 737 ms, Total time: 744 ms
|
||||
3
|
||||
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
|
||||
Execution time: 34651 ms, Total time: 34661 ms
|
||||
Execution time: 1182 ms, Total time: 1200 ms
|
||||
Execution time: 1142 ms, Total time: 1159 ms
|
||||
3
|
||||
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
|
||||
Execution time: 30130 ms, Total time: 30136 ms
|
||||
Execution time: 461 ms, Total time: 467 ms
|
||||
Execution time: 445 ms, Total time: 451 ms
|
||||
3
|
||||
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
|
||||
Execution time: 19989 ms, Total time: 19991 ms
|
||||
Execution time: 326 ms, Total time: 327 ms
|
||||
Execution time: 325 ms, Total time: 326 ms
|
||||
3
|
||||
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
|
||||
Execution time: 18658 ms, Total time: 18660 ms
|
||||
Execution time: 265 ms, Total time: 266 ms
|
||||
Execution time: 254 ms, Total time: 255 ms
|
||||
3
|
||||
SELECT DATE_TRUNC(minute, EventTime) AS "Minute", count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "Refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC(minute, EventTime) ORDER BY DATE_TRUNC(minute, EventTime);
|
||||
Execution time: 25225 ms, Total time: 25227 ms
|
||||
Execution time: 210 ms, Total time: 212 ms
|
||||
Execution time: 199 ms, Total time: 200 ms
|
@ -1,43 +0,0 @@
|
||||
SELECT count(*) FROM {table};
|
||||
SELECT count(*) FROM {table} WHERE AdvEngineID != 0;
|
||||
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM {table};
|
||||
SELECT sum(UserID) FROM {table};
|
||||
SELECT APPROX_COUNT_DISTINCT(UserID) FROM {table};
|
||||
SELECT APPROX_COUNT_DISTINCT(SearchPhrase) FROM {table};
|
||||
SELECT min(EventDate), max(EventDate) FROM {table};
|
||||
SELECT AdvEngineID, count(*) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
|
||||
SELECT RegionID, APPROX_COUNT_DISTINCT(UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
|
||||
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), APPROX_COUNT_DISTINCT(UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
|
||||
SELECT MobilePhoneModel, APPROX_COUNT_DISTINCT(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
SELECT MobilePhone, MobilePhoneModel, APPROX_COUNT_DISTINCT(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
SELECT SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT SearchPhrase, APPROX_COUNT_DISTINCT(UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT UserID, count(*) FROM {table} GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
|
||||
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID FROM {table} WHERE UserID = -6101065172474983726;
|
||||
SELECT count(*) FROM {table} WHERE URL LIKE '%metrika%';
|
||||
SELECT SearchPhrase, min(URL), count(*) AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, APPROX_COUNT_DISTINCT(UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
|
||||
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
|
||||
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("Refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum("Refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
#SELECT WatchID, ClientIP, count(*) AS c, sum("Refresh"), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT URL, count(*) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
|
||||
SELECT 1, URL, count(*) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
|
||||
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM {table} GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
|
||||
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "Refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
|
||||
SELECT Title, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "Refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
|
||||
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT URLHash, EventDate, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
|
||||
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
|
||||
SELECT DATE_TRUNC(minute, EventTime) AS "Minute", count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "Refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC(minute, EventTime) ORDER BY DATE_TRUNC(minute, EventTime);
|
@ -1,12 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
grep -v -P '^#' queries.sql | sed -e 's/{table}/hits_100m_pg/' | while read query; do
|
||||
|
||||
echo 3 | sudo tee /proc/sys/vm/drop_caches
|
||||
|
||||
echo "$query";
|
||||
for i in {1..3}; do
|
||||
# For some reason JIT does not work on my machine
|
||||
sudo -u postgres psql tutorial -t -c 'set jit = off' -c '\timing' -c "$query" | grep 'Time' | tee --append log
|
||||
done;
|
||||
done;
|
@ -1,142 +0,0 @@
|
||||
Create a table in PostgreSQL:
|
||||
|
||||
```
|
||||
CREATE TABLE hits_100m_pg
|
||||
(
|
||||
WatchID BIGINT NOT NULL,
|
||||
JavaEnable SMALLINT NOT NULL,
|
||||
Title TEXT NOT NULL,
|
||||
GoodEvent SMALLINT NOT NULL,
|
||||
EventTime TIMESTAMP NOT NULL,
|
||||
EventDate Date NOT NULL,
|
||||
CounterID INTEGER NOT NULL,
|
||||
ClientIP INTEGER NOT NULL,
|
||||
RegionID INTEGER NOT NULL,
|
||||
UserID BIGINT NOT NULL,
|
||||
CounterClass SMALLINT NOT NULL,
|
||||
OS SMALLINT NOT NULL,
|
||||
UserAgent SMALLINT NOT NULL,
|
||||
URL TEXT NOT NULL,
|
||||
Referer TEXT NOT NULL,
|
||||
Refresh SMALLINT NOT NULL,
|
||||
RefererCategoryID SMALLINT NOT NULL,
|
||||
RefererRegionID INTEGER NOT NULL,
|
||||
URLCategoryID SMALLINT NOT NULL,
|
||||
URLRegionID INTEGER NOT NULL,
|
||||
ResolutionWidth SMALLINT NOT NULL,
|
||||
ResolutionHeight SMALLINT NOT NULL,
|
||||
ResolutionDepth SMALLINT NOT NULL,
|
||||
FlashMajor SMALLINT NOT NULL,
|
||||
FlashMinor SMALLINT NOT NULL,
|
||||
FlashMinor2 TEXT NOT NULL,
|
||||
NetMajor SMALLINT NOT NULL,
|
||||
NetMinor SMALLINT NOT NULL,
|
||||
UserAgentMajor SMALLINT NOT NULL,
|
||||
UserAgentMinor CHAR(2) NOT NULL,
|
||||
CookieEnable SMALLINT NOT NULL,
|
||||
JavascriptEnable SMALLINT NOT NULL,
|
||||
IsMobile SMALLINT NOT NULL,
|
||||
MobilePhone SMALLINT NOT NULL,
|
||||
MobilePhoneModel TEXT NOT NULL,
|
||||
Params TEXT NOT NULL,
|
||||
IPNetworkID INTEGER NOT NULL,
|
||||
TraficSourceID SMALLINT NOT NULL,
|
||||
SearchEngineID SMALLINT NOT NULL,
|
||||
SearchPhrase TEXT NOT NULL,
|
||||
AdvEngineID SMALLINT NOT NULL,
|
||||
IsArtifical SMALLINT NOT NULL,
|
||||
WindowClientWidth SMALLINT NOT NULL,
|
||||
WindowClientHeight SMALLINT NOT NULL,
|
||||
ClientTimeZone SMALLINT NOT NULL,
|
||||
ClientEventTime TIMESTAMP NOT NULL,
|
||||
SilverlightVersion1 SMALLINT NOT NULL,
|
||||
SilverlightVersion2 SMALLINT NOT NULL,
|
||||
SilverlightVersion3 INTEGER NOT NULL,
|
||||
SilverlightVersion4 SMALLINT NOT NULL,
|
||||
PageCharset TEXT NOT NULL,
|
||||
CodeVersion INTEGER NOT NULL,
|
||||
IsLink SMALLINT NOT NULL,
|
||||
IsDownload SMALLINT NOT NULL,
|
||||
IsNotBounce SMALLINT NOT NULL,
|
||||
FUniqID BIGINT NOT NULL,
|
||||
OriginalURL TEXT NOT NULL,
|
||||
HID INTEGER NOT NULL,
|
||||
IsOldCounter SMALLINT NOT NULL,
|
||||
IsEvent SMALLINT NOT NULL,
|
||||
IsParameter SMALLINT NOT NULL,
|
||||
DontCountHits SMALLINT NOT NULL,
|
||||
WithHash SMALLINT NOT NULL,
|
||||
HitColor CHAR NOT NULL,
|
||||
LocalEventTime TIMESTAMP NOT NULL,
|
||||
Age SMALLINT NOT NULL,
|
||||
Sex SMALLINT NOT NULL,
|
||||
Income SMALLINT NOT NULL,
|
||||
Interests SMALLINT NOT NULL,
|
||||
Robotness SMALLINT NOT NULL,
|
||||
RemoteIP INTEGER NOT NULL,
|
||||
WindowName INTEGER NOT NULL,
|
||||
OpenerName INTEGER NOT NULL,
|
||||
HistoryLength SMALLINT NOT NULL,
|
||||
BrowserLanguage TEXT NOT NULL,
|
||||
BrowserCountry TEXT NOT NULL,
|
||||
SocialNetwork TEXT NOT NULL,
|
||||
SocialAction TEXT NOT NULL,
|
||||
HTTPError SMALLINT NOT NULL,
|
||||
SendTiming INTEGER NOT NULL,
|
||||
DNSTiming INTEGER NOT NULL,
|
||||
ConnectTiming INTEGER NOT NULL,
|
||||
ResponseStartTiming INTEGER NOT NULL,
|
||||
ResponseEndTiming INTEGER NOT NULL,
|
||||
FetchTiming INTEGER NOT NULL,
|
||||
SocialSourceNetworkID SMALLINT NOT NULL,
|
||||
SocialSourcePage TEXT NOT NULL,
|
||||
ParamPrice BIGINT NOT NULL,
|
||||
ParamOrderID TEXT NOT NULL,
|
||||
ParamCurrency TEXT NOT NULL,
|
||||
ParamCurrencyID SMALLINT NOT NULL,
|
||||
OpenstatServiceName TEXT NOT NULL,
|
||||
OpenstatCampaignID TEXT NOT NULL,
|
||||
OpenstatAdID TEXT NOT NULL,
|
||||
OpenstatSourceID TEXT NOT NULL,
|
||||
UTMSource TEXT NOT NULL,
|
||||
UTMMedium TEXT NOT NULL,
|
||||
UTMCampaign TEXT NOT NULL,
|
||||
UTMContent TEXT NOT NULL,
|
||||
UTMTerm TEXT NOT NULL,
|
||||
FromTag TEXT NOT NULL,
|
||||
HasGCLID SMALLINT NOT NULL,
|
||||
RefererHash BIGINT NOT NULL,
|
||||
URLHash BIGINT NOT NULL,
|
||||
CLID INTEGER NOT NULL
|
||||
);
|
||||
```
|
||||
|
||||
Create a dump from ClickHouse:
|
||||
|
||||
```
|
||||
SELECT WatchID::Int64, JavaEnable, replaceAll(replaceAll(replaceAll(toValidUTF8(Title), '\0', ''), '"', ''), '\\', ''), GoodEvent, EventTime, EventDate, CounterID::Int32, ClientIP::Int32, RegionID::Int32,
|
||||
UserID::Int64, CounterClass, OS, UserAgent, replaceAll(replaceAll(replaceAll(toValidUTF8(URL), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(Referer), '\0', ''), '"', ''), '\\', ''), Refresh, RefererCategoryID::Int16, RefererRegionID::Int32,
|
||||
URLCategoryID::Int16, URLRegionID::Int32, ResolutionWidth::Int16, ResolutionHeight::Int16, ResolutionDepth, FlashMajor, FlashMinor,
|
||||
FlashMinor2, NetMajor, NetMinor, UserAgentMajor::Int16, replaceAll(replaceAll(replaceAll(toValidUTF8(UserAgentMinor::String), '\0', ''), '"', ''), '\\', ''), CookieEnable, JavascriptEnable, IsMobile, MobilePhone,
|
||||
replaceAll(replaceAll(replaceAll(toValidUTF8(MobilePhoneModel), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(Params), '\0', ''), '"', ''), '\\', ''), IPNetworkID::Int32, TraficSourceID, SearchEngineID::Int16, replaceAll(replaceAll(replaceAll(toValidUTF8(SearchPhrase), '\0', ''), '"', ''), '\\', ''),
|
||||
AdvEngineID, IsArtifical, WindowClientWidth::Int16, WindowClientHeight::Int16, ClientTimeZone, ClientEventTime,
|
||||
SilverlightVersion1, SilverlightVersion2, SilverlightVersion3::Int32, SilverlightVersion4::Int16, replaceAll(replaceAll(replaceAll(toValidUTF8(PageCharset), '\0', ''), '"', ''), '\\', ''),
|
||||
CodeVersion::Int32, IsLink, IsDownload, IsNotBounce, FUniqID::Int64, replaceAll(replaceAll(replaceAll(toValidUTF8(OriginalURL), '\0', ''), '"', ''), '\\', ''), HID::Int32, IsOldCounter, IsEvent,
|
||||
IsParameter, DontCountHits, WithHash, replaceAll(replaceAll(replaceAll(toValidUTF8(HitColor::String), '\0', ''), '"', ''), '\\', ''), LocalEventTime, Age, Sex, Income, Interests::Int16, Robotness, RemoteIP::Int32,
|
||||
WindowName, OpenerName, HistoryLength, replaceAll(replaceAll(replaceAll(toValidUTF8(BrowserLanguage::String), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(BrowserCountry::String), '\0', ''), '"', ''), '\\', ''),
|
||||
replaceAll(replaceAll(replaceAll(toValidUTF8(SocialNetwork), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(SocialAction), '\0', ''), '"', ''), '\\', ''),
|
||||
HTTPError, least(SendTiming, 30000), least(DNSTiming, 30000), least(ConnectTiming, 30000), least(ResponseStartTiming, 30000),
|
||||
least(ResponseEndTiming, 30000), least(FetchTiming, 30000), SocialSourceNetworkID,
|
||||
replaceAll(replaceAll(replaceAll(toValidUTF8(SocialSourcePage), '\0', ''), '"', ''), '\\', ''), ParamPrice, replaceAll(replaceAll(replaceAll(toValidUTF8(ParamOrderID), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(ParamCurrency::String), '\0', ''), '"', ''), '\\', ''),
|
||||
ParamCurrencyID::Int16, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID,
|
||||
UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, RefererHash::Int64, URLHash::Int64, CLID::Int32
|
||||
FROM hits_100m_obfuscated
|
||||
INTO OUTFILE 'dump.tsv'
|
||||
FORMAT TSV
|
||||
```
|
||||
|
||||
Insert data into PostgreSQL:
|
||||
|
||||
```
|
||||
\copy hits_100m_pg FROM 'dump.tsv';
|
||||
```
|
@ -1,129 +0,0 @@
|
||||
Time: 122020.258 ms (02:02.020)
|
||||
Time: 5060.281 ms (00:05.060)
|
||||
Time: 5052.692 ms (00:05.053)
|
||||
Time: 129594.172 ms (02:09.594)
|
||||
Time: 8079.623 ms (00:08.080)
|
||||
Time: 7866.964 ms (00:07.867)
|
||||
Time: 129584.717 ms (02:09.585)
|
||||
Time: 8276.161 ms (00:08.276)
|
||||
Time: 8153.295 ms (00:08.153)
|
||||
Time: 123707.890 ms (02:03.708)
|
||||
Time: 6835.297 ms (00:06.835)
|
||||
Time: 6607.039 ms (00:06.607)
|
||||
Time: 166640.676 ms (02:46.641)
|
||||
Time: 75401.239 ms (01:15.401)
|
||||
Time: 73526.027 ms (01:13.526)
|
||||
Time: 272715.750 ms (04:32.716)
|
||||
Time: 182721.613 ms (03:02.722)
|
||||
Time: 182880.525 ms (03:02.881)
|
||||
Time: 127108.191 ms (02:07.108)
|
||||
Time: 6542.913 ms (00:06.543)
|
||||
Time: 6339.887 ms (00:06.340)
|
||||
Time: 127339.314 ms (02:07.339)
|
||||
Time: 8376.381 ms (00:08.376)
|
||||
Time: 7831.872 ms (00:07.832)
|
||||
Time: 179176.439 ms (02:59.176)
|
||||
Time: 58559.297 ms (00:58.559)
|
||||
Time: 58139.265 ms (00:58.139)
|
||||
Time: 182019.101 ms (03:02.019)
|
||||
Time: 58435.027 ms (00:58.435)
|
||||
Time: 58130.994 ms (00:58.131)
|
||||
Time: 132449.502 ms (02:12.450)
|
||||
Time: 11203.104 ms (00:11.203)
|
||||
Time: 11048.435 ms (00:11.048)
|
||||
Time: 128445.641 ms (02:08.446)
|
||||
Time: 11602.145 ms (00:11.602)
|
||||
Time: 11418.356 ms (00:11.418)
|
||||
Time: 162831.387 ms (02:42.831)
|
||||
Time: 41510.710 ms (00:41.511)
|
||||
Time: 41682.899 ms (00:41.683)
|
||||
Time: 171898.965 ms (02:51.899)
|
||||
Time: 47379.274 ms (00:47.379)
|
||||
Time: 47429.908 ms (00:47.430)
|
||||
Time: 161607.811 ms (02:41.608)
|
||||
Time: 41674.409 ms (00:41.674)
|
||||
Time: 40854.340 ms (00:40.854)
|
||||
Time: 175247.929 ms (02:55.248)
|
||||
Time: 46721.776 ms (00:46.722)
|
||||
Time: 46507.631 ms (00:46.508)
|
||||
Time: 335961.271 ms (05:35.961)
|
||||
Time: 248535.866 ms (04:08.536)
|
||||
Time: 247383.678 ms (04:07.384)
|
||||
Time: 132852.983 ms (02:12.853)
|
||||
Time: 14939.304 ms (00:14.939)
|
||||
Time: 14607.525 ms (00:14.608)
|
||||
Time: 243461.844 ms (04:03.462)
|
||||
Time: 157307.904 ms (02:37.308)
|
||||
Time: 155093.101 ms (02:35.093)
|
||||
Time: 122090.761 ms (02:02.091)
|
||||
Time: 6411.266 ms (00:06.411)
|
||||
Time: 6308.178 ms (00:06.308)
|
||||
Time: 126584.819 ms (02:06.585)
|
||||
Time: 8836.471 ms (00:08.836)
|
||||
Time: 8532.176 ms (00:08.532)
|
||||
Time: 125225.097 ms (02:05.225)
|
||||
Time: 10236.910 ms (00:10.237)
|
||||
Time: 9849.757 ms (00:09.850)
|
||||
Time: 139140.064 ms (02:19.140)
|
||||
Time: 21797.859 ms (00:21.798)
|
||||
Time: 21559.214 ms (00:21.559)
|
||||
Time: 124757.485 ms (02:04.757)
|
||||
Time: 8728.403 ms (00:08.728)
|
||||
Time: 8714.130 ms (00:08.714)
|
||||
Time: 120687.258 ms (02:00.687)
|
||||
Time: 8366.245 ms (00:08.366)
|
||||
Time: 8146.856 ms (00:08.147)
|
||||
Time: 122327.148 ms (02:02.327)
|
||||
Time: 8698.359 ms (00:08.698)
|
||||
Time: 8480.807 ms (00:08.481)
|
||||
Time: 123958.614 ms (02:03.959)
|
||||
Time: 8595.931 ms (00:08.596)
|
||||
Time: 8241.773 ms (00:08.242)
|
||||
Time: 128982.905 ms (02:08.983)
|
||||
Time: 11252.783 ms (00:11.253)
|
||||
Time: 10957.931 ms (00:10.958)
|
||||
Time: 208455.385 ms (03:28.455)
|
||||
Time: 102530.897 ms (01:42.531)
|
||||
Time: 102049.298 ms (01:42.049)
|
||||
Time: 131268.420 ms (02:11.268)
|
||||
Time: 21094.466 ms (00:21.094)
|
||||
Time: 20934.610 ms (00:20.935)
|
||||
Time: 164084.134 ms (02:44.084)
|
||||
Time: 77418.547 ms (01:17.419)
|
||||
Time: 75422.290 ms (01:15.422)
|
||||
Time: 174800.022 ms (02:54.800)
|
||||
Time: 87859.594 ms (01:27.860)
|
||||
Time: 85733.954 ms (01:25.734)
|
||||
Time: 419357.463 ms (06:59.357)
|
||||
Time: 339047.269 ms (05:39.047)
|
||||
Time: 334808.230 ms (05:34.808)
|
||||
Time: 475011.901 ms (07:55.012)
|
||||
Time: 344406.246 ms (05:44.406)
|
||||
Time: 347197.731 ms (05:47.198)
|
||||
Time: 464657.732 ms (07:44.658)
|
||||
Time: 332084.079 ms (05:32.084)
|
||||
Time: 330921.322 ms (05:30.921)
|
||||
Time: 152490.615 ms (02:32.491)
|
||||
Time: 30954.343 ms (00:30.954)
|
||||
Time: 31379.062 ms (00:31.379)
|
||||
Time: 128539.127 ms (02:08.539)
|
||||
Time: 12802.672 ms (00:12.803)
|
||||
Time: 12494.088 ms (00:12.494)
|
||||
Time: 125850.120 ms (02:05.850)
|
||||
Time: 10318.773 ms (00:10.319)
|
||||
Time: 9953.030 ms (00:09.953)
|
||||
Time: 126602.092 ms (02:06.602)
|
||||
Time: 8935.571 ms (00:08.936)
|
||||
Time: 8711.184 ms (00:08.711)
|
||||
Time: 133222.456 ms (02:13.222)
|
||||
Time: 11848.869 ms (00:11.849)
|
||||
Time: 11752.640 ms (00:11.753)
|
||||
Time: 126950.067 ms (02:06.950)
|
||||
Time: 11260.892 ms (00:11.261)
|
||||
Time: 10943.649 ms (00:10.944)
|
||||
Time: 128451.171 ms (02:08.451)
|
||||
Time: 10984.980 ms (00:10.985)
|
||||
Time: 10770.609 ms (00:10.771)
|
||||
Time: 124621.000 ms (02:04.621)
|
||||
Time: 8885.466 ms (00:08.885)
|
||||
Time: 8857.296 ms (00:08.857)
|
@ -1,43 +0,0 @@
|
||||
SELECT count(*) FROM {table};
|
||||
SELECT count(*) FROM {table} WHERE AdvEngineID != 0;
|
||||
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM {table};
|
||||
SELECT sum(UserID) FROM {table};
|
||||
SELECT COUNT(DISTINCT UserID) FROM {table};
|
||||
SELECT COUNT(DISTINCT SearchPhrase) FROM {table};
|
||||
SELECT min(EventDate), max(EventDate) FROM {table};
|
||||
SELECT AdvEngineID, count(*) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
|
||||
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
|
||||
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
|
||||
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
SELECT SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT UserID, count(*) FROM {table} GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
|
||||
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID FROM {table} WHERE UserID = -6101065172474983726;
|
||||
SELECT count(*) FROM {table} WHERE URL LIKE '%metrika%';
|
||||
SELECT SearchPhrase, min(URL), count(*) AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
|
||||
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
|
||||
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT URL, count(*) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
|
||||
SELECT 1, URL, count(*) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
|
||||
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM {table} GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
|
||||
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
|
||||
SELECT Title, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
|
||||
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT URLHash, EventDate, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
|
||||
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
|
||||
SELECT DATE_TRUNC('minute', EventTime) AS "Minute", count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime);
|
@ -1,11 +0,0 @@
|
||||
#!/bin/bash
#
# Cold/warm benchmark driver for PostgreSQL.
#
# Reads queries.sql (one query per line; lines starting with '#' are skipped),
# substitutes the {table} placeholder with hits_100m_obfuscated, and runs every
# query three times through psql against the "tutorial" database. The OS page
# cache is dropped once before each query, so the first run is cold and the
# following two are warm. The "Time: ..." lines reported by psql's \timing are
# printed and appended to the file "log" in the current directory.
#
# Requires sudo (to drop caches and to run psql as the postgres user).

grep -v -P '^#' queries.sql | sed -e 's/{table}/hits_100m_obfuscated/' |
while IFS= read -r query; do
    # `read -r` keeps backslashes intact. Without it, a query such as
    #   REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1')
    # reaches psql as '...(?:www.)?...' / '1', i.e. a different query than the
    # one stored in queries.sql. `IFS=` preserves leading/trailing whitespace.

    # Flush dirty pages first so that drop_caches can actually evict them,
    # then drop page cache, dentries and inodes. tee echoes the "3" to stdout.
    sync
    echo 3 | sudo tee /proc/sys/vm/drop_caches

    echo "$query"
    for i in {1..3}; do
        # JIT is disabled for every run; -t suppresses headers/footers so only
        # the \timing line matches the grep below.
        sudo -u postgres psql tutorial -t -c 'set jit = off' -c '\timing' -c "$query" | grep 'Time' | tee --append log
    done
done
|
@ -1,215 +0,0 @@
|
||||
3
|
||||
SELECT count(*) FROM hits_100m_obfuscated;
|
||||
Time: 3259.733 ms (00:03.260)
|
||||
Time: 3135.484 ms (00:03.135)
|
||||
Time: 3135.579 ms (00:03.136)
|
||||
3
|
||||
SELECT count(*) FROM hits_100m_obfuscated WHERE AdvEngineID != 0;
|
||||
Time: 146854.557 ms (02:26.855)
|
||||
Time: 6921.736 ms (00:06.922)
|
||||
Time: 6619.892 ms (00:06.620)
|
||||
3
|
||||
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_100m_obfuscated;
|
||||
Time: 146568.297 ms (02:26.568)
|
||||
Time: 7481.610 ms (00:07.482)
|
||||
Time: 7258.209 ms (00:07.258)
|
||||
3
|
||||
SELECT sum(UserID) FROM hits_100m_obfuscated;
|
||||
Time: 146864.106 ms (02:26.864)
|
||||
Time: 5690.024 ms (00:05.690)
|
||||
Time: 5381.820 ms (00:05.382)
|
||||
3
|
||||
SELECT COUNT(DISTINCT UserID) FROM hits_100m_obfuscated;
|
||||
Time: 227507.331 ms (03:47.507)
|
||||
Time: 69165.471 ms (01:09.165)
|
||||
Time: 72216.950 ms (01:12.217)
|
||||
3
|
||||
SELECT COUNT(DISTINCT SearchPhrase) FROM hits_100m_obfuscated;
|
||||
Time: 323644.397 ms (05:23.644)
|
||||
Time: 177578.740 ms (02:57.579)
|
||||
Time: 175055.738 ms (02:55.056)
|
||||
3
|
||||
SELECT min(EventDate), max(EventDate) FROM hits_100m_obfuscated;
|
||||
Time: 146147.843 ms (02:26.148)
|
||||
Time: 5735.128 ms (00:05.735)
|
||||
Time: 5428.638 ms (00:05.429)
|
||||
3
|
||||
SELECT AdvEngineID, count(*) FROM hits_100m_obfuscated WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
|
||||
Time: 148658.450 ms (02:28.658)
|
||||
Time: 7014.882 ms (00:07.015)
|
||||
Time: 6599.736 ms (00:06.600)
|
||||
3
|
||||
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated GROUP BY RegionID ORDER BY u DESC LIMIT 10;
|
||||
Time: 202423.122 ms (03:22.423)
|
||||
Time: 54439.047 ms (00:54.439)
|
||||
Time: 54800.354 ms (00:54.800)
|
||||
3
|
||||
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits_100m_obfuscated GROUP BY RegionID ORDER BY c DESC LIMIT 10;
|
||||
Time: 201152.491 ms (03:21.152)
|
||||
Time: 55875.854 ms (00:55.876)
|
||||
Time: 55200.330 ms (00:55.200)
|
||||
3
|
||||
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
Time: 146042.603 ms (02:26.043)
|
||||
Time: 9931.633 ms (00:09.932)
|
||||
Time: 10037.032 ms (00:10.037)
|
||||
3
|
||||
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
Time: 150811.952 ms (02:30.812)
|
||||
Time: 10320.230 ms (00:10.320)
|
||||
Time: 9993.232 ms (00:09.993)
|
||||
3
|
||||
SELECT SearchPhrase, count(*) AS c FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
Time: 173071.218 ms (02:53.071)
|
||||
Time: 34314.835 ms (00:34.315)
|
||||
Time: 34420.919 ms (00:34.421)
|
||||
3
|
||||
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
Time: 172874.155 ms (02:52.874)
|
||||
Time: 43704.494 ms (00:43.704)
|
||||
Time: 43918.380 ms (00:43.918)
|
||||
3
|
||||
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
Time: 178484.822 ms (02:58.485)
|
||||
Time: 36850.436 ms (00:36.850)
|
||||
Time: 35789.029 ms (00:35.789)
|
||||
3
|
||||
SELECT UserID, count(*) FROM hits_100m_obfuscated GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
|
||||
Time: 169720.759 ms (02:49.721)
|
||||
Time: 24125.730 ms (00:24.126)
|
||||
Time: 23782.745 ms (00:23.783)
|
||||
3
|
||||
SELECT UserID, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
Time: 182335.631 ms (03:02.336)
|
||||
Time: 37324.563 ms (00:37.325)
|
||||
Time: 37124.250 ms (00:37.124)
|
||||
3
|
||||
SELECT UserID, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, SearchPhrase LIMIT 10;
|
||||
Time: 163799.714 ms (02:43.800)
|
||||
Time: 18514.031 ms (00:18.514)
|
||||
Time: 18968.524 ms (00:18.969)
|
||||
3
|
||||
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
Time: 294799.480 ms (04:54.799)
|
||||
Time: 149592.992 ms (02:29.593)
|
||||
Time: 149466.291 ms (02:29.466)
|
||||
3
|
||||
SELECT UserID FROM hits_100m_obfuscated WHERE UserID = -6101065172474983726;
|
||||
Time: 140797.496 ms (02:20.797)
|
||||
Time: 5312.321 ms (00:05.312)
|
||||
Time: 5020.502 ms (00:05.021)
|
||||
3
|
||||
SELECT count(*) FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%';
|
||||
Time: 143092.287 ms (02:23.092)
|
||||
Time: 7893.874 ms (00:07.894)
|
||||
Time: 7661.326 ms (00:07.661)
|
||||
3
|
||||
SELECT SearchPhrase, min(URL), count(*) AS c FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
Time: 143682.424 ms (02:23.682)
|
||||
Time: 9249.962 ms (00:09.250)
|
||||
Time: 9073.876 ms (00:09.074)
|
||||
3
|
||||
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM hits_100m_obfuscated WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
Time: 150965.884 ms (02:30.966)
|
||||
Time: 20350.812 ms (00:20.351)
|
||||
Time: 20074.939 ms (00:20.075)
|
||||
3
|
||||
SELECT * FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
|
||||
Time: 4674.669 ms (00:04.675)
|
||||
Time: 4532.389 ms (00:04.532)
|
||||
Time: 4555.457 ms (00:04.555)
|
||||
3
|
||||
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
|
||||
Time: 5.177 ms
|
||||
Time: 5.031 ms
|
||||
Time: 4.419 ms
|
||||
3
|
||||
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
|
||||
Time: 141152.210 ms (02:21.152)
|
||||
Time: 7492.968 ms (00:07.493)
|
||||
Time: 7300.428 ms (00:07.300)
|
||||
3
|
||||
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
|
||||
Time: 30.736 ms
|
||||
Time: 5.018 ms
|
||||
Time: 5.132 ms
|
||||
3
|
||||
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM hits_100m_obfuscated WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
Time: 144034.016 ms (02:24.034)
|
||||
Time: 10701.672 ms (00:10.702)
|
||||
Time: 10348.565 ms (00:10.349)
|
||||
3
|
||||
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www.)?([^/]+)/.*$', '1') AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM hits_100m_obfuscated WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
Time: 191575.080 ms (03:11.575)
|
||||
Time: 97836.706 ms (01:37.837)
|
||||
Time: 97673.219 ms (01:37.673)
|
||||
3
|
||||
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_100m_obfuscated;
|
||||
Time: 143652.317 ms (02:23.652)
|
||||
Time: 22185.656 ms (00:22.186)
|
||||
Time: 21887.411 ms (00:21.887)
|
||||
3
|
||||
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
Time: 153481.944 ms (02:33.482)
|
||||
Time: 17748.628 ms (00:17.749)
|
||||
Time: 17551.116 ms (00:17.551)
|
||||
3
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
Time: 167448.684 ms (02:47.449)
|
||||
Time: 25902.961 ms (00:25.903)
|
||||
Time: 25592.018 ms (00:25.592)
|
||||
3
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
Time: 299183.443 ms (04:59.183)
|
||||
Time: 145349.772 ms (02:25.350)
|
||||
Time: 143214.688 ms (02:23.215)
|
||||
3
|
||||
SELECT URL, count(*) AS c FROM hits_100m_obfuscated GROUP BY URL ORDER BY c DESC LIMIT 10;
|
||||
Time: 389851.369 ms (06:29.851)
|
||||
Time: 228158.639 ms (03:48.159)
|
||||
Time: 231811.118 ms (03:51.811)
|
||||
3
|
||||
SELECT 1, URL, count(*) AS c FROM hits_100m_obfuscated GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
|
||||
Time: 407458.343 ms (06:47.458)
|
||||
Time: 230125.530 ms (03:50.126)
|
||||
Time: 230764.511 ms (03:50.765)
|
||||
3
|
||||
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits_100m_obfuscated GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
|
||||
Time: 174098.556 ms (02:54.099)
|
||||
Time: 23503.975 ms (00:23.504)
|
||||
Time: 24322.856 ms (00:24.323)
|
||||
3
|
||||
SELECT URL, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
|
||||
Time: 145906.025 ms (02:25.906)
|
||||
Time: 10824.695 ms (00:10.825)
|
||||
Time: 10484.885 ms (00:10.485)
|
||||
3
|
||||
SELECT Title, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
|
||||
Time: 144063.711 ms (02:24.064)
|
||||
Time: 8947.980 ms (00:08.948)
|
||||
Time: 8608.434 ms (00:08.608)
|
||||
3
|
||||
SELECT URL, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
|
||||
Time: 141883.596 ms (02:21.884)
|
||||
Time: 7977.257 ms (00:07.977)
|
||||
Time: 7673.547 ms (00:07.674)
|
||||
3
|
||||
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
|
||||
Time: 147100.084 ms (02:27.100)
|
||||
Time: 9527.812 ms (00:09.528)
|
||||
Time: 9457.663 ms (00:09.458)
|
||||
3
|
||||
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
|
||||
Time: 144585.669 ms (02:24.586)
|
||||
Time: 10815.223 ms (00:10.815)
|
||||
Time: 10594.707 ms (00:10.595)
|
||||
3
|
||||
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
|
||||
Time: 145738.341 ms (02:25.738)
|
||||
Time: 10592.979 ms (00:10.593)
|
||||
Time: 10181.477 ms (00:10.181)
|
||||
3
|
||||
SELECT DATE_TRUNC('minute', EventTime) AS "Minute", count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime);
|
||||
Time: 145023.796 ms (02:25.024)
|
||||
Time: 8035.337 ms (00:08.035)
|
||||
Time: 7865.698 ms (00:07.866)
|
@ -1,129 +0,0 @@
|
||||
Time: 1784.299 ms (00:01.784)
|
||||
Time: 1223.461 ms (00:01.223)
|
||||
Time: 1200.665 ms (00:01.201)
|
||||
Time: 22730.141 ms (00:22.730)
|
||||
Time: 1379.227 ms (00:01.379)
|
||||
Time: 1361.595 ms (00:01.362)
|
||||
Time: 29888.235 ms (00:29.888)
|
||||
Time: 3160.611 ms (00:03.161)
|
||||
Time: 3207.363 ms (00:03.207)
|
||||
Time: 53922.569 ms (00:53.923)
|
||||
Time: 2301.456 ms (00:02.301)
|
||||
Time: 2277.009 ms (00:02.277)
|
||||
Time: 45363.999 ms (00:45.364)
|
||||
Time: 43765.848 ms (00:43.766)
|
||||
Time: 44066.621 ms (00:44.067)
|
||||
Time: 172945.633 ms (02:52.946)
|
||||
Time: 136944.098 ms (02:16.944)
|
||||
Time: 138268.413 ms (02:18.268)
|
||||
Time: 16764.579 ms (00:16.765)
|
||||
Time: 2579.907 ms (00:02.580)
|
||||
Time: 2590.390 ms (00:02.590)
|
||||
Time: 1498.034 ms (00:01.498)
|
||||
Time: 1434.534 ms (00:01.435)
|
||||
Time: 1448.123 ms (00:01.448)
|
||||
Time: 113533.016 ms (01:53.533)
|
||||
Time: 78465.335 ms (01:18.465)
|
||||
Time: 80778.839 ms (01:20.779)
|
||||
Time: 90456.388 ms (01:30.456)
|
||||
Time: 87050.166 ms (01:27.050)
|
||||
Time: 88426.851 ms (01:28.427)
|
||||
Time: 45021.632 ms (00:45.022)
|
||||
Time: 12486.342 ms (00:12.486)
|
||||
Time: 12222.489 ms (00:12.222)
|
||||
Time: 44246.843 ms (00:44.247)
|
||||
Time: 15606.856 ms (00:15.607)
|
||||
Time: 15251.554 ms (00:15.252)
|
||||
Time: 29654.719 ms (00:29.655)
|
||||
Time: 29441.858 ms (00:29.442)
|
||||
Time: 29608.141 ms (00:29.608)
|
||||
Time: 103547.383 ms (01:43.547)
|
||||
Time: 104733.648 ms (01:44.734)
|
||||
Time: 105779.016 ms (01:45.779)
|
||||
Time: 29695.834 ms (00:29.696)
|
||||
Time: 15395.447 ms (00:15.395)
|
||||
Time: 15819.650 ms (00:15.820)
|
||||
Time: 27841.552 ms (00:27.842)
|
||||
Time: 29521.849 ms (00:29.522)
|
||||
Time: 27508.521 ms (00:27.509)
|
||||
Time: 56665.709 ms (00:56.666)
|
||||
Time: 56459.321 ms (00:56.459)
|
||||
Time: 56407.620 ms (00:56.408)
|
||||
Time: 27488.888 ms (00:27.489)
|
||||
Time: 25557.427 ms (00:25.557)
|
||||
Time: 25634.140 ms (00:25.634)
|
||||
Time: 97376.463 ms (01:37.376)
|
||||
Time: 96047.902 ms (01:36.048)
|
||||
Time: 99918.341 ms (01:39.918)
|
||||
Time: 6294.887 ms (00:06.295)
|
||||
Time: 6407.262 ms (00:06.407)
|
||||
Time: 6376.369 ms (00:06.376)
|
||||
Time: 40787.808 ms (00:40.788)
|
||||
Time: 11206.256 ms (00:11.206)
|
||||
Time: 11219.871 ms (00:11.220)
|
||||
Time: 12420.227 ms (00:12.420)
|
||||
Time: 12548.301 ms (00:12.548)
|
||||
Time: 12468.458 ms (00:12.468)
|
||||
Time: 57679.878 ms (00:57.680)
|
||||
Time: 35466.123 ms (00:35.466)
|
||||
Time: 35562.064 ms (00:35.562)
|
||||
Time: 13551.276 ms (00:13.551)
|
||||
Time: 13417.313 ms (00:13.417)
|
||||
Time: 13645.287 ms (00:13.645)
|
||||
Time: 150.297 ms
|
||||
Time: 55.995 ms
|
||||
Time: 55.796 ms
|
||||
Time: 3059.796 ms (00:03.060)
|
||||
Time: 3038.246 ms (00:03.038)
|
||||
Time: 3041.210 ms (00:03.041)
|
||||
Time: 4461.720 ms (00:04.462)
|
||||
Time: 4446.691 ms (00:04.447)
|
||||
Time: 4424.526 ms (00:04.425)
|
||||
Time: 29275.463 ms (00:29.275)
|
||||
Time: 17558.747 ms (00:17.559)
|
||||
Time: 17438.621 ms (00:17.439)
|
||||
Time: 203316.184 ms (03:23.316)
|
||||
Time: 190037.946 ms (03:10.038)
|
||||
Time: 189276.624 ms (03:09.277)
|
||||
Time: 36921.542 ms (00:36.922)
|
||||
Time: 36963.771 ms (00:36.964)
|
||||
Time: 36660.406 ms (00:36.660)
|
||||
Time: 38307.345 ms (00:38.307)
|
||||
Time: 17597.355 ms (00:17.597)
|
||||
Time: 17324.776 ms (00:17.325)
|
||||
Time: 39857.567 ms (00:39.858)
|
||||
Time: 26776.411 ms (00:26.776)
|
||||
Time: 26592.819 ms (00:26.593)
|
||||
Time: 162782.290 ms (02:42.782)
|
||||
Time: 160722.582 ms (02:40.723)
|
||||
Time: 162487.263 ms (02:42.487)
|
||||
Time: 261494.290 ms (04:21.494)
|
||||
Time: 263594.014 ms (04:23.594)
|
||||
Time: 260436.201 ms (04:20.436)
|
||||
Time: 265758.455 ms (04:25.758)
|
||||
Time: 270087.523 ms (04:30.088)
|
||||
Time: 266617.218 ms (04:26.617)
|
||||
Time: 30677.159 ms (00:30.677)
|
||||
Time: 28933.542 ms (00:28.934)
|
||||
Time: 29815.271 ms (00:29.815)
|
||||
Time: 19754.932 ms (00:19.755)
|
||||
Time: 16851.157 ms (00:16.851)
|
||||
Time: 16703.289 ms (00:16.703)
|
||||
Time: 10379.500 ms (00:10.379)
|
||||
Time: 10267.336 ms (00:10.267)
|
||||
Time: 10287.944 ms (00:10.288)
|
||||
Time: 17320.582 ms (00:17.321)
|
||||
Time: 9786.410 ms (00:09.786)
|
||||
Time: 9760.578 ms (00:09.761)
|
||||
Time: 33487.352 ms (00:33.487)
|
||||
Time: 26056.528 ms (00:26.057)
|
||||
Time: 25958.258 ms (00:25.958)
|
||||
Time: 28020.227 ms (00:28.020)
|
||||
Time: 5609.725 ms (00:05.610)
|
||||
Time: 5538.744 ms (00:05.539)
|
||||
Time: 15119.473 ms (00:15.119)
|
||||
Time: 5057.455 ms (00:05.057)
|
||||
Time: 5063.154 ms (00:05.063)
|
||||
Time: 3627.703 ms (00:03.628)
|
||||
Time: 3645.232 ms (00:03.645)
|
||||
Time: 3546.855 ms (00:03.547)
|
@ -1,43 +0,0 @@
|
||||
SELECT count(*) FROM {table};
|
||||
SELECT count(*) FROM {table} WHERE AdvEngineID != 0;
|
||||
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM {table};
|
||||
SELECT sum(UserID) FROM {table};
|
||||
SELECT COUNT(DISTINCT UserID) FROM {table};
|
||||
SELECT COUNT(DISTINCT SearchPhrase) FROM {table};
|
||||
SELECT min(EventDate), max(EventDate) FROM {table};
|
||||
SELECT AdvEngineID, count(*) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
|
||||
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
|
||||
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
|
||||
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
SELECT SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT UserID, count(*) FROM {table} GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
|
||||
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID FROM {table} WHERE UserID = -6101065172474983726;
|
||||
SELECT count(*) FROM {table} WHERE URL LIKE '%metrika%';
|
||||
SELECT SearchPhrase, min(URL), count(*) AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
|
||||
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), 
sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
|
||||
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT URL, count(*) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
|
||||
SELECT 1, URL, count(*) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
|
||||
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM {table} GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
|
||||
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
|
||||
SELECT Title, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
|
||||
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT URLHash, EventDate, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
|
||||
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
|
||||
SELECT DATE_TRUNC('minute', EventTime) AS "Minute", count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime);
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user