Merge branch 'master' into avoid_printing_settings

This commit is contained in:
mergify[bot] 2022-07-23 19:10:07 +00:00 committed by GitHub
commit 607c7f0ed8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
991 changed files with 34384 additions and 115171 deletions

View File

@ -1,6 +1,6 @@
---
name: Bug report
about: Wrong behaviour (visible to users) in official ClickHouse release.
about: Wrong behavior (visible to users) in the official ClickHouse release.
title: ''
labels: 'potential bug'
assignees: ''

View File

@ -8,7 +8,7 @@ concurrency:
group: cherry-pick
on: # yamllint disable-line rule:truthy
schedule:
- cron: '0 */3 * * *'
- cron: '0 * * * *'
workflow_dispatch:
jobs:

View File

@ -102,6 +102,9 @@ jobs:
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{ runner.temp }}/style_check
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
EOF
- name: Download changed images
# even if artifact does not exist, e.g. on `do not test` label or failed Docker job

View File

@ -108,7 +108,7 @@ jobs:
- name: Style Check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 style_check.py
python3 style_check.py --no-push
- name: Cleanup
if: always()
run: |
@ -2902,7 +2902,7 @@ jobs:
#############################################################################################
#################################### PERFORMANCE TESTS ######################################
#############################################################################################
PerformanceComparison0:
PerformanceComparisonX86-0:
needs: [BuilderDebRelease]
runs-on: [self-hosted, stress-tester]
steps:
@ -2940,7 +2940,7 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
PerformanceComparison1:
PerformanceComparisonX86-1:
needs: [BuilderDebRelease]
runs-on: [self-hosted, stress-tester]
steps:
@ -2978,7 +2978,7 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
PerformanceComparison2:
PerformanceComparisonX86-2:
needs: [BuilderDebRelease]
runs-on: [self-hosted, stress-tester]
steps:
@ -3016,7 +3016,7 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
PerformanceComparison3:
PerformanceComparisonX86-3:
needs: [BuilderDebRelease]
runs-on: [self-hosted, stress-tester]
steps:
@ -3096,10 +3096,10 @@ jobs:
- IntegrationTestsTsan1
- IntegrationTestsTsan2
- IntegrationTestsTsan3
- PerformanceComparison0
- PerformanceComparison1
- PerformanceComparison2
- PerformanceComparison3
- PerformanceComparisonX86-0
- PerformanceComparisonX86-1
- PerformanceComparisonX86-2
- PerformanceComparisonX86-3
- CompatibilityCheck
- ASTFuzzerTestDebug
- ASTFuzzerTestAsan

View File

@ -118,6 +118,9 @@ jobs:
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{ runner.temp }}/style_check
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
EOF
- name: Download changed images
# even if artifact does not exist, e.g. on `do not test` label or failed Docker job
@ -3118,7 +3121,7 @@ jobs:
#############################################################################################
#################################### PERFORMANCE TESTS ######################################
#############################################################################################
PerformanceComparison0:
PerformanceComparisonX86-0:
needs: [BuilderDebRelease]
runs-on: [self-hosted, stress-tester]
steps:
@ -3156,7 +3159,7 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
PerformanceComparison1:
PerformanceComparisonX86-1:
needs: [BuilderDebRelease]
runs-on: [self-hosted, stress-tester]
steps:
@ -3194,7 +3197,7 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
PerformanceComparison2:
PerformanceComparisonX86-2:
needs: [BuilderDebRelease]
runs-on: [self-hosted, stress-tester]
steps:
@ -3232,7 +3235,7 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
PerformanceComparison3:
PerformanceComparisonX86-3:
needs: [BuilderDebRelease]
runs-on: [self-hosted, stress-tester]
steps:
@ -3270,7 +3273,7 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
PerformanceComparisonAarch0:
PerformanceComparisonAarch-0:
needs: [BuilderDebAarch64]
runs-on: [self-hosted, func-tester-aarch64]
steps:
@ -3308,7 +3311,7 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
PerformanceComparisonAarch1:
PerformanceComparisonAarch-1:
needs: [BuilderDebAarch64]
runs-on: [self-hosted, func-tester-aarch64]
steps:
@ -3346,7 +3349,7 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
PerformanceComparisonAarch2:
PerformanceComparisonAarch-2:
needs: [BuilderDebAarch64]
runs-on: [self-hosted, func-tester-aarch64]
steps:
@ -3384,7 +3387,7 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
PerformanceComparisonAarch3:
PerformanceComparisonAarch-3:
needs: [BuilderDebAarch64]
runs-on: [self-hosted, func-tester-aarch64]
steps:
@ -3481,14 +3484,14 @@ jobs:
- IntegrationTestsTsan1
- IntegrationTestsTsan2
- IntegrationTestsTsan3
- PerformanceComparison0
- PerformanceComparison1
- PerformanceComparison2
- PerformanceComparison3
- PerformanceComparisonAarch0
- PerformanceComparisonAarch1
- PerformanceComparisonAarch2
- PerformanceComparisonAarch3
- PerformanceComparisonX86-0
- PerformanceComparisonX86-1
- PerformanceComparisonX86-2
- PerformanceComparisonX86-3
- PerformanceComparisonAarch-0
- PerformanceComparisonAarch-1
- PerformanceComparisonAarch-2
- PerformanceComparisonAarch-3
- UnitTestsAsan
- UnitTestsTsan
- UnitTestsMsan

6
.gitmodules vendored
View File

@ -259,6 +259,9 @@
[submodule "contrib/minizip-ng"]
path = contrib/minizip-ng
url = https://github.com/zlib-ng/minizip-ng
[submodule "contrib/qpl"]
path = contrib/qpl
url = https://github.com/intel/qpl.git
[submodule "contrib/wyhash"]
path = contrib/wyhash
url = https://github.com/wangyi-fudan/wyhash.git
@ -277,3 +280,6 @@
[submodule "contrib/base-x"]
path = contrib/base-x
url = https://github.com/ClickHouse/base-x.git
[submodule "contrib/c-ares"]
path = contrib/c-ares
url = https://github.com/ClickHouse/c-ares

View File

@ -1,4 +1,5 @@
### Table of Contents
**[ClickHouse release v22.7, 2022-07-21](#226)**<br>
**[ClickHouse release v22.6, 2022-06-16](#226)**<br>
**[ClickHouse release v22.5, 2022-05-19](#225)**<br>
**[ClickHouse release v22.4, 2022-04-20](#224)**<br>
@ -7,6 +8,172 @@
**[ClickHouse release v22.1, 2022-01-18](#221)**<br>
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**<br>
### <a id="227"></a> ClickHouse release 22.7, 2022-07-21
#### Upgrade Notes
* Enable setting `enable_positional_arguments` by default. It allows queries like `SELECT ... ORDER BY 1, 2` where 1, 2 are the references to the select clause. If you need to return the old behavior, disable this setting. [#38204](https://github.com/ClickHouse/ClickHouse/pull/38204) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* `Ordinary` database engine and old storage definition syntax for `*MergeTree` tables are deprecated. By default it's not possible to create new databases with `Ordinary` engine. If `system` database has `Ordinary` engine it will be automatically converted to `Atomic` on server startup. There are settings to keep old behavior (`allow_deprecated_database_ordinary` and `allow_deprecated_syntax_for_merge_tree`), but these settings may be removed in future releases. [#38335](https://github.com/ClickHouse/ClickHouse/pull/38335) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Force rewriting comma join to inner by default (set default value `cross_to_inner_join_rewrite = 2`). To have old behavior set `cross_to_inner_join_rewrite = 1`. [#39326](https://github.com/ClickHouse/ClickHouse/pull/39326) ([Vladimir C](https://github.com/vdimir)). If you will face any incompatibilities, you can turn this setting back.
#### New Feature
* Support expressions with window functions. Closes [#19857](https://github.com/ClickHouse/ClickHouse/issues/19857). [#37848](https://github.com/ClickHouse/ClickHouse/pull/37848) ([Dmitry Novik](https://github.com/novikd)).
* Add new `direct` join algorithm for `EmbeddedRocksDB` tables, see [#33582](https://github.com/ClickHouse/ClickHouse/issues/33582). [#35363](https://github.com/ClickHouse/ClickHouse/pull/35363) ([Vladimir C](https://github.com/vdimir)).
* Added full sorting merge join algorithm. [#35796](https://github.com/ClickHouse/ClickHouse/pull/35796) ([Vladimir C](https://github.com/vdimir)).
* Implement NATS table engine, which allows to pub/sub to NATS. Closes [#32388](https://github.com/ClickHouse/ClickHouse/issues/32388). [#37171](https://github.com/ClickHouse/ClickHouse/pull/37171) ([tchepavel](https://github.com/tchepavel)). ([Kseniia Sumarokova](https://github.com/kssenii))
* Implement table function `mongodb`. Allow writes into `MongoDB` storage / table function. [#37213](https://github.com/ClickHouse/ClickHouse/pull/37213) ([aaapetrenko](https://github.com/aaapetrenko)). ([Kseniia Sumarokova](https://github.com/kssenii))
* Add `SQLInsert` output format. Closes [#38441](https://github.com/ClickHouse/ClickHouse/issues/38441). [#38477](https://github.com/ClickHouse/ClickHouse/pull/38477) ([Kruglov Pavel](https://github.com/Avogar)).
* Introduced settings `additional_table_filters`. Using this setting, you can specify additional filtering condition for a table which will be applied directly after reading. Example: `select number, x, y from (select number from system.numbers limit 5) f any left join (select x, y from table_1) s on f.number = s.x settings additional_table_filters={'system.numbers : 'number != 3', 'table_1' : 'x != 2'}`. Introduced setting `additional_result_filter` which specifies additional filtering condition for query result. Closes [#37918](https://github.com/ClickHouse/ClickHouse/issues/37918). [#38475](https://github.com/ClickHouse/ClickHouse/pull/38475) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Add `compatibility` setting and `system.settings_changes` system table that contains information about changes in settings through ClickHouse versions. Closes [#35972](https://github.com/ClickHouse/ClickHouse/issues/35972). [#38957](https://github.com/ClickHouse/ClickHouse/pull/38957) ([Kruglov Pavel](https://github.com/Avogar)).
* Add functions `translate(string, from_string, to_string)` and `translateUTF8(string, from_string, to_string)`. It translates some characters to another. [#38935](https://github.com/ClickHouse/ClickHouse/pull/38935) ([Nikolay Degterinsky](https://github.com/evillique)).
* Support `parseTimeDelta` function. It can be used like ` ;-+,:` can be used as separators, eg. `1yr-2mo`, `2m:6s`: `SELECT parseTimeDelta('1yr-2mo-4w + 12 days, 3 hours : 1 minute ; 33 seconds')`. [#39071](https://github.com/ClickHouse/ClickHouse/pull/39071) ([jiahui-97](https://github.com/jiahui-97)).
* Added `CREATE TABLE ... EMPTY AS SELECT` query. It automatically deduces table structure from the SELECT query, but does not fill the table after creation. Resolves [#38049](https://github.com/ClickHouse/ClickHouse/issues/38049). [#38272](https://github.com/ClickHouse/ClickHouse/pull/38272) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Added options to limit IO operations with remote storage: `max_remote_read_network_bandwidth_for_server` and `max_remote_write_network_bandwidth_for_server`. [#39095](https://github.com/ClickHouse/ClickHouse/pull/39095) ([Sergei Trifonov](https://github.com/serxa)).
* Add `group_by_use_nulls` setting to make aggregation key columns nullable in the case of ROLLUP, CUBE and GROUPING SETS. Closes [#37359](https://github.com/ClickHouse/ClickHouse/issues/37359). [#38642](https://github.com/ClickHouse/ClickHouse/pull/38642) ([Dmitry Novik](https://github.com/novikd)).
* Add the ability to specify compression level during data export. [#38907](https://github.com/ClickHouse/ClickHouse/pull/38907) ([Nikolay Degterinsky](https://github.com/evillique)).
* Add an option to require explicit grants to SELECT from the `system` database. Details: [#38970](https://github.com/ClickHouse/ClickHouse/pull/38970) ([Vitaly Baranov](https://github.com/vitlibar)).
* Functions `multiMatchAny`, `multiMatchAnyIndex`, `multiMatchAllIndices` and their fuzzy variants now accept non-const pattern array argument. [#38485](https://github.com/ClickHouse/ClickHouse/pull/38485) ([Robert Schulze](https://github.com/rschu1ze)). SQL function `multiSearchAllPositions` now accepts non-const needle arguments. [#39167](https://github.com/ClickHouse/ClickHouse/pull/39167) ([Robert Schulze](https://github.com/rschu1ze)).
* Add a setting `zstd_window_log_max` to configure max memory usage on zstd decoding when importing external files. Closes [#35693](https://github.com/ClickHouse/ClickHouse/issues/35693). [#37015](https://github.com/ClickHouse/ClickHouse/pull/37015) ([wuxiaobai24](https://github.com/wuxiaobai24)).
* Add `send_logs_source_regexp` setting. Send server text logs with specified regexp to match log source name. Empty means all sources. [#39161](https://github.com/ClickHouse/ClickHouse/pull/39161) ([Amos Bird](https://github.com/amosbird)).
* Support `ALTER` for `Hive` tables. [#38214](https://github.com/ClickHouse/ClickHouse/pull/38214) ([lgbo](https://github.com/lgbo-ustc)).
* Support `isNullable` function. This function checks whether it's argument is nullable and return 1 or 0. Closes [#38611](https://github.com/ClickHouse/ClickHouse/issues/38611). [#38841](https://github.com/ClickHouse/ClickHouse/pull/38841) ([lokax](https://github.com/lokax)).
* Added functions for base58 encoding/decoding. [#38159](https://github.com/ClickHouse/ClickHouse/pull/38159) ([Andrey Zvonov](https://github.com/zvonand)).
* Add chart visualization to Play UI. [#38197](https://github.com/ClickHouse/ClickHouse/pull/38197) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Added L2 Squared distance and norm functions for both arrays and tuples. [#38545](https://github.com/ClickHouse/ClickHouse/pull/38545) ([Julian Gilyadov](https://github.com/israelg99)).
* Add ability to pass HTTP headers to the `url` table function / storage via SQL. Closes [#37897](https://github.com/ClickHouse/ClickHouse/issues/37897). [#38176](https://github.com/ClickHouse/ClickHouse/pull/38176) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add `clickhouse-diagnostics` binary to the packages. [#38647](https://github.com/ClickHouse/ClickHouse/pull/38647) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Experimental Feature
* Adds new setting `implicit_transaction` to run standalone queries inside a transaction. It handles both creation and closing (via COMMIT if the query succeeded or ROLLBACK if it didn't) of the transaction automatically. [#38344](https://github.com/ClickHouse/ClickHouse/pull/38344) ([Raúl Marín](https://github.com/Algunenano)).
#### Performance Improvement
* Distinct optimization for sorted columns. Use specialized distinct transformation in case input stream is sorted by column(s) in distinct. Optimization can be applied to pre-distinct, final distinct, or both. Initial implementation by @dimarub2000. [#37803](https://github.com/ClickHouse/ClickHouse/pull/37803) ([Igor Nikonov](https://github.com/devcrafter)).
* Improve performance of `ORDER BY`, `MergeTree` merges, window functions using batch version of `BinaryHeap`. [#38022](https://github.com/ClickHouse/ClickHouse/pull/38022) ([Maksim Kita](https://github.com/kitaisreal)).
* More parallel execution for queries with `FINAL` [#36396](https://github.com/ClickHouse/ClickHouse/pull/36396) ([Nikita Taranov](https://github.com/nickitat)).
* Fix significant join performance regression which was introduced in [#35616](https://github.com/ClickHouse/ClickHouse/pull/35616). It's interesting that common join queries such as ssb queries have been 10 times slower for almost 3 months while no one complains. [#38052](https://github.com/ClickHouse/ClickHouse/pull/38052) ([Amos Bird](https://github.com/amosbird)).
* Migrate from the Intel hyperscan library to vectorscan, this speeds up many string matching on non-x86 platforms. [#38171](https://github.com/ClickHouse/ClickHouse/pull/38171) ([Robert Schulze](https://github.com/rschu1ze)).
* Increased parallelism of query plan steps executed after aggregation. [#38295](https://github.com/ClickHouse/ClickHouse/pull/38295) ([Nikita Taranov](https://github.com/nickitat)).
* Improve performance of insertion to columns of type `JSON`. [#38320](https://github.com/ClickHouse/ClickHouse/pull/38320) ([Anton Popov](https://github.com/CurtizJ)).
* Optimized insertion and lookups in the HashTable. [#38413](https://github.com/ClickHouse/ClickHouse/pull/38413) ([Nikita Taranov](https://github.com/nickitat)).
* Fix performance degradation from [#32493](https://github.com/ClickHouse/ClickHouse/issues/32493). [#38417](https://github.com/ClickHouse/ClickHouse/pull/38417) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve performance of joining with numeric columns using SIMD instructions. [#37235](https://github.com/ClickHouse/ClickHouse/pull/37235) ([zzachimed](https://github.com/zzachimed)). [#38565](https://github.com/ClickHouse/ClickHouse/pull/38565) ([Maksim Kita](https://github.com/kitaisreal)).
* Norm and Distance functions for arrays speed up 1.2-2 times. [#38740](https://github.com/ClickHouse/ClickHouse/pull/38740) ([Alexander Gololobov](https://github.com/davenger)).
* Add AVX-512 VBMI optimized `copyOverlap32Shuffle` for LZ4 decompression. In other words, LZ4 decompression performance is improved. [#37891](https://github.com/ClickHouse/ClickHouse/pull/37891) ([Guo Wangyang](https://github.com/guowangy)).
* `ORDER BY (a, b)` will use all the same benefits as `ORDER BY a, b`. [#38873](https://github.com/ClickHouse/ClickHouse/pull/38873) ([Igor Nikonov](https://github.com/devcrafter)).
* Align branches within a 32B boundary to make benchmark more stable. [#38988](https://github.com/ClickHouse/ClickHouse/pull/38988) ([Guo Wangyang](https://github.com/guowangy)). It improves performance 1..2% on average for Intel.
* Executable UDF, executable dictionaries, and Executable tables will avoid wasting one second during wait for subprocess termination. [#38929](https://github.com/ClickHouse/ClickHouse/pull/38929) ([Constantine Peresypkin](https://github.com/pkit)).
* Optimize accesses to `system.stack_trace` table if not all columns are selected. [#39177](https://github.com/ClickHouse/ClickHouse/pull/39177) ([Azat Khuzhin](https://github.com/azat)).
* Improve isNullable/isConstant/isNull/isNotNull performance for LowCardinality argument. [#39192](https://github.com/ClickHouse/ClickHouse/pull/39192) ([Kruglov Pavel](https://github.com/Avogar)).
* Optimized processing of ORDER BY in window functions. [#34632](https://github.com/ClickHouse/ClickHouse/pull/34632) ([Vladimir Chebotarev](https://github.com/excitoon)).
* The table `system.asynchronous_metric_log` is further optimized for storage space. This closes [#38134](https://github.com/ClickHouse/ClickHouse/issues/38134). See the [YouTube video](https://www.youtube.com/watch?v=0fSp9SF8N8A). [#38428](https://github.com/ClickHouse/ClickHouse/pull/38428) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Improvement
* Support SQL standard CREATE INDEX and DROP INDEX syntax. [#35166](https://github.com/ClickHouse/ClickHouse/pull/35166) ([Jianmei Zhang](https://github.com/zhangjmruc)).
* Send profile events for INSERT queries (previously only SELECT was supported). [#37391](https://github.com/ClickHouse/ClickHouse/pull/37391) ([Azat Khuzhin](https://github.com/azat)).
* Implement in order aggregation (`optimize_aggregation_in_order`) for fully materialized projections. [#37469](https://github.com/ClickHouse/ClickHouse/pull/37469) ([Azat Khuzhin](https://github.com/azat)).
* Remove subprocess run for kerberos initialization. Added new integration test. Closes [#27651](https://github.com/ClickHouse/ClickHouse/issues/27651). [#38105](https://github.com/ClickHouse/ClickHouse/pull/38105) ([Roman Vasin](https://github.com/rvasin)).
* * Add setting `multiple_joins_try_to_keep_original_names` to not rewrite identifier name on multiple JOINs rewrite, close [#34697](https://github.com/ClickHouse/ClickHouse/issues/34697). [#38149](https://github.com/ClickHouse/ClickHouse/pull/38149) ([Vladimir C](https://github.com/vdimir)).
* Improved trace-visualizer UX. [#38169](https://github.com/ClickHouse/ClickHouse/pull/38169) ([Sergei Trifonov](https://github.com/serxa)).
* Enable stack trace collection and query profiler for AArch64. [#38181](https://github.com/ClickHouse/ClickHouse/pull/38181) ([Maksim Kita](https://github.com/kitaisreal)).
* Do not skip symlinks in `user_defined` directory during SQL user defined functions loading. Closes [#38042](https://github.com/ClickHouse/ClickHouse/issues/38042). [#38184](https://github.com/ClickHouse/ClickHouse/pull/38184) ([Maksim Kita](https://github.com/kitaisreal)).
* Added background cleanup of subdirectories in `store/`. In some cases clickhouse-server might left garbage subdirectories in `store/` (for example, on unsuccessful table creation) and those dirs were never been removed. Fixes [#33710](https://github.com/ClickHouse/ClickHouse/issues/33710). [#38265](https://github.com/ClickHouse/ClickHouse/pull/38265) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add `DESCRIBE CACHE` query to show cache settings from config. Add `SHOW CACHES` query to show available filesystem caches list. [#38279](https://github.com/ClickHouse/ClickHouse/pull/38279) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add access check for `system drop filesystem cache`. Support ON CLUSTER. [#38319](https://github.com/ClickHouse/ClickHouse/pull/38319) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix PostgreSQL database engine incompatibility on upgrade from 21.3 to 22.3. Closes [#36659](https://github.com/ClickHouse/ClickHouse/issues/36659). [#38369](https://github.com/ClickHouse/ClickHouse/pull/38369) ([Kseniia Sumarokova](https://github.com/kssenii)).
* `filesystemAvailable` and similar functions now work in `clickhouse-local`. This closes [#38423](https://github.com/ClickHouse/ClickHouse/issues/38423). [#38424](https://github.com/ClickHouse/ClickHouse/pull/38424) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add `revision` function. [#38555](https://github.com/ClickHouse/ClickHouse/pull/38555) ([Azat Khuzhin](https://github.com/azat)).
* Fix GCS via proxy tunnel usage. [#38726](https://github.com/ClickHouse/ClickHouse/pull/38726) ([Azat Khuzhin](https://github.com/azat)).
* Support `\i file` in clickhouse client / local (similar to psql \i). [#38813](https://github.com/ClickHouse/ClickHouse/pull/38813) ([Kseniia Sumarokova](https://github.com/kssenii)).
* New option `optimize = 1` in `EXPLAIN AST`. If enabled, it shows AST after it's rewritten, otherwise AST of original query. Disabled by default. [#38910](https://github.com/ClickHouse/ClickHouse/pull/38910) ([Igor Nikonov](https://github.com/devcrafter)).
* Allow trailing comma in columns list. closes [#38425](https://github.com/ClickHouse/ClickHouse/issues/38425). [#38440](https://github.com/ClickHouse/ClickHouse/pull/38440) ([chen](https://github.com/xiedeyantu)).
* Bugfixes and performance improvements for `parallel_hash` JOIN method. [#37648](https://github.com/ClickHouse/ClickHouse/pull/37648) ([Vladimir C](https://github.com/vdimir)).
* Support hadoop secure RPC transfer (hadoop.rpc.protection=privacy and hadoop.rpc.protection=integrity). [#37852](https://github.com/ClickHouse/ClickHouse/pull/37852) ([Peng Liu](https://github.com/michael1589)).
* Add struct type support in `StorageHive`. [#38118](https://github.com/ClickHouse/ClickHouse/pull/38118) ([lgbo](https://github.com/lgbo-ustc)).
* S3 single objects are now removed with `RemoveObjectRequest`. Implement compatibility with GCP which did not allow to use `removeFileIfExists` effectively breaking approximately half of `remove` functionality. Automatic detection for `DeleteObjects` S3 API, that is not supported by GCS. This will allow to use GCS without explicit `support_batch_delete=0` in configuration. [#37882](https://github.com/ClickHouse/ClickHouse/pull/37882) ([Vladimir Chebotarev](https://github.com/excitoon)).
* Expose basic ClickHouse Keeper related monitoring data (via ProfileEvents and CurrentMetrics). [#38072](https://github.com/ClickHouse/ClickHouse/pull/38072) ([lingpeng0314](https://github.com/lingpeng0314)).
* Support `auto_close` option for PostgreSQL engine connection. Closes [#31486](https://github.com/ClickHouse/ClickHouse/issues/31486). [#38363](https://github.com/ClickHouse/ClickHouse/pull/38363) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Allow `NULL` modifier in columns declaration for table functions. [#38816](https://github.com/ClickHouse/ClickHouse/pull/38816) ([Kruglov Pavel](https://github.com/Avogar)).
* Deactivate `mutations_finalizing_task` before shutdown to avoid benign `TABLE_IS_READ_ONLY` errors during shutdown. [#38851](https://github.com/ClickHouse/ClickHouse/pull/38851) ([Raúl Marín](https://github.com/Algunenano)).
* Eliminate unnecessary waiting of SELECT queries after ALTER queries in presence of INSERT queries if you use deprecated Ordinary databases. [#38864](https://github.com/ClickHouse/ClickHouse/pull/38864) ([Azat Khuzhin](https://github.com/azat)).
* New option `rewrite` in `EXPLAIN AST`. If enabled, it shows AST after it's rewritten, otherwise AST of original query. Disabled by default. [#38910](https://github.com/ClickHouse/ClickHouse/pull/38910) ([Igor Nikonov](https://github.com/devcrafter)).
* Stop reporting Zookeeper "Node exists" exceptions in system.errors when they are expected. [#38961](https://github.com/ClickHouse/ClickHouse/pull/38961) ([Raúl Marín](https://github.com/Algunenano)).
* `clickhouse-keeper`: add support for real-time digest calculation and verification. It is disabled by default. [#37555](https://github.com/ClickHouse/ClickHouse/pull/37555) ([Antonio Andelic](https://github.com/antonio2368)).
* Allow to specify globs `* or {expr1, expr2, expr3}` inside a key for `clickhouse-extract-from-config` tool. [#38966](https://github.com/ClickHouse/ClickHouse/pull/38966) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* clearOldLogs: Don't report KEEPER_EXCEPTION on concurrent deletes. [#39016](https://github.com/ClickHouse/ClickHouse/pull/39016) ([Raúl Marín](https://github.com/Algunenano)).
* clickhouse-keeper improvement: persist meta-information about keeper servers to disk. [#39069](https://github.com/ClickHouse/ClickHouse/pull/39069) ([Antonio Andelic](https://github.com/antonio2368)). This will make it easier to operate if you shutdown or restart all keeper nodes at the same time.
* Continue without exception when running out of disk space when using filesystem cache. [#39106](https://github.com/ClickHouse/ClickHouse/pull/39106) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Handling SIGTERM signals from k8s. [#39130](https://github.com/ClickHouse/ClickHouse/pull/39130) ([Timur Solodovnikov](https://github.com/tsolodov)).
* Add `merge_algorithm` column (Undecided, Horizontal, Vertical) to system.part_log. [#39181](https://github.com/ClickHouse/ClickHouse/pull/39181) ([Azat Khuzhin](https://github.com/azat)).
* Don't increment a counter in `system.errors` when the disk is not rotational. [#39216](https://github.com/ClickHouse/ClickHouse/pull/39216) ([Raúl Marín](https://github.com/Algunenano)).
* The metric `result_bytes` for `INSERT` queries in `system.query_log` shows number of bytes inserted. Previously value was incorrect and stored the same value as `result_rows`. [#39225](https://github.com/ClickHouse/ClickHouse/pull/39225) ([Ilya Yatsishin](https://github.com/qoega)).
* The CPU usage metric in clickhouse-client will be displayed in a better way. Fixes [#38756](https://github.com/ClickHouse/ClickHouse/issues/38756). [#39280](https://github.com/ClickHouse/ClickHouse/pull/39280) ([Sergei Trifonov](https://github.com/serxa)).
* Rethrow exception on filesystem cache initialization on server startup, better error message. [#39386](https://github.com/ClickHouse/ClickHouse/pull/39386) ([Kseniia Sumarokova](https://github.com/kssenii)).
* OpenTelemetry now collects traces without Processors spans by default (there are too many). To enable Processors spans collection `opentelemetry_trace_processors` setting. [#39170](https://github.com/ClickHouse/ClickHouse/pull/39170) ([Ilya Yatsishin](https://github.com/qoega)).
* Functions `multiMatch[Fuzzy](AllIndices/Any/AnyIndex)` - don't throw a logical error if the needle argument is empty. [#39012](https://github.com/ClickHouse/ClickHouse/pull/39012) ([Robert Schulze](https://github.com/rschu1ze)).
* Allow to declare `RabbitMQ` queue without default arguments `x-max-length` and `x-overflow`. [#39259](https://github.com/ClickHouse/ClickHouse/pull/39259) ([rnbondarenko](https://github.com/rnbondarenko)).
#### Build/Testing/Packaging Improvement
* Apply Clang Thread Safety Analysis (TSA) annotations to ClickHouse. [#38068](https://github.com/ClickHouse/ClickHouse/pull/38068) ([Robert Schulze](https://github.com/rschu1ze)).
* Adapt universal installation script for FreeBSD. [#39302](https://github.com/ClickHouse/ClickHouse/pull/39302) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Preparation for building on `s390x` platform. [#39193](https://github.com/ClickHouse/ClickHouse/pull/39193) ([Harry Lee](https://github.com/HarryLeeIBM)).
* Fix a bug in `jemalloc` library [#38757](https://github.com/ClickHouse/ClickHouse/pull/38757) ([Azat Khuzhin](https://github.com/azat)).
* Hardware benchmark now has support for automatic results uploading. [#38427](https://github.com/ClickHouse/ClickHouse/pull/38427) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* System table "system.licenses" is now correctly populated on Mac (Darwin). [#38294](https://github.com/ClickHouse/ClickHouse/pull/38294) ([Robert Schulze](https://github.com/rschu1ze)).
* Change `all|noarch` packages to architecture-dependent - Fix some documentation for it - Push aarch64|arm64 packages to artifactory and release assets - Fixes [#36443](https://github.com/ClickHouse/ClickHouse/issues/36443). [#38580](https://github.com/ClickHouse/ClickHouse/pull/38580) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Fix rounding for `Decimal128/Decimal256` with more than 19-digits long scale. [#38027](https://github.com/ClickHouse/ClickHouse/pull/38027) ([Igor Nikonov](https://github.com/devcrafter)).
* Fixed crash caused by data race in storage `Hive` (integration table engine). [#38887](https://github.com/ClickHouse/ClickHouse/pull/38887) ([lgbo](https://github.com/lgbo-ustc)).
* Fix crash when executing GRANT ALL ON *.* with ON CLUSTER. It was broken in https://github.com/ClickHouse/ClickHouse/pull/35767. This closes [#38618](https://github.com/ClickHouse/ClickHouse/issues/38618). [#38674](https://github.com/ClickHouse/ClickHouse/pull/38674) ([Vitaly Baranov](https://github.com/vitlibar)).
* Correct glob expansion in case of `{0..10}` forms. Fixes [#38498](https://github.com/ClickHouse/ClickHouse/issues/38498) Current Implementation is similar to what shell does mentiond by @rschu1ze [here](https://github.com/ClickHouse/ClickHouse/pull/38502#issuecomment-1169057723). [#38502](https://github.com/ClickHouse/ClickHouse/pull/38502) ([Heena Bansal](https://github.com/HeenaBansal2009)).
* Fix crash for `mapUpdate`, `mapFilter` functions when using with constant map argument. Closes [#38547](https://github.com/ClickHouse/ClickHouse/issues/38547). [#38553](https://github.com/ClickHouse/ClickHouse/pull/38553) ([hexiaoting](https://github.com/hexiaoting)).
* Fix `toHour` monotonicity information for query optimization which can lead to incorrect query result (incorrect index analysis). This fixes [#38333](https://github.com/ClickHouse/ClickHouse/issues/38333). [#38675](https://github.com/ClickHouse/ClickHouse/pull/38675) ([Amos Bird](https://github.com/amosbird)).
* Fix checking whether s3 storage support parallel writes. It resulted in s3 parallel writes not working. [#38792](https://github.com/ClickHouse/ClickHouse/pull/38792) ([chen](https://github.com/xiedeyantu)).
* Fix s3 seekable reads with parallel read buffer. (Affected memory usage during query). Closes [#38258](https://github.com/ClickHouse/ClickHouse/issues/38258). [#38802](https://github.com/ClickHouse/ClickHouse/pull/38802) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Update `simdjson`. This fixes [#38621](https://github.com/ClickHouse/ClickHouse/issues/38621) - a buffer overflow on machines with the latest Intel CPUs with AVX-512 VBMI. [#38838](https://github.com/ClickHouse/ClickHouse/pull/38838) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix possible logical error for Vertical merges. [#38859](https://github.com/ClickHouse/ClickHouse/pull/38859) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix settings profile with seconds unit. [#38896](https://github.com/ClickHouse/ClickHouse/pull/38896) ([Raúl Marín](https://github.com/Algunenano)).
* Fix incorrect partition pruning when there is a nullable partition key. Note: most likely you don't use nullable partition keys - this is an obscure feature you should not use. Nullable keys are a nonsense and this feature is only needed for some crazy use-cases. This fixes [#38941](https://github.com/ClickHouse/ClickHouse/issues/38941). [#38946](https://github.com/ClickHouse/ClickHouse/pull/38946) ([Amos Bird](https://github.com/amosbird)).
* Improve `fsync_part_directory` for fetches. [#38993](https://github.com/ClickHouse/ClickHouse/pull/38993) ([Azat Khuzhin](https://github.com/azat)).
* Fix possible dealock inside `OvercommitTracker`. Fixes [#37794](https://github.com/ClickHouse/ClickHouse/issues/37794). [#39030](https://github.com/ClickHouse/ClickHouse/pull/39030) ([Dmitry Novik](https://github.com/novikd)).
* Fix bug in filesystem cache that could happen in some corner case which coincided with cache capacity hitting the limit. Closes [#39066](https://github.com/ClickHouse/ClickHouse/issues/39066). [#39070](https://github.com/ClickHouse/ClickHouse/pull/39070) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix some corner cases of interpretation of the arguments of window expressions. Fixes [#38538](https://github.com/ClickHouse/ClickHouse/issues/38538) Allow using of higher-order functions in window expressions. [#39112](https://github.com/ClickHouse/ClickHouse/pull/39112) ([Dmitry Novik](https://github.com/novikd)).
* Keep `LowCardinality` type in `tuple` function. Previously `LowCardinality` type was dropped and elements of created tuple had underlying type of `LowCardinality`. [#39113](https://github.com/ClickHouse/ClickHouse/pull/39113) ([Anton Popov](https://github.com/CurtizJ)).
* Fix error `Block structure mismatch` which could happen for INSERT into table with attached MATERIALIZED VIEW and enabled setting `extremes = 1`. Closes [#29759](https://github.com/ClickHouse/ClickHouse/issues/29759) and [#38729](https://github.com/ClickHouse/ClickHouse/issues/38729). [#39125](https://github.com/ClickHouse/ClickHouse/pull/39125) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix unexpected query result when both `optimize_trivial_count_query` and `empty_result_for_aggregation_by_empty_set` are set to true. This fixes [#39140](https://github.com/ClickHouse/ClickHouse/issues/39140). [#39155](https://github.com/ClickHouse/ClickHouse/pull/39155) ([Amos Bird](https://github.com/amosbird)).
* Fixed error `Not found column Type in block` in selects with `PREWHERE` and read-in-order optimizations. [#39157](https://github.com/ClickHouse/ClickHouse/pull/39157) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix extremely rare race condition in during hardlinks for remote filesystem. The only way to reproduce it is concurrent run of backups. [#39190](https://github.com/ClickHouse/ClickHouse/pull/39190) ([alesapin](https://github.com/alesapin)).
* (zero-copy replication is an experimental feature that should not be used in production) Fix fetch of in-memory part with `allow_remote_fs_zero_copy_replication`. [#39214](https://github.com/ClickHouse/ClickHouse/pull/39214) ([Azat Khuzhin](https://github.com/azat)).
* (MaterializedPostgreSQL - experimental feature). Fix segmentation fault in MaterializedPostgreSQL database engine, which could happen if some exception occurred at replication initialisation. Closes [#36939](https://github.com/ClickHouse/ClickHouse/issues/36939). [#39272](https://github.com/ClickHouse/ClickHouse/pull/39272) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix incorrect fetch of table metadata from PostgreSQL database engine. Closes [#33502](https://github.com/ClickHouse/ClickHouse/issues/33502). [#39283](https://github.com/ClickHouse/ClickHouse/pull/39283) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix projection exception when aggregation keys are wrapped inside other functions. This fixes [#37151](https://github.com/ClickHouse/ClickHouse/issues/37151). [#37155](https://github.com/ClickHouse/ClickHouse/pull/37155) ([Amos Bird](https://github.com/amosbird)).
* Fix possible logical error `... with argument with type Nothing and default implementation for Nothing is expected to return result with type Nothing, got ...` in some functions. Closes: [#37610](https://github.com/ClickHouse/ClickHouse/issues/37610) Closes: [#37741](https://github.com/ClickHouse/ClickHouse/issues/37741). [#37759](https://github.com/ClickHouse/ClickHouse/pull/37759) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix incorrect columns order in subqueries of UNION (in case of duplicated columns in subselects may produce incorrect result). [#37887](https://github.com/ClickHouse/ClickHouse/pull/37887) ([Azat Khuzhin](https://github.com/azat)).
* Fix incorrect work of MODIFY ALTER Column with column names that contain dots. Closes [#37907](https://github.com/ClickHouse/ClickHouse/issues/37907). [#37971](https://github.com/ClickHouse/ClickHouse/pull/37971) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix reading of sparse columns from `MergeTree` tables that store their data in S3. [#37978](https://github.com/ClickHouse/ClickHouse/pull/37978) ([Anton Popov](https://github.com/CurtizJ)).
* Fix possible crash in `Distributed` async insert in case of removing a replica from config. [#38029](https://github.com/ClickHouse/ClickHouse/pull/38029) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix "Missing columns" for GLOBAL JOIN with CTE without alias. [#38056](https://github.com/ClickHouse/ClickHouse/pull/38056) ([Azat Khuzhin](https://github.com/azat)).
* Rewrite tuple functions as literals in backwards-compatibility mode. [#38096](https://github.com/ClickHouse/ClickHouse/pull/38096) ([Anton Kozlov](https://github.com/tonickkozlov)).
* Fix redundant memory reservation for output block during `ORDER BY`. [#38127](https://github.com/ClickHouse/ClickHouse/pull/38127) ([iyupeng](https://github.com/iyupeng)).
* Fix possible logical error `Bad cast from type DB::IColumn* to DB::ColumnNullable*` in array mapped functions. Closes [#38006](https://github.com/ClickHouse/ClickHouse/issues/38006). [#38132](https://github.com/ClickHouse/ClickHouse/pull/38132) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix temporary name clash in partial merge join, close [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#38135](https://github.com/ClickHouse/ClickHouse/pull/38135) ([Vladimir C](https://github.com/vdimir)).
* Some minr issue with queries like `CREATE TABLE nested_name_tuples (`a` Tuple(x String, y Tuple(i Int32, j String))) ENGINE = Memory;` [#38136](https://github.com/ClickHouse/ClickHouse/pull/38136) ([lgbo](https://github.com/lgbo-ustc)).
* Fix bug with nested short-circuit functions that led to execution of arguments even if condition is false. Closes [#38040](https://github.com/ClickHouse/ClickHouse/issues/38040). [#38173](https://github.com/ClickHouse/ClickHouse/pull/38173) ([Kruglov Pavel](https://github.com/Avogar)).
* (Window View is a experimental feature) Fix LOGICAL_ERROR for WINDOW VIEW with incorrect structure. [#38205](https://github.com/ClickHouse/ClickHouse/pull/38205) ([Azat Khuzhin](https://github.com/azat)).
* Update librdkafka submodule to fix crash when an OAUTHBEARER refresh callback is set. [#38225](https://github.com/ClickHouse/ClickHouse/pull/38225) ([Rafael Acevedo](https://github.com/racevedoo)).
* Fix INSERT into Distributed hung due to ProfileEvents. [#38307](https://github.com/ClickHouse/ClickHouse/pull/38307) ([Azat Khuzhin](https://github.com/azat)).
* Fix retries in PostgreSQL engine. [#38310](https://github.com/ClickHouse/ClickHouse/pull/38310) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix optimization in PartialSortingTransform (SIGSEGV and possible incorrect result). [#38324](https://github.com/ClickHouse/ClickHouse/pull/38324) ([Azat Khuzhin](https://github.com/azat)).
* Fix RabbitMQ with formats based on PeekableReadBuffer. Closes [#38061](https://github.com/ClickHouse/ClickHouse/issues/38061). [#38356](https://github.com/ClickHouse/ClickHouse/pull/38356) ([Kseniia Sumarokova](https://github.com/kssenii)).
* MaterializedPostgreSQL - experimentail feature. Fix possible `Invalid number of rows in Chunk` in MaterializedPostgreSQL. Closes [#37323](https://github.com/ClickHouse/ClickHouse/issues/37323). [#38360](https://github.com/ClickHouse/ClickHouse/pull/38360) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix RabbitMQ configuration with connection string setting. Closes [#36531](https://github.com/ClickHouse/ClickHouse/issues/36531). [#38365](https://github.com/ClickHouse/ClickHouse/pull/38365) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix PostgreSQL engine not using PostgreSQL schema when retrieving array dimension size. Closes [#36755](https://github.com/ClickHouse/ClickHouse/issues/36755). Closes [#36772](https://github.com/ClickHouse/ClickHouse/issues/36772). [#38366](https://github.com/ClickHouse/ClickHouse/pull/38366) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix possibly incorrect result of distributed queries with `DISTINCT` and `LIMIT`. Fixes [#38282](https://github.com/ClickHouse/ClickHouse/issues/38282). [#38371](https://github.com/ClickHouse/ClickHouse/pull/38371) ([Anton Popov](https://github.com/CurtizJ)).
* Fix wrong results of countSubstrings() & position() on patterns with 0-bytes. [#38589](https://github.com/ClickHouse/ClickHouse/pull/38589) ([Robert Schulze](https://github.com/rschu1ze)).
* Now it's possible to start a clickhouse-server and attach/detach tables even for tables with the incorrect values of IPv4/IPv6 representation. Proper fix for issue [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#38590](https://github.com/ClickHouse/ClickHouse/pull/38590) ([alesapin](https://github.com/alesapin)).
* `rankCorr` function will work correctly if some arguments are NaNs. This closes [#38396](https://github.com/ClickHouse/ClickHouse/issues/38396). [#38722](https://github.com/ClickHouse/ClickHouse/pull/38722) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `parallel_view_processing=1` with `optimize_trivial_insert_select=1`. Fix `max_insert_threads` while pushing to views. [#38731](https://github.com/ClickHouse/ClickHouse/pull/38731) ([Azat Khuzhin](https://github.com/azat)).
* Fix use-after-free for aggregate functions with `Map` combinator that leads to incorrect result. [#38748](https://github.com/ClickHouse/ClickHouse/pull/38748) ([Azat Khuzhin](https://github.com/azat)).
### <a id="226"></a> ClickHouse release 22.6, 2022-06-16
#### Backward Incompatible Change

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.14)
cmake_minimum_required(VERSION 3.15)
project(ClickHouse LANGUAGES C CXX ASM)
@ -74,18 +74,13 @@ message (STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
option(USE_STATIC_LIBRARIES "Disable to use shared libraries" ON)
# DEVELOPER ONLY.
# Faster linking if turned on.
option(SPLIT_SHARED_LIBRARIES "Keep all internal libraries as separate .so files" OFF)
option(CLICKHOUSE_SPLIT_BINARY "Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled" OFF)
if (NOT USE_STATIC_LIBRARIES)
# DEVELOPER ONLY.
# Faster linking if turned on.
option(SPLIT_SHARED_LIBRARIES "Keep all internal libraries as separate .so files")
option(CLICKHOUSE_SPLIT_BINARY
"Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled")
endif ()
if (USE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
message(FATAL_ERROR "Defining SPLIT_SHARED_LIBRARIES=1 without USE_STATIC_LIBRARIES=0 has no effect.")
if (USE_STATIC_LIBRARIES AND (SPLIT_SHARED_LIBRARIES OR CLICKHOUSE_SPLIT_BINARY))
message(FATAL_ERROR "SPLIT_SHARED_LIBRARIES=1 or CLICKHOUSE_SPLIT_BINARY=1 must not be used together with USE_STATIC_LIBRARIES=1")
endif()
if (NOT USE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
@ -335,6 +330,22 @@ if (COMPILER_GCC OR COMPILER_CLANG)
set(COMPILER_FLAGS "${COMPILER_FLAGS} -falign-functions=32")
endif ()
if (ARCH_AMD64)
# align branches within a 32-Byte boundary to avoid the potential performance loss when code layout change,
# which makes benchmark results more stable.
set(BRANCHES_WITHIN_32B_BOUNDARIES "-mbranches-within-32B-boundaries")
if (COMPILER_GCC)
# gcc is in assembler, need to add "-Wa," prefix
set(BRANCHES_WITHIN_32B_BOUNDARIES "-Wa,${BRANCHES_WITHIN_32B_BOUNDARIES}")
endif()
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("${BRANCHES_WITHIN_32B_BOUNDARIES}" HAS_BRANCHES_WITHIN_32B_BOUNDARIES)
if (HAS_BRANCHES_WITHIN_32B_BOUNDARIES)
set(COMPILER_FLAGS "${COMPILER_FLAGS} ${BRANCHES_WITHIN_32B_BOUNDARIES}")
endif()
endif()
if (COMPILER_GCC)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcoroutines")
endif ()
@ -507,14 +518,14 @@ if (NOT ENABLE_JEMALLOC)
message (WARNING "Non default allocator is disabled. This is not recommended for production builds.")
endif ()
macro (add_executable target)
macro (clickhouse_add_executable target)
# invoke built-in add_executable
# explicitly acquire and interpose malloc symbols by clickhouse_malloc
# if GLIBC_COMPATIBILITY is ON and ENABLE_THINLTO is on than provide memcpy symbol explicitly to neutrialize thinlto's libcall generation.
if (ARCH_AMD64 AND GLIBC_COMPATIBILITY AND ENABLE_THINLTO)
_add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc> $<TARGET_OBJECTS:memcpy>)
add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc> $<TARGET_OBJECTS:memcpy>)
else ()
_add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc>)
add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc>)
endif ()
get_target_property (type ${target} TYPE)

View File

@ -12,7 +12,14 @@
#define JSON_MAX_DEPTH 100
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec"
#endif
POCO_IMPLEMENT_EXCEPTION(JSONException, Poco::Exception, "JSONException") // NOLINT(cert-err60-cpp, modernize-use-noexcept, hicpp-use-noexcept)
#ifdef __clang__
# pragma clang diagnostic pop
#endif
/// Прочитать беззнаковое целое в простом формате из не-0-terminated строки.

View File

@ -38,8 +38,14 @@
*/
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec"
#endif
POCO_DECLARE_EXCEPTION(Foundation_API, JSONException, Poco::Exception)
#ifdef __clang__
# pragma clang diagnostic pop
#endif
class JSON
{

View File

@ -93,7 +93,6 @@
# define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address")))
# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined")))
# define DISABLE_SANITIZER_INSTRUMENTATION __attribute__((disable_sanitizer_instrumentation))
#else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it.
# define NO_SANITIZE_UNDEFINED
# define NO_SANITIZE_ADDRESS
@ -101,6 +100,13 @@
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED ALWAYS_INLINE
#endif
#if defined(__clang__) && defined(__clang_major__) && __clang_major__ >= 14
# define DISABLE_SANITIZER_INSTRUMENTATION __attribute__((disable_sanitizer_instrumentation))
#else
# define DISABLE_SANITIZER_INSTRUMENTATION
#endif
#if !__has_include(<sanitizer/asan_interface.h>) || !defined(ADDRESS_SANITIZER)
# define ASAN_UNPOISON_MEMORY_REGION(a, b)
# define ASAN_POISON_MEMORY_REGION(a, b)

View File

@ -1,2 +1,2 @@
add_executable (dump_variable dump_variable.cpp)
clickhouse_add_executable (dump_variable dump_variable.cpp)
target_link_libraries (dump_variable PRIVATE clickhouse_common_io)

1
benchmark/README.md Normal file
View File

@ -0,0 +1 @@
Benchmark is located in a separate repository: https://github.com/ClickHouse/ClickBench

View File

@ -1,22 +0,0 @@
#!/usr/bin/env bash
QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3
cat "$QUERIES_FILE" | sed "s|{table}|\"${TABLE}\"|g" | while read query; do
echo -n "["
for i in $(seq 1 $TRIES); do
while true; do
RES=$(command time -f %e -o /dev/stdout curl -sS -G --data-urlencode "query=$query" --data "default_format=Null&max_memory_usage=100000000000&max_memory_usage_for_all_queries=100000000000&max_concurrent_queries_for_user=100&database=*$YT_CLIQUE_ID" --location-trusted -H "Authorization: OAuth $YT_TOKEN" "$YT_PROXY.yt.yandex.net/query" 2>/dev/null);
if [[ $? == 0 ]]; then
[[ $RES =~ 'fail|Exception' ]] || break;
fi
done
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done
echo "],"
done

View File

@ -1,29 +0,0 @@
#!/usr/bin/env bash
QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3
if [ -x ./clickhouse ]
then
CLICKHOUSE_CLIENT="./clickhouse client"
elif command -v clickhouse-client >/dev/null 2>&1
then
CLICKHOUSE_CLIENT="clickhouse-client"
else
echo "clickhouse-client is not found"
exit 1
fi
cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
sync
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
echo -n "["
for i in $(seq 1 $TRIES); do
RES=$(${CLICKHOUSE_CLIENT} --time --format=Null --max_memory_usage=100G --query="$query" 2>&1)
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done
echo "],"
done

View File

@ -1,19 +0,0 @@
#!/usr/bin/env bash
QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3
cat "$QUERIES_FILE" | sed "s|{table}|\"${TABLE}\"|g" | while read query; do
echo -n "["
for i in $(seq 1 $TRIES); do
while true; do
RES=$(command time -f %e -o time ./yql --clickhouse --syntax-version 1 -f empty <<< "USE chyt.hume; PRAGMA max_memory_usage = 100000000000; PRAGMA max_memory_usage_for_all_queries = 100000000000; $query" >/dev/null 2>&1 && cat time) && break;
done
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done
echo "],"
done

View File

@ -1,43 +0,0 @@
#!/usr/bin/env bash
QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3
PARAMS="--host ... --secure --password ..."
if [ -x ./clickhouse ]
then
CLICKHOUSE_CLIENT="./clickhouse client"
elif command -v clickhouse-client >/dev/null 2>&1
then
CLICKHOUSE_CLIENT="clickhouse-client"
else
echo "clickhouse-client is not found"
exit 1
fi
QUERY_ID_PREFIX="benchmark_$RANDOM"
QUERY_NUM=1
cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query
do
for i in $(seq 1 $TRIES)
do
QUERY_ID="${QUERY_ID_PREFIX}_${QUERY_NUM}_${i}"
${CLICKHOUSE_CLIENT} ${PARAMS} --query_id "${QUERY_ID}" --format=Null --max_memory_usage=100G --query="$query"
echo -n '.'
done
QUERY_NUM=$((QUERY_NUM + 1))
echo
done
sleep 10
${CLICKHOUSE_CLIENT} ${PARAMS} --query "
WITH extractGroups(query_id, '(\d+)_(\d+)\$') AS num_run, num_run[1]::UInt8 AS num, num_run[2]::UInt8 AS run
SELECT groupArrayInsertAt(query_duration_ms / 1000, (run - 1)::UInt8)::String || ','
FROM clusterAllReplicas(default, system.query_log)
WHERE event_date >= yesterday() AND type = 2 AND query_id LIKE '${QUERY_ID_PREFIX}%'
GROUP BY num ORDER BY num FORMAT TSV
"

View File

@ -1,43 +0,0 @@
SELECT count() FROM {table};
SELECT count() FROM {table} WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM {table} ;
SELECT sum(UserID) FROM {table} ;
SELECT uniq(UserID) FROM {table} ;
SELECT uniq(SearchPhrase) FROM {table} ;
SELECT min(EventDate), max(EventDate) FROM {table} ;
SELECT AdvEngineID, count() FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count() DESC;
SELECT RegionID, uniq(UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count() AS c, avg(ResolutionWidth), uniq(UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, uniq(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, uniq(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count() AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, uniq(UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count() AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, count() FROM {table} GROUP BY UserID ORDER BY count() DESC LIMIT 10;
SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count() DESC LIMIT 10;
SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, toMinute(EventTime) AS m, SearchPhrase, count() FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count() DESC LIMIT 10;
SELECT UserID FROM {table} WHERE UserID = 12345678901234567890;
SELECT count() FROM {table} WHERE URL LIKE '%metrika%';
SELECT SearchPhrase, any(URL), count() AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, any(URL), any(Title), count() AS c, uniq(UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(length(URL)) AS l, count() AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25;
SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count() AS c, any(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT URL, count() AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
SELECT 1, URL, count() AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM {table} GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, ((SearchEngineID = 0 AND AdvEngineID = 0) ? Referer : '') AS Src, URL AS Dst, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = halfMD5('http://example.ru/') GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
SELECT WindowClientWidth, WindowClientHeight, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND NOT DontCountHits AND URLHash = halfMD5('http://example.ru/') GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT toStartOfMinute(EventTime) AS Minute, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;

File diff suppressed because one or more lines are too long

View File

@ -1,43 +0,0 @@
SELECT count(*) FROM hits;
SELECT count(*) FROM hits WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits;
SELECT sum(UserID) FROM hits;
SELECT COUNT(DISTINCT UserID) FROM hits;
SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
SELECT min(EventDate), max(EventDate) FROM hits;
SELECT AdvEngineID, count(*) FROM hits WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(MobilePhoneModel) > 0 GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(MobilePhoneModel) > 0 GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(*) AS c FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, count(*) FROM hits GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) AS m, SearchPhrase, count(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM hits WHERE UserID = 12345678901234567890;
SELECT count(*) FROM hits WHERE URL::TEXT LIKE '%metrika%';
SELECT SearchPhrase, min(URL), count(*) AS c FROM hits WHERE URL::TEXT LIKE '%metrika%' AND octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title::TEXT LIKE '%Яндекс%' AND URL::TEXT NOT LIKE '%.yandex.%' AND octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM hits WHERE URL::TEXT LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(octet_length(URL)) AS l, count(*) AS c FROM hits WHERE octet_length(URL) > 0 GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT regexp_replace(Referer::TEXT, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS key, avg(octet_length(Referer)) AS l, count(*) AS c, min(Referer) FROM hits WHERE octet_length(Referer) > 0 GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits;
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;
SELECT 1, URL, count(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND octet_length(URL) > 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND octet_length(Title) > 0 GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) AS "Minute", count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) ORDER BY DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime)));

File diff suppressed because one or more lines are too long

View File

@ -1,43 +0,0 @@
Folder structure
______________
dump_dataset_from_ch.sh - bash script that dumps a dataset from Clickhouse
schema.sql - schema for a Greenplum cluster to load dumped dataset in
load_data_set.sql - the script that loads up a dumped dataset
queries.sql - SQL statements used in the benchmark
benchmark.sh - this piece of bash conducts a benchmark
result_parser.py - script to parse benchmark.sh's output and produce python code to build a graph to compare up to 4 benchmark results.
Requirements
____________
Greenplum uses a separate server as a point of entry, so you need 2 servers at least to run a cluster: master and segment hosts. 2 segments host and 56 segments(28 per host) had been used while conducting the test.
You has has to put segment hostnames in the benchmark.sh.
Greenplum quick installation instructions
_________________________________________
Obtain a stable Greenplum version here(4.3.9.1 was used while conducting the benchmark):
https://network.pivotal.io/products/pivotal-gpdb
and install it using this detailed guide:
http://gpdb.docs.pivotal.io/4340/install_guide/install_guide.html
You should change gp_interconnect_type to 'tcp' if cluster members are connected via 1GB link or lower.
There are some variables that has to be changed prior the first benchmark run: gp_vmem_protect_limit and max_statement_mem to allow each segment to use more virtual memory. Here are commands to change this GUCS that has to be executed as gpadmin at the master host:
gpconfig -c gp_interconnect_type -v tcp
gpconfig -c gp_vmem_protect_limit -v 3000
gpconfig -c max_statement_mem -v '4000MB'
How to prepare data
-------------------
One can prepare datasets to run the benchmark on using dump_dataset_from_ch.sh script from this repo. The script has to be run at at Clickhouse host. It takes a long time to get dumps.
Upload the datasets into Greenplum master.Then run schema.sql to prepare schema and load_data_set.sql to load data up. This operation also takes a long time.
How to conduct the benchmark
__________________________
There is a benchmark.sh that take some arguments. Here is the syntax:
./benchmark.sh sql_statements_file tablename dbname orca_switch
If you don't know about the last one then just use a default value.

View File

@ -1,30 +0,0 @@
#!/usr/bin/env bash
filename=${1-queries.sql}
table=$2
dbname=$3
orca=${4-on}
host1=somehost
host2=somehost
mem='15GB'
cat $filename | sed "s/{table}/$table/g" | while read query ;
do
ssh -n $host1 'echo 3 | tee /proc/sys/vm/drop_caches; sync' > /dev/null
ssh -n $host2 'echo 3 | tee /proc/sys/vm/drop_caches; sync' > /dev/null
sleep 5
echo $query | egrep "SELECT UserID, date_trunc\('minute', EventTime\) AS m|SELECT Referer AS key, avg\(length\(Referer\)\) AS l|SELECT URL, count(1) AS c FROM.*GROUP BY URL|SELECT 1, URL, count\(1\) AS c FROM.*GROUP BY 1" && mem='10GB'
echo $query | egrep 'SELECT DISTINCT|GROUP BY UserID, SearchPhrase LIMIT 10|count\(DISTINCT UserID\) AS u' && mem='5GB'
echo "####################"
echo "$query"
echo "Timestamp_begin:$(date)"
echo "\\timing off \\\\set optimizer=$orca; set effective_cache_size='256MB'; set statement_mem='$mem';\\timing on \\\\ $query;" | psql -p 5432 -h 'localhost' -o /dev/null -U gpadmin ${dbname}
echo "Timestamp_end:$(date)"
echo "Timestamp_begin:$(date)"
echo "\\timing off \\\\set optimizer=$orca; set effective_cache_size='50GB'; set statement_mem='$mem';\\timing on \\\\ $query;" | psql -p 5432 -h 'localhost' -o /dev/null -U gpadmin ${dbname}
echo "Timestamp_end:$(date)"
echo "Timestamp_begin:$(date)"
echo "\\timing off \\\\set optimizer=$orca; set effective_cache_size='50GB'; set statement_mem='$mem';\\timing on \\\\ $query;" | psql -p 5432 -h 'localhost' -o /dev/null -U gpadmin ${dbname}
echo "Timestamp_end:$(date)"
echo "$query"
echo '####################'
done

View File

@ -1,5 +0,0 @@
#!/usr/bin/env bash
for table in hits_10m_single hits_100m_single hits_1000m_single; do
clickhouse-client -q "SELECT (round(WatchID/2), JavaEnable, Title, GoodEvent, EventTime, EventDate, CounterID, ClientIP, RegionID,round(UserID/2), CounterClass, OS, UserAgent, URL, Referer, Refresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, ClientEventTime, SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce,round(FUniqID/2), OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, LocalEventTime, Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID,round(RefererHash/2),round(URLHash/2), CLID) FROM $table FORMAT CSV" > $table
done

View File

@ -1,12 +0,0 @@
COPY hits_all_10m FROM '/data/hits_10m_single.dump' CSV SEGMENT REJECT LIMIT 30 PERCENT;
CREATE INDEX pk_counterid_eventdate_userid_10m ON hits_all_10m USING btree (counterid, eventdate, userid);
CREATE INDEX idx_10m_counterid on hits_all_10m using btree (counterid); CREATE INDEX idx_10m_userid on hits_all_10m using btree (userid);
ANALYZE hits_all_10m;
COPY hits_all_100m from '/data/hits_100m_single.dump' CSV SEGMENT REJECT LIMIT 30 PERCENT;
CREATE INDEX pk_counterid_eventdate_userid_100m ON hits_all_100m USING btree (counterid, eventdate, userid);
CREATE INDEX idx_100m_counterid on hits_all_100m using btree (counterid); CREATE INDEX idx_100m_userid on hits_all_100m using btree (userid);
ANALYZE hits_all_100m;
COPY hits_all_1000m from '/data/hits_1000m_single.dump' CSV SEGMENT REJECT LIMIT 30 PERCENT;
CREATE INDEX pk_counterid_eventdate_userid_1000m ON hits_all_1000m USING btree (counterid, eventdate, userid);
CREATE INDEX idx_1000m_counterid on hits_all_1000m using btree (counterid); CREATE INDEX idx_1000m_userid on hits_all_1000m using btree (userid);
ANALYZE hits_all_1000m;

View File

@ -1,43 +0,0 @@
SELECT count(1) FROM {table}
SELECT count(1) FROM {table} WHERE AdvEngineID != 0
SELECT sum(AdvEngineID), count(1), avg(ResolutionWidth) FROM {table}
SELECT sum(UserID) FROM {table}
SELECT count(UserID) FROM ( SELECT DISTINCT UserID FROM {table} ) AS d
SELECT count(SearchPhrase) FROM ( SELECT DISTINCT SearchPhrase FROM {table} ) AS d
SELECT min(EventDate), max(EventDate) FROM {table}
SELECT AdvEngineID, count(1) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY 2 DESC
SELECT RegionID, count(DISTINCT UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10
SELECT RegionID, sum(AdvEngineID), count(1) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10
SELECT MobilePhoneModel, count(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10
SELECT MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10
SELECT SearchPhrase, count(1) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10
SELECT SearchPhrase, count(DISTINCT UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10
SELECT SearchEngineID, SearchPhrase, count(1) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10
SELECT UserID, count(1) FROM {table} GROUP BY UserID ORDER BY 2 DESC LIMIT 10
SELECT UserID, SearchPhrase, count(1) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY 3 DESC LIMIT 10
SELECT UserID, SearchPhrase, count(1) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10
SELECT UserID, date_trunc('minute', EventTime) AS m, SearchPhrase, count(1) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(1) DESC LIMIT 10
SELECT UserID FROM {table} WHERE UserID = 12345678901234567890
SELECT count(1) FROM {table} WHERE URL LIKE '%metrika%'
SELECT SearchPhrase, max(URL) as URL, count(1) AS c FROM {table} h WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10
SELECT SearchPhrase, max(URL) as URL, min(Title) as Title, count(1) AS c, count(DISTINCT UserID) FROM {table} WHERE Title LIKE '%\xd0\xaf\xd0\xbd\xd0\xb4\xd0\xb5\xd0\xba\xd1\x81%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT count(1) FROM {table}
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10
SELECT CounterID, avg(length(URL)) AS l, count(1) AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING count(1) > 100000 ORDER BY l DESC LIMIT 25
SELECT Referer AS key, avg(length(Referer)) AS l, count(1) AS c, Referer FROM {table} WHERE Referer != '' GROUP BY key HAVING count(1) > 100000 ORDER BY l DESC LIMIT 25
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table}
SELECT SearchEngineID, ClientIP, count(1) AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10
SELECT WatchID, ClientIP, count(1) AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10
SELECT WatchID, ClientIP, count(1) AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10
SELECT URL, count(1) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10
SELECT 1, URL, count(1) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10
SELECT ClientIP AS x, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(1) AS c FROM {table} GROUP BY x, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10
SELECT URL, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND DontCountHits =0 AND Refresh = 0 AND URL <>'' GROUP BY URL ORDER BY PageViews DESC LIMIT 10
SELECT Title, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate BETWEEN '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND DontCountHits=0 AND Refresh=0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10
SELECT URL, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND Refresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, case when (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END Src, URL AS Dst, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND eventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND Refresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND eventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND Refresh =0 AND TraficSourceID IN (-1, 6) AND RefererHash = 7135345792483900000 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100
SELECT WindowClientWidth, WindowClientHeight, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND eventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND Refresh =0 AND DontCountHits =0 AND URLHash = 7135345792483900000 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT date_trunc('minute', EventTime) AS Minute, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND eventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND Refresh =0 AND DontCountHits =0 GROUP BY Minute ORDER BY Minute;

View File

@ -1,150 +0,0 @@
#!/usr/bin/env python3
import sys
import json
def parse_block(block=[], options=[]):
# print('block is here', block)
# show_query = False
# show_query = options.show_query
result = []
query = block[0].strip()
if len(block) > 4:
timing1 = block[1].strip().split()[1]
timing2 = block[3].strip().split()[1]
timing3 = block[5].strip().split()[1]
else:
timing1 = block[1].strip().split()[1]
timing2 = block[2].strip().split()[1]
timing3 = block[3].strip().split()[1]
if options.show_queries:
result.append(query)
if not options.show_first_timings:
result += [timing1, timing2, timing3]
else:
result.append(timing1)
return result
def read_stats_file(options, fname):
result = []
int_result = []
block = []
time_count = 1
with open(fname) as f:
for line in f.readlines():
if "SELECT" in line:
if len(block) > 1:
result.append(parse_block(block, options))
block = [line]
elif "Time:" in line:
block.append(line)
return result
def compare_stats_files(options, arguments):
result = []
file_output = []
pyplot_colors = ["y", "b", "g", "r"]
for fname in arguments[1:]:
file_output.append((read_stats_file(options, fname)))
if len(file_output[0]) > 0:
timings_count = len(file_output[0])
for idx, data_set in enumerate(file_output):
int_result = []
for timing in data_set:
int_result.append(float(timing[0])) # y values
result.append(
[
[x for x in range(0, len(int_result))],
int_result,
pyplot_colors[idx] + "^",
]
)
# result.append([x for x in range(1, len(int_result)) ]) #x values
# result.append( pyplot_colors[idx] + '^' )
return result
def parse_args():
from optparse import OptionParser
parser = OptionParser(usage="usage: %prog [options] [result_file_path]..")
parser.add_option(
"-q",
"--show-queries",
help="Show statements along with timings",
action="store_true",
dest="show_queries",
)
parser.add_option(
"-f",
"--show-first-timings",
help="Show only first tries timings",
action="store_true",
dest="show_first_timings",
)
parser.add_option(
"-c",
"--compare-mode",
help="Prepare output for pyplot comparing result files.",
action="store",
dest="compare_mode",
)
(options, arguments) = parser.parse_args(sys.argv)
if len(arguments) < 2:
parser.print_usage()
sys.exit(1)
return (options, arguments)
def gen_pyplot_code(options, arguments):
result = ""
data_sets = compare_stats_files(options, arguments)
for idx, data_set in enumerate(data_sets, start=0):
x_values, y_values, line_style = data_set
result += "\nplt.plot("
result += "%s, %s, '%s'" % (x_values, y_values, line_style)
result += ", label='%s try')" % idx
print("import matplotlib.pyplot as plt")
print(result)
print("plt.xlabel('Try number')")
print("plt.ylabel('Timing')")
print("plt.title('Benchmark query timings')")
print("plt.legend()")
print("plt.show()")
def gen_html_json(options, arguments):
tuples = read_stats_file(options, arguments[1])
print("{")
print('"system: GreenPlum(x2),')
print(('"version": "%s",' % "4.3.9.1"))
print('"data_size": 10000000,')
print('"time": "",')
print('"comments": "",')
print('"result":')
print("[")
for s in tuples:
print(s)
print("]")
print("}")
def main():
(options, arguments) = parse_args()
if len(arguments) > 2:
gen_pyplot_code(options, arguments)
else:
gen_html_json(options, arguments)
if __name__ == "__main__":
main()

View File

@ -1,3 +0,0 @@
CREATE TABLE hits_all_10m ( WatchID bigint, JavaEnable int, Title text, GoodEvent int, EventTime timestamp, EventDate timestamp, CounterID bigint, ClientIP bigint, RegionID bigint, UserID bigint, CounterClass int, OS int, UserAgent int, URL text, Referer text, Refresh int, RefererCategoryID int, RefererRegionID bigint, URLCategoryID int, URLRegionID bigint, ResolutionWidth int, ResolutionHeight int, ResolutionDepth int, FlashMajor int, FlashMinor int, FlashMinor2 text, NetMajor int, NetMinor int, UserAgentMajor int, CookieEnable int, JavascriptEnable int, IsMobile int, MobilePhone int, MobilePhoneModel text, Params text, IPNetworkID bigint, TraficSourceID int, SearchEngineID int, SearchPhrase text, AdvEngineID int, IsArtifical int, WindowClientWidth int, WindowClientHeight int, ClientTimeZone int, ClientEventTime timestamp, SilverlightVersion1 int, SilverlightVersion2 int, SilverlightVersion3 bigint, SilverlightVersion4 int, PageCharset text, CodeVersion bigint, IsLink int, IsDownload int, IsNotBounce int, FUniqID bigint, OriginalURL text, HID bigint, IsOldCounter int, IsEvent int, IsParameter int, DontCountHits int, WithHash int, HitColor varchar(3), LocalEventTime timestamp, Age int, Sex int, Income int, Interests int, Robotness int, RemoteIP bigint, WindowName int, OpenerName int, HistoryLength int, SocialNetwork text, SocialAction text, HTTPError int, SendTiming bigint, DNSTiming bigint, ConnectTiming bigint, ResponseStartTiming bigint, ResponseEndTiming bigint, FetchTiming bigint, SocialSourceNetworkID int, SocialSourcePage text, ParamPrice int, ParamOrderID text, OpenstatServiceName text, OpenstatCampaignID text, OpenstatAdID text, OpenstatSourceID text, UTMSource text, UTMMedium text, UTMCampaign text, UTMContent text, UTMTerm text, FromTag text, HasGCLID int, RefererHash bigint, URLHash bigint, CLID bigint) WITH (appendonly=true, orientation=column, compresstype=quicklz) DISTRIBUTED BY (userid) ;
CREATE TABLE hits_all_100m ( WatchID bigint, JavaEnable int, Title text, GoodEvent int, EventTime timestamp, EventDate timestamp, CounterID bigint, ClientIP bigint, RegionID bigint, UserID bigint, CounterClass int, OS int, UserAgent int, URL text, Referer text, Refresh int, RefererCategoryID int, RefererRegionID bigint, URLCategoryID int, URLRegionID bigint, ResolutionWidth int, ResolutionHeight int, ResolutionDepth int, FlashMajor int, FlashMinor int, FlashMinor2 text, NetMajor int, NetMinor int, UserAgentMajor int, CookieEnable int, JavascriptEnable int, IsMobile int, MobilePhone int, MobilePhoneModel text, Params text, IPNetworkID bigint, TraficSourceID int, SearchEngineID int, SearchPhrase text, AdvEngineID int, IsArtifical int, WindowClientWidth int, WindowClientHeight int, ClientTimeZone int, ClientEventTime timestamp, SilverlightVersion1 int, SilverlightVersion2 int, SilverlightVersion3 bigint, SilverlightVersion4 int, PageCharset text, CodeVersion bigint, IsLink int, IsDownload int, IsNotBounce int, FUniqID bigint, OriginalURL text, HID bigint, IsOldCounter int, IsEvent int, IsParameter int, DontCountHits int, WithHash int, HitColor varchar(3), LocalEventTime timestamp, Age int, Sex int, Income int, Interests int, Robotness int, RemoteIP bigint, WindowName int, OpenerName int, HistoryLength int, SocialNetwork text, SocialAction text, HTTPError int, SendTiming bigint, DNSTiming bigint, ConnectTiming bigint, ResponseStartTiming bigint, ResponseEndTiming bigint, FetchTiming bigint, SocialSourceNetworkID int, SocialSourcePage text, ParamPrice int, ParamOrderID text, OpenstatServiceName text, OpenstatCampaignID text, OpenstatAdID text, OpenstatSourceID text, UTMSource text, UTMMedium text, UTMCampaign text, UTMContent text, UTMTerm text, FromTag text, HasGCLID int, RefererHash bigint, URLHash bigint, CLID bigint) WITH (appendonly=true, orientation=column, compresstype=quicklz) DISTRIBUTED BY (userid) ;
CREATE TABLE hits_all_1000m ( WatchID bigint, JavaEnable int, Title text, GoodEvent int, EventTime timestamp, EventDate timestamp, CounterID bigint, ClientIP bigint, RegionID bigint, UserID bigint, CounterClass int, OS int, UserAgent int, URL text, Referer text, Refresh int, RefererCategoryID int, RefererRegionID bigint, URLCategoryID int, URLRegionID bigint, ResolutionWidth int, ResolutionHeight int, ResolutionDepth int, FlashMajor int, FlashMinor int, FlashMinor2 text, NetMajor int, NetMinor int, UserAgentMajor int, CookieEnable int, JavascriptEnable int, IsMobile int, MobilePhone int, MobilePhoneModel text, Params text, IPNetworkID bigint, TraficSourceID int, SearchEngineID int, SearchPhrase text, AdvEngineID int, IsArtifical int, WindowClientWidth int, WindowClientHeight int, ClientTimeZone int, ClientEventTime timestamp, SilverlightVersion1 int, SilverlightVersion2 int, SilverlightVersion3 bigint, SilverlightVersion4 int, PageCharset text, CodeVersion bigint, IsLink int, IsDownload int, IsNotBounce int, FUniqID bigint, OriginalURL text, HID bigint, IsOldCounter int, IsEvent int, IsParameter int, DontCountHits int, WithHash int, HitColor varchar(3), LocalEventTime timestamp, Age int, Sex int, Income int, Interests int, Robotness int, RemoteIP bigint, WindowName int, OpenerName int, HistoryLength int, SocialNetwork text, SocialAction text, HTTPError int, SendTiming bigint, DNSTiming bigint, ConnectTiming bigint, ResponseStartTiming bigint, ResponseEndTiming bigint, FetchTiming bigint, SocialSourceNetworkID int, SocialSourcePage text, ParamPrice int, ParamOrderID text, OpenstatServiceName text, OpenstatCampaignID text, OpenstatAdID text, OpenstatSourceID text, UTMSource text, UTMMedium text, UTMCampaign text, UTMContent text, UTMTerm text, FromTag text, HasGCLID int, RefererHash bigint, URLHash bigint, CLID bigint) WITH (appendonly=true, orientation=column,compresstype=quicklz) DISTRIBUTED BY (userid) ;

View File

@ -1,247 +0,0 @@
#!/bin/bash -e
QUERIES_FILE="queries.sql"
TRIES=3
mkdir -p clickhouse-benchmark
pushd clickhouse-benchmark
# Download the binary
if [[ ! -x clickhouse ]]; then
curl https://clickhouse.com/ | sh
fi
if [[ ! -f $QUERIES_FILE ]]; then
wget "https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/$QUERIES_FILE"
fi
uptime
echo "Starting clickhouse-server"
./clickhouse server >/dev/null 2>&1 &
PID=$!
function finish {
kill $PID
wait
}
trap finish EXIT
echo "Waiting for clickhouse-server to start"
for i in {1..30}; do
sleep 1
./clickhouse client --query "SELECT 'Ok.'" 2>/dev/null && break || echo -n '.'
if [[ $i == 30 ]]; then exit 1; fi
done
if [[ $(./clickhouse client --query "EXISTS hits") == '1' && $(./clickhouse client --query "SELECT count() FROM hits") == '100000000' ]]; then
echo "Dataset already downloaded"
else
echo "Will download the dataset"
if [ "`uname`" = "Darwin" ]
then
./clickhouse client --receive_timeout 1000 --max_insert_threads $(sysctl -n hw.ncpu) --progress --query "
CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime)
AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')"
else
./clickhouse client --receive_timeout 1000 --max_insert_threads $(nproc || 4) --progress --query "
CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime)
AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')"
fi
./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM hits"
fi
if [[ $(./clickhouse client --query "SELECT count() FROM system.parts WHERE table = 'hits' AND database = 'default' AND active") == '1' ]]; then
echo "Dataset already prepared"
else
echo "Will prepare the dataset"
./clickhouse client --receive_timeout 1000 --query "OPTIMIZE TABLE hits FINAL"
fi
echo
echo "Will perform benchmark. Results:"
echo
>result.csv
QUERY_NUM=1
cat "$QUERIES_FILE" | sed "s/{table}/hits/g" | while read query; do
sync
if [ "`uname`" = "Darwin" ]
then
sudo purge > /dev/null
else
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
fi
echo -n "["
for i in $(seq 1 $TRIES); do
RES=$(./clickhouse client --time --format=Null --query="$query" 2>&1 ||:)
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
echo "${QUERY_NUM},${i},${RES}" >> result.csv
done
echo "],"
QUERY_NUM=$((QUERY_NUM + 1))
done
echo
echo "Benchmark complete. System info:"
echo
touch {cpu_model,cpu,df,memory,memory_total,blk,mdstat,instance}.txt
if [ "`uname`" = "Darwin" ]
then
echo '----Version, build id-----------'
./clickhouse local --query "SELECT format('Version: {}', version())"
./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
echo '----CPU-------------------------'
sysctl hw.model | tee cpu_model.txt
sysctl -a | grep -E 'hw.activecpu|hw.memsize|hw.byteorder|cachesize' | tee cpu.txt
echo '----Disk Free and Total--------'
df -h . | tee df.txt
echo '----Memory Free and Total-------'
vm_stat | tee memory.txt
echo '----Physical Memory Amount------'
ls -l /var/vm | tee memory_total.txt
echo '--------------------------------'
else
echo '----Version, build id-----------'
./clickhouse local --query "SELECT format('Version: {}, build id: {}', version(), buildId())"
./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
echo '----CPU-------------------------'
cat /proc/cpuinfo | grep -i -F 'model name' | uniq | tee cpu_model.txt
lscpu | tee cpu.txt
echo '----Block Devices---------------'
lsblk | tee blk.txt
echo '----Disk Free and Total--------'
df -h . | tee df.txt
echo '----Memory Free and Total-------'
free -h | tee memory.txt
echo '----Physical Memory Amount------'
cat /proc/meminfo | grep MemTotal | tee memory_total.txt
echo '----RAID Info-------------------'
cat /proc/mdstat| tee mdstat.txt
echo '--------------------------------'
fi
echo
echo "Instance type from IMDS (if available):"
curl -s --connect-timeout 1 'http://169.254.169.254/latest/meta-data/instance-type' | tee instance.txt
echo
echo "Uploading the results (if possible)"
UUID=$(./clickhouse local --query "SELECT generateUUIDv4()")
./clickhouse local --query "
SELECT
'${UUID}' AS run_id,
version() AS version,
now() AS test_time,
(SELECT value FROM system.settings WHERE name = 'max_threads') AS threads,
filesystemCapacity() AS fs_capacity,
filesystemAvailable() AS fs_available,
file('cpu_model.txt') AS cpu_model,
file('cpu.txt') AS cpu,
file('df.txt') AS df,
file('memory.txt') AS memory,
file('memory_total.txt') AS memory_total,
file('blk.txt') AS blk,
file('mdstat.txt') AS mdstat,
file('instance.txt') AS instance
" | tee meta.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query "
INSERT INTO benchmark_runs
(run_id, version, test_time, threads, fs_capacity, fs_available, cpu_model, cpu, df, memory, memory_total, blk, mdstat, instance)
FORMAT TSV" || echo "Cannot upload results."
./clickhouse local --query "
SELECT
'${UUID}' AS run_id,
c1 AS query_num,
c2 AS try_num,
c3 AS time
FROM file('result.csv')
" | tee results.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query "
INSERT INTO benchmark_results
(run_id, query_num, try_num, time)
FORMAT TSV" || echo "Cannot upload results. Please send the output to feedback@clickhouse.com"
<<////
Server Setup:
CREATE TABLE benchmark_runs
(
run_id UUID,
version String,
test_time DateTime,
threads String,
fs_capacity UInt64,
fs_available UInt64,
cpu_model String,
cpu String,
df String,
memory String,
memory_total String,
blk String,
mdstat String,
instance String
) ENGINE = ReplicatedMergeTree ORDER BY run_id;
CREATE TABLE benchmark_results
(
run_id UUID,
query_num UInt8,
try_num UInt8,
time Decimal32(3)
) ENGINE = ReplicatedMergeTree ORDER BY (run_id, query_num, try_num);
CREATE USER benchmark IDENTIFIED WITH no_password SETTINGS max_rows_to_read = 1, max_result_rows = 1, max_execution_time = 1;
CREATE QUOTA benchmark
KEYED BY ip_address
FOR RANDOMIZED INTERVAL 1 MINUTE MAX query_inserts = 4, written_bytes = 100000,
FOR RANDOMIZED INTERVAL 1 HOUR MAX query_inserts = 10, written_bytes = 500000,
FOR RANDOMIZED INTERVAL 1 DAY MAX query_inserts = 50, written_bytes = 2000000
TO benchmark;
GRANT INSERT ON benchmark_runs TO benchmark;
GRANT INSERT ON benchmark_results TO benchmark;
Example query:
SELECT
cpu_model,
threads,
instance,
k
FROM
(
SELECT
run_id,
exp(avg(log(adjusted_time / best_time))) AS k
FROM
(
WITH greatest(time, 0.01) AS adjusted_time
SELECT
run_id,
adjusted_time,
min(adjusted_time) OVER (PARTITION BY query_num, try_num) AS best_time
FROM benchmark_results
WHERE try_num > 1
)
GROUP BY run_id
ORDER BY k ASC
) AS t
INNER JOIN benchmark_runs USING (run_id)
////

View File

@ -1,4 +0,0 @@
CONF_DIR=/home/kartavyy/benchmark/hive
expect_file=$CONF_DIR/expect.tcl
test_file=$CONF_DIR/queries.sql
etc_init_d_service=

View File

@ -1,9 +0,0 @@
create table hits_10m_raw ( WatchID BIGINT, JavaEnable SMALLINT, Title STRING, GoodEvent SMALLINT, EventTime TIMESTAMP, EventDate TIMESTAMP, CounterID BIGINT, ClientIP BIGINT, RegionID BIGINT, UserID BIGINT, CounterClass TINYINT, OS SMALLINT, UserAgent SMALLINT, URL STRING, Referer STRING, Refresh TINYINT, RefererCategoryID INT, RefererRegionID BIGINT, URLCategoryID INT, URLRegionID BIGINT, ResolutionWidth INT, ResolutionHeight INT, ResolutionDepth SMALLINT, FlashMajor SMALLINT, FlashMinor SMALLINT, FlashMinor2 STRING, NetMajor SMALLINT, NetMinor SMALLINT, UserAgentMajor INT, UserAgentMinor STRING, CookieEnable SMALLINT, JavascriptEnable SMALLINT, IsMobile SMALLINT, MobilePhone SMALLINT, MobilePhoneModel STRING, Params STRING, IPNetworkID BIGINT, TraficSourceID SMALLINT, SearchEngineID INT, SearchPhrase STRING, AdvEngineID SMALLINT, IsArtifical SMALLINT, WindowClientWidth INT, WindowClientHeight INT, ClientTimeZone INT, ClientEventTime TIMESTAMP, SilverlightVersion1 SMALLINT, SilverlightVersion2 SMALLINT, SilverlightVersion3 BIGINT, SilverlightVersion4 INT, PageCharset STRING, CodeVersion BIGINT, IsLink SMALLINT, IsDownload SMALLINT, IsNotBounce SMALLINT, FUniqID BIGINT, OriginalURL STRING, HID BIGINT, IsOldCounter SMALLINT, IsEvent SMALLINT, IsParameter SMALLINT, DontCountHits SMALLINT, WithHash SMALLINT, HitColor STRING, LocalEventTime TIMESTAMP, Age SMALLINT, Sex SMALLINT, Income SMALLINT, Interests INT, Robotness SMALLINT, RemoteIP BIGINT, WindowName INT, OpenerName INT, HistoryLength SMALLINT, BrowserLanguage STRING, BrowserCountry STRING, SocialNetwork STRING, SocialAction STRING, HTTPError INT, SendTiming BIGINT, DNSTiming BIGINT, ConnectTiming BIGINT, ResponseStartTiming BIGINT, ResponseEndTiming BIGINT, FetchTiming BIGINT, SocialSourceNetworkID SMALLINT, SocialSourcePage STRING, ParamPrice BIGINT, ParamOrderID STRING, ParamCurrency STRING, ParamCurrencyID INT, OpenstatServiceName STRING, OpenstatCampaignID STRING, OpenstatAdID STRING, OpenstatSourceID STRING, UTMSource STRING, UTMMedium STRING, UTMCampaign STRING, UTMContent STRING, UTMTerm STRING, FromTag STRING, HasGCLID SMALLINT, RefererHash BIGINT, URLHash BIGINT, CLID BIGINT, UserIDHash BIGINT ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE;
load data local inpath '/opt/dump/dump_0.3/dump_hits_10m_meshed_utf8.tsv' overwrite into table hits_10m_raw;
create table hits_10m ( WatchID BIGINT, JavaEnable SMALLINT, Title STRING, GoodEvent SMALLINT, EventTime TIMESTAMP, EventDate TIMESTAMP, CounterID BIGINT, ClientIP BIGINT, RegionID BIGINT, UserID BIGINT, CounterClass TINYINT, OS SMALLINT, UserAgent SMALLINT, URL STRING, Referer STRING, Refresh TINYINT, RefererCategoryID INT, RefererRegionID BIGINT, URLCategoryID INT, URLRegionID BIGINT, ResolutionWidth INT, ResolutionHeight INT, ResolutionDepth SMALLINT, FlashMajor SMALLINT, FlashMinor SMALLINT, FlashMinor2 STRING, NetMajor SMALLINT, NetMinor SMALLINT, UserAgentMajor INT, UserAgentMinor STRING, CookieEnable SMALLINT, JavascriptEnable SMALLINT, IsMobile SMALLINT, MobilePhone SMALLINT, MobilePhoneModel STRING, Params STRING, IPNetworkID BIGINT, TraficSourceID SMALLINT, SearchEngineID INT, SearchPhrase STRING, AdvEngineID SMALLINT, IsArtifical SMALLINT, WindowClientWidth INT, WindowClientHeight INT, ClientTimeZone INT, ClientEventTime TIMESTAMP, SilverlightVersion1 SMALLINT, SilverlightVersion2 SMALLINT, SilverlightVersion3 BIGINT, SilverlightVersion4 INT, PageCharset STRING, CodeVersion BIGINT, IsLink SMALLINT, IsDownload SMALLINT, IsNotBounce SMALLINT, FUniqID BIGINT, OriginalURL STRING, HID BIGINT, IsOldCounter SMALLINT, IsEvent SMALLINT, IsParameter SMALLINT, DontCountHits SMALLINT, WithHash SMALLINT, HitColor STRING, LocalEventTime TIMESTAMP, Age SMALLINT, Sex SMALLINT, Income SMALLINT, Interests INT, Robotness SMALLINT, RemoteIP BIGINT, WindowName INT, OpenerName INT, HistoryLength SMALLINT, BrowserLanguage STRING, BrowserCountry STRING, SocialNetwork STRING, SocialAction STRING, HTTPError INT, SendTiming BIGINT, DNSTiming BIGINT, ConnectTiming BIGINT, ResponseStartTiming BIGINT, ResponseEndTiming BIGINT, FetchTiming BIGINT, SocialSourceNetworkID SMALLINT, SocialSourcePage STRING, ParamPrice BIGINT, ParamOrderID STRING, ParamCurrency STRING, ParamCurrencyID INT, OpenstatServiceName STRING, OpenstatCampaignID STRING, OpenstatAdID STRING, OpenstatSourceID STRING, UTMSource STRING, UTMMedium STRING, UTMCampaign STRING, UTMContent STRING, UTMTerm STRING, FromTag STRING, HasGCLID SMALLINT, RefererHash BIGINT, URLHash BIGINT, CLID BIGINT, UserIDHash BIGINT ) CLUSTERED BY (EventDate) SORTED BY(CounterID, EventDate, UserIDHash, EventTime) INTO 10 BUCKETS STORED AS ORC tblproperties("orc.compress"="ZLIB");
insert overwrite table hits_10m select * from hits_10m_raw;
--drop table hits_10m_raw;

View File

@ -1,18 +0,0 @@
#!/usr/bin/env bash
#!/bin/expect
# Set timeout
set timeout 600
# Get arguments
set query [lindex $argv 0]
spawn hive
expect "hive>"
send "$query;\r"
expect "hive>"
send "quit;\r"
expect eof

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,624 +0,0 @@
start time: Вт. сент. 10 18:46:00 MSK 2013
status
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_15579@mturlrep13_201309101846_67163557.txt
hive> ;
hive> quit;
times: 1
query: SELECT count(*) FROM hits_10m;
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_16038@mturlrep13_201309101846_623079473.txt
hive> SELECT count(*) FROM hits_10m;;
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapred.reduce.tasks=<number>
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0036
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
2013-09-10 18:46:20,061 Stage-1 map = 0%, reduce = 0%
2013-09-10 18:46:27,089 Stage-1 map = 7%, reduce = 0%
2013-09-10 18:46:33,113 Stage-1 map = 14%, reduce = 0%
2013-09-10 18:46:36,127 Stage-1 map = 22%, reduce = 0%
2013-09-10 18:46:39,143 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
2013-09-10 18:46:40,149 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
2013-09-10 18:46:41,156 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
2013-09-10 18:46:42,162 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
2013-09-10 18:46:43,168 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
2013-09-10 18:46:44,174 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
2013-09-10 18:46:45,179 Stage-1 map = 36%, reduce = 0%, Cumulative CPU 46.41 sec
2013-09-10 18:46:46,185 Stage-1 map = 36%, reduce = 0%, Cumulative CPU 46.41 sec
2013-09-10 18:46:47,191 Stage-1 map = 36%, reduce = 0%, Cumulative CPU 46.41 sec
2013-09-10 18:46:48,197 Stage-1 map = 43%, reduce = 0%, Cumulative CPU 46.41 sec
2013-09-10 18:46:49,205 Stage-1 map = 47%, reduce = 0%, Cumulative CPU 62.51 sec
2013-09-10 18:46:50,211 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
2013-09-10 18:46:51,217 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
2013-09-10 18:46:52,222 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
2013-09-10 18:46:53,227 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
2013-09-10 18:46:54,233 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
2013-09-10 18:46:55,238 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
2013-09-10 18:46:56,244 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
2013-09-10 18:46:57,250 Stage-1 map = 54%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:46:58,255 Stage-1 map = 57%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:46:59,261 Stage-1 map = 57%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:47:00,266 Stage-1 map = 57%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:47:01,272 Stage-1 map = 61%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:47:02,277 Stage-1 map = 61%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:47:03,282 Stage-1 map = 65%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:47:04,287 Stage-1 map = 65%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:47:05,305 Stage-1 map = 65%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:47:06,310 Stage-1 map = 69%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:47:07,316 Stage-1 map = 73%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:47:08,321 Stage-1 map = 73%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:47:09,326 Stage-1 map = 76%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:47:10,331 Stage-1 map = 80%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:47:11,336 Stage-1 map = 80%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:47:12,341 Stage-1 map = 84%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:47:13,346 Stage-1 map = 88%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:47:14,351 Stage-1 map = 88%, reduce = 17%, Cumulative CPU 83.95 sec
2013-09-10 18:47:15,356 Stage-1 map = 93%, reduce = 17%, Cumulative CPU 118.21 sec
2013-09-10 18:47:16,372 Stage-1 map = 93%, reduce = 17%, Cumulative CPU 118.21 sec
2013-09-10 18:47:17,379 Stage-1 map = 93%, reduce = 17%, Cumulative CPU 118.21 sec
2013-09-10 18:47:18,384 Stage-1 map = 97%, reduce = 17%, Cumulative CPU 118.21 sec
2013-09-10 18:47:19,388 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 162.76 sec
2013-09-10 18:47:20,393 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 162.76 sec
2013-09-10 18:47:21,397 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 162.76 sec
2013-09-10 18:47:22,404 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 165.27 sec
2013-09-10 18:47:23,410 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 165.27 sec
2013-09-10 18:47:24,415 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 165.27 sec
MapReduce Total cumulative CPU time: 2 minutes 45 seconds 270 msec
Ended Job = job_201309101627_0036
MapReduce Jobs Launched:
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 165.27 sec HDFS Read: 1082943442 HDFS Write: 9 SUCCESS
Total MapReduce CPU Time Spent: 2 minutes 45 seconds 270 msec
OK
10000000
Time taken: 74.228 seconds, Fetched: 1 row(s)
hive> quit;
status
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_17475@mturlrep13_201309101847_1783698271.txt
hive> ;
hive> quit;
times: 1
query: SELECT count(*) FROM hits_10m WHERE AdvEngineID != 0;
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_17882@mturlrep13_201309101847_1295809350.txt
hive> SELECT count(*) FROM hits_10m WHERE AdvEngineID != 0;;
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapred.reduce.tasks=<number>
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0037
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
2013-09-10 18:47:44,058 Stage-1 map = 0%, reduce = 0%
2013-09-10 18:47:49,086 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.21 sec
2013-09-10 18:47:50,093 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.21 sec
2013-09-10 18:47:51,101 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.21 sec
2013-09-10 18:47:52,107 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.21 sec
2013-09-10 18:47:53,113 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.21 sec
2013-09-10 18:47:54,119 Stage-1 map = 75%, reduce = 0%, Cumulative CPU 18.18 sec
2013-09-10 18:47:55,125 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 23.81 sec
2013-09-10 18:47:56,130 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 23.81 sec
2013-09-10 18:47:57,138 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 25.64 sec
2013-09-10 18:47:58,144 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 25.64 sec
2013-09-10 18:47:59,150 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 25.64 sec
MapReduce Total cumulative CPU time: 25 seconds 640 msec
Ended Job = job_201309101627_0037
MapReduce Jobs Launched:
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 25.64 sec HDFS Read: 907716 HDFS Write: 7 SUCCESS
Total MapReduce CPU Time Spent: 25 seconds 640 msec
OK
171127
Time taken: 25.153 seconds, Fetched: 1 row(s)
hive> quit;
status
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_19147@mturlrep13_201309101848_1891179156.txt
hive> ;
hive> quit;
times: 1
query: SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m;
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_19567@mturlrep13_201309101848_690102300.txt
hive> SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m;;
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapred.reduce.tasks=<number>
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0038
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
2013-09-10 18:48:18,837 Stage-1 map = 0%, reduce = 0%
2013-09-10 18:48:25,865 Stage-1 map = 39%, reduce = 0%
2013-09-10 18:48:26,875 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
2013-09-10 18:48:27,882 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
2013-09-10 18:48:28,889 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
2013-09-10 18:48:29,895 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
2013-09-10 18:48:30,901 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
2013-09-10 18:48:31,907 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
2013-09-10 18:48:32,914 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
2013-09-10 18:48:33,920 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 34.59 sec
2013-09-10 18:48:34,925 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 34.59 sec
2013-09-10 18:48:35,930 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 34.59 sec
2013-09-10 18:48:36,935 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 34.59 sec
2013-09-10 18:48:37,940 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 34.59 sec
2013-09-10 18:48:38,945 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 35.24 sec
2013-09-10 18:48:39,952 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 36.63 sec
2013-09-10 18:48:40,958 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 36.63 sec
MapReduce Total cumulative CPU time: 36 seconds 630 msec
Ended Job = job_201309101627_0038
MapReduce Jobs Launched:
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 36.63 sec HDFS Read: 8109219 HDFS Write: 30 SUCCESS
Total MapReduce CPU Time Spent: 36 seconds 630 msec
OK
Time taken: 31.961 seconds, Fetched: 1 row(s)
hive> quit;
status
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_20898@mturlrep13_201309101848_327652001.txt
hive> ;
hive> quit;
times: 1
query: SELECT sum(UserID) FROM hits_10m;
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_21336@mturlrep13_201309101848_1975614127.txt
hive> SELECT sum(UserID) FROM hits_10m;;
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapred.reduce.tasks=<number>
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0039
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
2013-09-10 18:49:00,561 Stage-1 map = 0%, reduce = 0%
2013-09-10 18:49:07,617 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
2013-09-10 18:49:08,626 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
2013-09-10 18:49:09,634 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
2013-09-10 18:49:10,639 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
2013-09-10 18:49:11,646 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
2013-09-10 18:49:12,652 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
2013-09-10 18:49:13,658 Stage-1 map = 75%, reduce = 0%, Cumulative CPU 21.86 sec
2013-09-10 18:49:14,664 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
2013-09-10 18:49:15,670 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
2013-09-10 18:49:16,675 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
2013-09-10 18:49:17,680 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
2013-09-10 18:49:18,685 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
2013-09-10 18:49:19,690 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
2013-09-10 18:49:20,697 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 32.07 sec
2013-09-10 18:49:21,703 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 32.07 sec
MapReduce Total cumulative CPU time: 32 seconds 70 msec
Ended Job = job_201309101627_0039
MapReduce Jobs Launched:
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 32.07 sec HDFS Read: 57312623 HDFS Write: 21 SUCCESS
Total MapReduce CPU Time Spent: 32 seconds 70 msec
OK
-4662894107982093709
Time taken: 30.94 seconds, Fetched: 1 row(s)
hive> quit;
status
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_22560@mturlrep13_201309101849_2023198520.txt
hive> ;
hive> quit;
times: 1
query: SELECT count(DISTINCT UserID) FROM hits_10m;
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_22993@mturlrep13_201309101849_961728603.txt
hive> SELECT count(DISTINCT UserID) FROM hits_10m;;
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapred.reduce.tasks=<number>
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0040
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
2013-09-10 18:49:41,232 Stage-1 map = 0%, reduce = 0%
2013-09-10 18:49:48,264 Stage-1 map = 43%, reduce = 0%
2013-09-10 18:49:51,283 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
2013-09-10 18:49:52,291 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
2013-09-10 18:49:53,298 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
2013-09-10 18:49:54,304 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
2013-09-10 18:49:55,310 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
2013-09-10 18:49:56,317 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
2013-09-10 18:49:57,332 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
2013-09-10 18:49:58,337 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 27.01 sec
2013-09-10 18:49:59,342 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 27.01 sec
2013-09-10 18:50:00,348 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 27.01 sec
2013-09-10 18:50:01,353 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
2013-09-10 18:50:02,360 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
2013-09-10 18:50:03,365 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
2013-09-10 18:50:04,369 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
2013-09-10 18:50:05,375 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
2013-09-10 18:50:06,379 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
2013-09-10 18:50:07,385 Stage-1 map = 100%, reduce = 88%, Cumulative CPU 55.01 sec
2013-09-10 18:50:08,391 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 62.95 sec
2013-09-10 18:50:09,397 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 62.95 sec
2013-09-10 18:50:10,402 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 62.95 sec
MapReduce Total cumulative CPU time: 1 minutes 2 seconds 950 msec
Ended Job = job_201309101627_0040
MapReduce Jobs Launched:
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 62.95 sec HDFS Read: 57312623 HDFS Write: 8 SUCCESS
Total MapReduce CPU Time Spent: 1 minutes 2 seconds 950 msec
OK
2037258
Time taken: 38.84 seconds, Fetched: 1 row(s)
hive> quit;
status
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_24634@mturlrep13_201309101850_840502487.txt
hive> ;
hive> quit;
times: 1
query: SELECT count(DISTINCT SearchPhrase) FROM hits_10m;
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_25401@mturlrep13_201309101850_84750246.txt
hive> SELECT count(DISTINCT SearchPhrase) FROM hits_10m;;
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapred.reduce.tasks=<number>
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0041
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
2013-09-10 18:50:31,472 Stage-1 map = 0%, reduce = 0%
2013-09-10 18:50:38,501 Stage-1 map = 43%, reduce = 0%
2013-09-10 18:50:40,517 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
2013-09-10 18:50:41,523 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
2013-09-10 18:50:42,531 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
2013-09-10 18:50:43,536 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
2013-09-10 18:50:44,542 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
2013-09-10 18:50:45,548 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
2013-09-10 18:50:46,555 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
2013-09-10 18:50:47,561 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 21.42 sec
2013-09-10 18:50:48,566 Stage-1 map = 97%, reduce = 17%, Cumulative CPU 31.8 sec
2013-09-10 18:50:49,571 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
2013-09-10 18:50:50,576 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
2013-09-10 18:50:51,581 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
2013-09-10 18:50:52,587 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
2013-09-10 18:50:53,592 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
2013-09-10 18:50:54,597 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
2013-09-10 18:50:55,602 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
2013-09-10 18:50:56,607 Stage-1 map = 100%, reduce = 92%, Cumulative CPU 42.95 sec
2013-09-10 18:50:57,615 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 50.6 sec
2013-09-10 18:50:58,642 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 50.6 sec
MapReduce Total cumulative CPU time: 50 seconds 600 msec
Ended Job = job_201309101627_0041
MapReduce Jobs Launched:
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 50.6 sec HDFS Read: 27820105 HDFS Write: 8 SUCCESS
Total MapReduce CPU Time Spent: 50 seconds 600 msec
OK
1110413
Time taken: 37.04 seconds, Fetched: 1 row(s)
hive> quit;
status
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_26718@mturlrep13_201309101851_285967686.txt
hive> ;
hive> quit;
times: 1
query: SELECT min(EventDate), max(EventDate) FROM hits_10m;
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_27149@mturlrep13_201309101851_2135309314.txt
hive> SELECT min(EventDate), max(EventDate) FROM hits_10m;;
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapred.reduce.tasks=<number>
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0042
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
2013-09-10 18:51:19,077 Stage-1 map = 0%, reduce = 0%
2013-09-10 18:51:25,106 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
2013-09-10 18:51:26,114 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
2013-09-10 18:51:27,123 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
2013-09-10 18:51:28,129 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
2013-09-10 18:51:29,135 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
2013-09-10 18:51:30,141 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
2013-09-10 18:51:31,147 Stage-1 map = 75%, reduce = 0%, Cumulative CPU 20.4 sec
2013-09-10 18:51:32,152 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 27.44 sec
2013-09-10 18:51:33,158 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
2013-09-10 18:51:34,163 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
2013-09-10 18:51:35,168 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
2013-09-10 18:51:36,173 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
2013-09-10 18:51:37,179 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
2013-09-10 18:51:38,184 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
2013-09-10 18:51:39,192 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 29.39 sec
2013-09-10 18:51:40,198 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 29.39 sec
MapReduce Total cumulative CPU time: 29 seconds 390 msec
Ended Job = job_201309101627_0042
MapReduce Jobs Launched:
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 29.39 sec HDFS Read: 597016 HDFS Write: 6 SUCCESS
Total MapReduce CPU Time Spent: 29 seconds 390 msec
OK
Time taken: 30.908 seconds, Fetched: 1 row(s)
hive> quit;
status
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_28401@mturlrep13_201309101851_891001725.txt
hive> ;
hive> quit;
times: 1
query: SELECT AdvEngineID, count(*) AS c FROM hits_10m WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY c DESC;
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_28836@mturlrep13_201309101851_1054092389.txt
hive> SELECT AdvEngineID, count(*) AS c FROM hits_10m WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY c DESC;;
Total MapReduce jobs = 2
Launching Job 1 out of 2
Number of reduce tasks not specified. Estimated from input data size: 2
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapred.reduce.tasks=<number>
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0043
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 2
2013-09-10 18:51:59,809 Stage-1 map = 0%, reduce = 0%
2013-09-10 18:52:04,838 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.48 sec
2013-09-10 18:52:05,847 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.48 sec
2013-09-10 18:52:06,855 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.48 sec
2013-09-10 18:52:07,861 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.48 sec
2013-09-10 18:52:08,868 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.48 sec
2013-09-10 18:52:09,875 Stage-1 map = 75%, reduce = 0%, Cumulative CPU 18.07 sec
2013-09-10 18:52:10,881 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 23.92 sec
2013-09-10 18:52:11,887 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 23.92 sec
2013-09-10 18:52:12,894 Stage-1 map = 100%, reduce = 67%, Cumulative CPU 25.68 sec
2013-09-10 18:52:13,901 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 27.53 sec
2013-09-10 18:52:14,908 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 27.53 sec
MapReduce Total cumulative CPU time: 27 seconds 530 msec
Ended Job = job_201309101627_0043
Launching Job 2 out of 2
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapred.reduce.tasks=<number>
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0044
Hadoop job information for Stage-2: number of mappers: 1; number of reducers: 1
2013-09-10 18:52:17,388 Stage-2 map = 0%, reduce = 0%
2013-09-10 18:52:19,396 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
2013-09-10 18:52:20,401 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
2013-09-10 18:52:21,406 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
2013-09-10 18:52:22,411 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
2013-09-10 18:52:23,415 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
2013-09-10 18:52:24,420 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
2013-09-10 18:52:25,425 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
2013-09-10 18:52:26,430 Stage-2 map = 100%, reduce = 33%, Cumulative CPU 0.75 sec
2013-09-10 18:52:27,436 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 2.14 sec
2013-09-10 18:52:28,442 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 2.14 sec
2013-09-10 18:52:29,448 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 2.14 sec
MapReduce Total cumulative CPU time: 2 seconds 140 msec
Ended Job = job_201309101627_0044
MapReduce Jobs Launched:
Job 0: Map: 4 Reduce: 2 Cumulative CPU: 27.53 sec HDFS Read: 907716 HDFS Write: 384 SUCCESS
Job 1: Map: 1 Reduce: 1 Cumulative CPU: 2.14 sec HDFS Read: 1153 HDFS Write: 60 SUCCESS
Total MapReduce CPU Time Spent: 29 seconds 670 msec
OK
Time taken: 39.506 seconds, Fetched: 9 row(s)
hive> quit;
-- мощная фильтрация. После фильтрации почти ничего не остаётся, но делаем ещё агрегацию.;
status
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_30667@mturlrep13_201309101852_966681525.txt
hive> ;
hive> quit;
times: 1
query: SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10;
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_31123@mturlrep13_201309101852_1252745596.txt
hive> SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10;;
Total MapReduce jobs = 2
Launching Job 1 out of 2
Number of reduce tasks not specified. Estimated from input data size: 2
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapred.reduce.tasks=<number>
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0045
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 2
2013-09-10 18:52:49,457 Stage-1 map = 0%, reduce = 0%
2013-09-10 18:52:56,485 Stage-1 map = 43%, reduce = 0%
2013-09-10 18:52:59,503 Stage-1 map = 46%, reduce = 0%, Cumulative CPU 14.56 sec
2013-09-10 18:53:00,511 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
2013-09-10 18:53:01,519 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
2013-09-10 18:53:02,526 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
2013-09-10 18:53:03,533 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
2013-09-10 18:53:04,539 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
2013-09-10 18:53:05,545 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
2013-09-10 18:53:06,550 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
2013-09-10 18:53:07,557 Stage-1 map = 92%, reduce = 17%, Cumulative CPU 29.73 sec
2013-09-10 18:53:08,563 Stage-1 map = 92%, reduce = 17%, Cumulative CPU 29.73 sec
2013-09-10 18:53:09,569 Stage-1 map = 92%, reduce = 17%, Cumulative CPU 29.73 sec
2013-09-10 18:53:10,575 Stage-1 map = 97%, reduce = 17%, Cumulative CPU 44.01 sec
2013-09-10 18:53:11,598 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 58.47 sec
2013-09-10 18:53:12,604 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 58.47 sec
2013-09-10 18:53:13,609 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 58.47 sec
2013-09-10 18:53:14,615 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 58.47 sec
2013-09-10 18:53:15,620 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 58.47 sec
2013-09-10 18:53:16,627 Stage-1 map = 100%, reduce = 63%, Cumulative CPU 65.64 sec
2013-09-10 18:53:17,634 Stage-1 map = 100%, reduce = 63%, Cumulative CPU 65.64 sec
2013-09-10 18:53:18,640 Stage-1 map = 100%, reduce = 63%, Cumulative CPU 65.64 sec
2013-09-10 18:53:19,646 Stage-1 map = 100%, reduce = 63%, Cumulative CPU 65.64 sec
2013-09-10 18:53:20,653 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 71.27 sec
2013-09-10 18:53:21,659 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 71.27 sec
MapReduce Total cumulative CPU time: 1 minutes 11 seconds 270 msec
Ended Job = job_201309101627_0045
Launching Job 2 out of 2
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapred.reduce.tasks=<number>
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0046
Hadoop job information for Stage-2: number of mappers: 1; number of reducers: 1
2013-09-10 18:53:25,187 Stage-2 map = 0%, reduce = 0%
2013-09-10 18:53:27,196 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
2013-09-10 18:53:28,202 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
2013-09-10 18:53:29,207 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
2013-09-10 18:53:30,211 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
2013-09-10 18:53:31,216 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
2013-09-10 18:53:32,220 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
2013-09-10 18:53:33,226 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
2013-09-10 18:53:34,231 Stage-2 map = 100%, reduce = 33%, Cumulative CPU 1.42 sec
2013-09-10 18:53:35,237 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 3.16 sec
2013-09-10 18:53:36,243 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 3.16 sec
MapReduce Total cumulative CPU time: 3 seconds 160 msec
Ended Job = job_201309101627_0046
MapReduce Jobs Launched:
Job 0: Map: 4 Reduce: 2 Cumulative CPU: 71.27 sec HDFS Read: 67340015 HDFS Write: 100142 SUCCESS
Job 1: Map: 1 Reduce: 1 Cumulative CPU: 3.16 sec HDFS Read: 100911 HDFS Write: 96 SUCCESS
Total MapReduce CPU Time Spent: 1 minutes 14 seconds 430 msec
OK
Time taken: 56.439 seconds, Fetched: 10 row(s)
hive> quit;
-- агрегация, среднее количество ключей.;
status
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_609@mturlrep13_201309101853_355533849.txt
hive> ;
hive> quit;
times: 1
query: SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m GROUP BY RegionID ORDER BY c DESC LIMIT 10;
spawn hive
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_1183@mturlrep13_201309101853_289725544.txt
hive> SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m GROUP BY RegionID ORDER BY c DESC LIMIT 10;;
Total MapReduce jobs = 2
Launching Job 1 out of 2
Number of reduce tasks not specified. Estimated from input data size: 2
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapred.reduce.tasks=<number>
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0047
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 2
2013-09-10 18:53:55,838 Stage-1 map = 0%, reduce = 0%
2013-09-10 18:54:02,865 Stage-1 map = 29%, reduce = 0%
2013-09-10 18:54:05,876 Stage-1 map = 43%, reduce = 0%
2013-09-10 18:54:08,894 Stage-1 map = 46%, reduce = 0%, Cumulative CPU 16.8 sec
2013-09-10 18:54:09,901 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
2013-09-10 18:54:10,909 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
2013-09-10 18:54:11,915 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
2013-09-10 18:54:12,921 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
2013-09-10 18:54:13,927 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
2013-09-10 18:54:14,932 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
2013-09-10 18:54:15,938 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
2013-09-10 18:54:16,943 Stage-1 map = 80%, reduce = 17%, Cumulative CPU 34.85 sec
2013-09-10 18:54:17,949 Stage-1 map = 80%, reduce = 17%, Cumulative CPU 34.85 sec
2013-09-10 18:54:18,954 Stage-1 map = 80%, reduce = 17%, Cumulative CPU 34.85 sec
2013-09-10 18:54:19,959 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 34.85 sec
2013-09-10 18:54:20,964 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 34.85 sec
2013-09-10 18:54:21,970 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
2013-09-10 18:54:22,975 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
2013-09-10 18:54:23,980 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
2013-09-10 18:54:24,986 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
2013-09-10 18:54:25,991 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
2013-09-10 18:54:26,997 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
2013-09-10 18:54:28,002 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
2013-09-10 18:54:29,008 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
2013-09-10 18:54:30,014 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
2013-09-10 18:54:31,021 Stage-1 map = 100%, reduce = 58%, Cumulative CPU 74.39 sec
2013-09-10 18:54:32,027 Stage-1 map = 100%, reduce = 96%, Cumulative CPU 74.39 sec
2013-09-10 18:54:33,033 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 84.05 sec
2013-09-10 18:54:34,038 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 84.05 sec
2013-09-10 18:54:35,044 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 84.05 sec
MapReduce Total cumulative CPU time: 1 minutes 24 seconds 50 msec
Ended Job = job_201309101627_0047
Launching Job 2 out of 2
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapred.reduce.tasks=<number>
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0048

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,109 +0,0 @@
SELECT count(*) FROM hits_10m;
SELECT count(*) FROM hits_10m WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m;
SELECT sum(UserID) FROM hits_10m;
SELECT count(DISTINCT UserID) FROM hits_10m;
SELECT count(DISTINCT SearchPhrase) FROM hits_10m;
SELECT min(EventDate), max(EventDate) FROM hits_10m;
SELECT AdvEngineID, count(*) AS c FROM hits_10m WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY c DESC;
-- мощная фильтрация. После фильтрации почти ничего не остаётся, но делаем ещё агрегацию.;
SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10;
-- агрегация, среднее количество ключей.;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m GROUP BY RegionID ORDER BY c DESC LIMIT 10;
-- агрегация, среднее количество ключей, несколько агрегатных функций.;
SELECT MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
-- мощная фильтрация по строкам, затем агрегация по строкам.;
SELECT MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
-- мощная фильтрация по строкам, затем агрегация по паре из числа и строки.;
SELECT SearchPhrase, count(*) AS c FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
-- средняя фильтрация по строкам, затем агрегация по строкам, большое количество ключей.;
SELECT SearchPhrase, count(DISTINCT UserID) AS u FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
-- агрегация чуть сложнее.;
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
-- агрегация по числу и строке, большое количество ключей.;
SELECT UserID, count(*) AS c FROM hits_10m GROUP BY UserID ORDER BY c DESC LIMIT 10;
-- агрегация по очень большому количеству ключей, может не хватить оперативки.;
SELECT UserID, SearchPhrase, count(*) AS c FROM hits_10m GROUP BY UserID, SearchPhrase ORDER BY c DESC LIMIT 10;
-- ещё более сложная агрегация.;
SELECT UserID, SearchPhrase, count(*) AS c FROM hits_10m GROUP BY UserID, SearchPhrase LIMIT 10;
-- то же самое, но без сортировки.;
SELECT UserID, minute(EventTime), SearchPhrase, count(*) AS c FROM hits_10m GROUP BY UserID, minute(EventTime), SearchPhrase ORDER BY c DESC LIMIT 10;
-- ещё более сложная агрегация, не стоит выполнять на больших таблицах.;
SELECT UserID FROM hits_10m WHERE UserID = 12345678901234567890;
-- мощная фильтрация по столбцу типа UInt64.;
SELECT count(*) AS c FROM hits_10m WHERE URL LIKE '%metrika%';
-- фильтрация по поиску подстроки в строке.;
SELECT SearchPhrase, MAX(URL), count(*) AS c FROM hits_10m WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
-- вынимаем большие столбцы, фильтрация по строке.;
SELECT SearchPhrase, MAX(URL), MAX(Title), count(*) AS c, count(DISTINCT UserID) FROM hits_10m WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
-- чуть больше столбцы.;
SELECT * FROM hits_10m WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
-- плохой запрос - вынимаем все столбцы.;
SELECT SearchPhrase, EventTime FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
-- большая сортировка.;
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
-- большая сортировка по строкам.;
SELECT SearchPhrase, EventTime FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
-- большая сортировка по кортежу.;
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM hits_10m WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
-- считаем средние длины URL для крупных счётчиков.;
SELECT SUBSTRING(SUBSTRING(Referer, instr(Referer, '//') + 2), 1, if(0 < instr(SUBSTRING(Referer, instr(Referer, '//') + 2), '/') - 1, instr(SUBSTRING(Referer, instr(Referer, '//') + 2), '/' ) - 1, 0)), avg(length(Referer)) AS l, count(*) AS c, MAX(Referer) FROM hits_100m WHERE Referer != '' GROUP BY SUBSTRING(SUBSTRING(Referer, instr(Referer, '//') + 2), 1, if(0 < instr(SUBSTRING(Referer, instr(Referer, '//') + 2), '/') - 1, instr(SUBSTRING(Referer, instr(Referer, '//') + 2), '/' ) - 1, 0)) HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
-- то же самое, но с разбивкой по доменам.;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_10m;
-- много тупых агрегатных функций.;
SELECT SearchEngineID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
-- сложная агрегация, для больших таблиц может не хватить оперативки.;
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
-- агрегация по двум полям, которая ничего не агрегирует. Для больших таблиц выполнить не получится.;
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
-- то же самое, но ещё и без фильтрации.;
SELECT URL, count(*) AS c FROM hits_10m GROUP BY URL ORDER BY c DESC LIMIT 10;
-- агрегация по URL.;
SELECT 1, URL, count(*) AS c FROM hits_10m GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
-- агрегация по URL и числу.;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits_10m GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT DontCountHits != 0 AND NOT Refresh != 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT DontCountHits != 0 AND NOT Refresh != 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT Refresh != 0 AND IsLink != 0 AND NOT IsDownload != 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, URL, count(*) as c, if(SearchEngineID = 0 AND AdvEngineID = 0 , Referer, '') as src FROM hits_100m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT Refresh != 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, URL, if(SearchEngineID = 0 AND AdvEngineID = 0 , Referer, '') ORDER BY c DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT Refresh != 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 6202628419148573758 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100000;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT Refresh != 0 AND NOT DontCountHits != 0 AND URLHash = 6202628419148573758 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT unix_timestamp(EventTime) - SECOND(EventTime) AS m, count(*) FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-02') AND NOT Refresh != 0 AND NOT DontCountHits != 0 GROUP BY unix_timestamp(EventTime) - SECOND(EventTime) ORDER BY m;

View File

@ -1,2 +0,0 @@
cd /home/kartavyy/benchmark
./benchmark.sh -c hive/conf.sh -n $1 > hive/log/log_$1

View File

@ -1,4 +0,0 @@
CONF_DIR=/home/kartavyy/benchmark/mysql
expect_file=$CONF_DIR/expect.tcl
test_file=$CONF_DIR/queries.sql
etc_init_d_service=/etc/init.d/mysql

View File

@ -1,7 +0,0 @@
create table hits_10m( WatchID BIGINT, JavaEnable TINYINT UNSIGNED, Title VARCHAR(1024), GoodEvent SMALLINT, EventTime DATETIME, EventDate DATE, CounterID INTEGER UNSIGNED, ClientIP INTEGER UNSIGNED, RegionID INTEGER UNSIGNED, UserID BIGINT, CounterClass TINYINT, OS SMALLINT, UserAgent SMALLINT, URL VARCHAR(6072), Referer VARCHAR(2048), Refresh TINYINT, RefererCategoryID SMALLINT UNSIGNED, RefererRegionID INTEGER UNSIGNED, URLCategoryID SMALLINT UNSIGNED, URLRegionID INTEGER UNSIGNED, ResolutionWidth SMALLINT UNSIGNED, ResolutionHeight SMALLINT UNSIGNED, ResolutionDepth TINYINT UNSIGNED, FlashMajor TINYINT UNSIGNED, FlashMinor TINYINT UNSIGNED, FlashMinor2 VARCHAR(256), NetMajor TINYINT UNSIGNED, NetMinor TINYINT UNSIGNED, UserAgentMajor SMALLINT UNSIGNED, UserAgentMinor CHAR(2), CookieEnable TINYINT UNSIGNED, JavascriptEnable TINYINT UNSIGNED, IsMobile TINYINT UNSIGNED, MobilePhone TINYINT UNSIGNED, MobilePhoneModel VARCHAR(80), Params VARCHAR(2048), IPNetworkID INT UNSIGNED, TraficSourceID SMALLINT, SearchEngineID SMALLINT UNSIGNED, SearchPhrase VARCHAR(1024), AdvEngineID TINYINT UNSIGNED, IsArtifical TINYINT UNSIGNED, WindowClientWidth SMALLINT UNSIGNED, WindowClientHeight SMALLINT UNSIGNED, ClientTimeZone INTEGER, ClientEventTime DATETIME, SilverlightVersion1 TINYINT UNSIGNED, SilverlightVersion2 TINYINT UNSIGNED, SilverlightVersion3 INT UNSIGNED, SilverlightVersion4 SMALLINT UNSIGNED, PageCharset VARCHAR(80), CodeVersion INT UNSIGNED, IsLink TINYINT UNSIGNED, IsDownload TINYINT UNSIGNED, IsNotBounce TINYINT UNSIGNED, FUniqID BIGINT, OriginalURL VARCHAR(6072), HID INT UNSIGNED, IsOldCounter TINYINT UNSIGNED, IsEvent TINYINT UNSIGNED, IsParameter TINYINT UNSIGNED, DontCountHits TINYINT UNSIGNED, WithHash TINYINT UNSIGNED, HitColor CHAR(1), LocalEventTime DATETIME, Age TINYINT UNSIGNED, Sex TINYINT UNSIGNED, Income TINYINT UNSIGNED, Interests SMALLINT UNSIGNED, Robotness TINYINT UNSIGNED, RemoteIP INT UNSIGNED, WindowName INT, OpenerName INT, HistoryLength SMALLINT, BrowserLanguage CHAR(2), BrowserCountry CHAR(2), SocialNetwork VARCHAR(128), SocialAction VARCHAR(128), HTTPError SMALLINT UNSIGNED, SendTiming INT UNSIGNED, DNSTiming INT UNSIGNED, ConnectTiming INTEGER UNSIGNED, ResponseStartTiming INTEGER UNSIGNED, ResponseEndTiming INTEGER UNSIGNED, FetchTiming INTEGER UNSIGNED, SocialSourceNetworkID TINYINT UNSIGNED, SocialSourcePage VARCHAR(128), ParamPrice BIGINT, ParamOrderID VARCHAR(80), ParamCurrency CHAR(3), ParamCurrencyID SMALLINT UNSIGNED, OpenstatServiceName VARCHAR(80), OpenstatCampaignID VARCHAR(80), OpenstatAdID VARCHAR(80), OpenstatSourceID VARCHAR(80), UTMSource VARCHAR(256), UTMMedium VARCHAR(256), UTMCampaign VARCHAR(256), UTMContent VARCHAR(256), UTMTerm VARCHAR(256), FromTag VARCHAR(256), HasGCLID TINYINT UNSIGNED, RefererHash BIGINT, URLHash BIGINT, CLID INTEGER UNSIGNED, UserIDHash BIGINT UNSIGNED) ENGINE=MYISAM;
CREATE INDEX hits_10m_ind on hits_10m (CounterID, EventDate, UserIDHash, EventTime) using BTREE;
load data infile '/opt/dump/dump_0.3/dump_hits_10m_meshed_utf8.tsv' into table hits_10m FIELDS TERMINATED BY '\t' ESCAPED BY '\\' ;

View File

@ -1,23 +0,0 @@
#!/usr/bin/env bash
#!/bin/expect
# Set timeout
set timeout 600
# Get arguments
set query [lindex $argv 0]
spawn mysql -u root
expect "mysql>"
send "use hits\r"
expect "mysql>"
send "$query\r"
expect "mysql>"
send "quit\r"
expect eof

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,111 +0,0 @@
SELECT SQL_NO_CACHE count(*) FROM hits_10m;
SELECT SQL_NO_CACHE count(*) FROM hits_10m WHERE AdvEngineID != 0;
SELECT SQL_NO_CACHE sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m;
SELECT SQL_NO_CACHE sum(UserID) FROM hits_10m;
SELECT SQL_NO_CACHE count(DISTINCT UserID) FROM hits_10m;
SELECT SQL_NO_CACHE count(DISTINCT SearchPhrase) FROM hits_10m;
SELECT SQL_NO_CACHE min(EventDate), max(EventDate) FROM hits_10m;
SELECT SQL_NO_CACHE AdvEngineID, count(*) FROM hits_10m WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
-- мощная фильтрация. После фильтрации почти ничего не остаётся, но делаем ещё агрегацию.;
SELECT SQL_NO_CACHE RegionID, count(DISTINCT UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10;
-- агрегация, среднее количество ключей.;
SELECT SQL_NO_CACHE RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m GROUP BY RegionID ORDER BY count(*) DESC LIMIT 10;
-- агрегация, среднее количество ключей, несколько агрегатных функций.;
SELECT SQL_NO_CACHE MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
-- мощная фильтрация по строкам, затем агрегация по строкам.;
SELECT SQL_NO_CACHE MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
-- мощная фильтрация по строкам, затем агрегация по паре из числа и строки.;
SELECT SQL_NO_CACHE SearchPhrase, count(*) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- средняя фильтрация по строкам, затем агрегация по строкам, большое количество ключей.;
SELECT SQL_NO_CACHE SearchPhrase, count(DISTINCT UserID) AS u FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
-- агрегация чуть сложнее.;
SELECT SQL_NO_CACHE SearchEngineID, SearchPhrase, count(*) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- агрегация по числу и строке, большое количество ключей.;
SELECT SQL_NO_CACHE UserID, count(*) FROM hits_10m GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
-- агрегация по очень большому количеству ключей, может не хватить оперативки.;
SELECT SQL_NO_CACHE UserID, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- ещё более сложная агрегация.;
SELECT SQL_NO_CACHE UserID, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, SearchPhrase LIMIT 10;
-- то же самое, но без сортировки.;
SELECT SQL_NO_CACHE UserID, Minute(EventTime) AS m, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- ещё более сложная агрегация, не стоит выполнять на больших таблицах.;
SELECT SQL_NO_CACHE UserID FROM hits_10m WHERE UserID = 12345678901234567890;
-- мощная фильтрация по столбцу типа UInt64.;
SELECT SQL_NO_CACHE count(*) FROM hits_10m WHERE URL LIKE '%metrika%';
-- фильтрация по поиску подстроки в строке.;
SELECT SQL_NO_CACHE SearchPhrase, MAX(URL), count(*) FROM hits_10m WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- вынимаем большие столбцы, фильтрация по строке.;
SELECT SQL_NO_CACHE SearchPhrase, MAX(URL), MAX(Title), count(*) AS c, count(DISTINCT UserID) FROM hits_10m WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- чуть больше столбцы.;
SELECT SQL_NO_CACHE * FROM hits_10m WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
-- плохой запрос - вынимаем все столбцы.;
SELECT SQL_NO_CACHE SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
-- большая сортировка.;
SELECT SQL_NO_CACHE SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
-- большая сортировка по строкам.;
SELECT SQL_NO_CACHE SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
-- большая сортировка по кортежу.;
SELECT SQL_NO_CACHE CounterID, avg(length(URL)) AS l, count(*) FROM hits_10m WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
-- считаем средние длины URL для крупных счётчиков.;
SELECT SQL_NO_CACHE SUBSTRING(SUBSTRING(Referer, POSITION('//' IN Referer) + 2), 1, GREATEST(0, POSITION('/' IN SUBSTRING(Referer, POSITION('//' IN Referer) + 2)) - 1)) AS k, avg(length(Referer)) AS l, count(*) AS c, MAX(Referer) FROM hits_10m WHERE Referer != '' GROUP BY k HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
-- то же самое, но с разбивкой по доменам.;
SELECT SQL_NO_CACHE sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_10m;
-- много тупых агрегатных функций.;
SELECT SQL_NO_CACHE SearchEngineID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY count(*) DESC LIMIT 10;
-- сложная агрегация, для больших таблиц может не хватить оперативки.;
SELECT SQL_NO_CACHE WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
-- агрегация по двум полям, которая ничего не агрегирует. Для больших таблиц выполнить не получится.;
SELECT SQL_NO_CACHE WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
-- то же самое, но ещё и без фильтрации.;
SELECT SQL_NO_CACHE URL, count(*) FROM hits_10m GROUP BY URL ORDER BY count(*) DESC LIMIT 10;
-- агрегация по URL.;
SELECT SQL_NO_CACHE 1, URL, count(*) FROM hits_10m GROUP BY 1, URL ORDER BY count(*) DESC LIMIT 10;
-- агрегация по URL и числу.;
SELECT SQL_NO_CACHE ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) FROM hits_10m GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY count(*) DESC LIMIT 10;
SELECT SQL_NO_CACHE URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT SQL_NO_CACHE Title, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT SQL_NO_CACHE URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT SQL_NO_CACHE TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN SearchEngineID = 0 AND AdvEngineID = 0 THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT SQL_NO_CACHE URLHash, EventDate, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = 6202628419148573758 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100000;
SELECT SQL_NO_CACHE WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = 6202628419148573758 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT SQL_NO_CACHE EventTime - INTERVAL SECOND(EventTime) SECOND AS Minute, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;

View File

@ -1,5 +0,0 @@
CONF_DIR=/home/kartavyy/benchmark/infobright
expect_file=$CONF_DIR/expect.tcl
test_file=$CONF_DIR/queries.sql
etc_init_d_service=/etc/init.d/mysqld-ib

View File

@ -1,111 +0,0 @@
create table hits_10m
(
WatchID BIGINT,
JavaEnable SMALLINT,
Title VARCHAR(1400),
GoodEvent SMALLINT,
EventTime TIMESTAMP,
EventDate DATE,
CounterID BIGINT,
ClientIP BIGINT,
RegionID BIGINT,
UserID BIGINT,
CounterClass TINYINT,
OS SMALLINT,
UserAgent SMALLINT,
URL VARCHAR(7800),
Referer VARCHAR(3125),
Refresh TINYINT,
RefererCategoryID INT,
RefererRegionID BIGINT,
URLCategoryID INT,
URLRegionID BIGINT,
ResolutionWidth INT,
ResolutionHeight INT,
ResolutionDepth SMALLINT,
FlashMajor SMALLINT,
FlashMinor SMALLINT,
FlashMinor2 VARCHAR(256),
NetMajor SMALLINT,
NetMinor SMALLINT,
UserAgentMajor INT,
UserAgentMinor CHAR(2),
CookieEnable SMALLINT,
JavascriptEnable SMALLINT,
IsMobile SMALLINT,
MobilePhone SMALLINT,
MobilePhoneModel VARCHAR(80),
Params VARCHAR(2925),
IPNetworkID BIGINT,
TraficSourceID SMALLINT,
SearchEngineID INT,
SearchPhrase VARCHAR(2008),
AdvEngineID SMALLINT,
IsArtifical SMALLINT,
WindowClientWidth INT,
WindowClientHeight INT,
ClientTimeZone INTEGER,
ClientEventTime TIMESTAMP,
SilverlightVersion1 SMALLINT,
SilverlightVersion2 SMALLINT,
SilverlightVersion3 BIGINT,
SilverlightVersion4 INT,
PageCharset VARCHAR(80),
CodeVersion BIGINT,
IsLink SMALLINT,
IsDownload SMALLINT,
IsNotBounce SMALLINT,
FUniqID BIGINT,
OriginalURL VARCHAR(8181),
HID BIGINT,
IsOldCounter SMALLINT,
IsEvent SMALLINT,
IsParameter SMALLINT,
DontCountHits SMALLINT,
WithHash SMALLINT,
HitColor CHAR(1),
LocalEventTime TIMESTAMP,
Age SMALLINT,
Sex SMALLINT,
Income SMALLINT,
Interests INT,
Robotness SMALLINT,
RemoteIP BIGINT,
WindowName INT,
OpenerName INT,
HistoryLength SMALLINT,
BrowserLanguage CHAR(2),
BrowserCountry CHAR(2),
SocialNetwork VARCHAR(128),
SocialAction VARCHAR(128),
HTTPError INT,
SendTiming BIGINT,
DNSTiming BIGINT,
ConnectTiming BIGINT,
ResponseStartTiming BIGINT,
ResponseEndTiming BIGINT,
FetchTiming BIGINT,
SocialSourceNetworkID SMALLINT,
SocialSourcePage VARCHAR(256),
ParamPrice BIGINT,
ParamOrderID VARCHAR(80),
ParamCurrency CHAR(3),
ParamCurrencyID INT,
OpenstatServiceName VARCHAR(80),
OpenstatCampaignID VARCHAR(512),
OpenstatAdID VARCHAR(80),
OpenstatSourceID VARCHAR(256),
UTMSource VARCHAR(256),
UTMMedium VARCHAR(256),
UTMCampaign VARCHAR(407),
UTMContent VARCHAR(256),
UTMTerm VARCHAR(437),
FromTag VARCHAR(428),
HasGCLID SMALLINT,
RefererHash BIGINT,
URLHash BIGINT,
CLID BIGINT,
UserIDHash BIGINT
);
LOAD DATA INFILE '/opt/dump/dump_0.3/dump_hits_10m_meshed_utf8.tsv' INTO TABLE hits_10m FIELDS TERMINATED BY '\t' ESCAPED BY '\\' ENCLOSED BY "NULL";

View File

@ -1,18 +0,0 @@
#!/usr/bin/env bash
#!/bin/expect
# Set timeout
set timeout 600
# Get arguments
set query [lindex $argv 0]
spawn mysql-ib -u root -D hits
expect "mysql>"
send "$query\r"
expect "mysql>"
send "quit\r"
expect eof

File diff suppressed because it is too large Load Diff

View File

@ -1,113 +0,0 @@
-- set GLOBAL max_length_for_sort_data = 8388608;
SELECT count(*) FROM hits_10m;
SELECT count(*) FROM hits_10m WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m;
SELECT sum(UserID) FROM hits_10m;
SELECT count(DISTINCT UserID) FROM hits_10m;
SELECT count(DISTINCT SearchPhrase) FROM hits_10m;
SELECT min(EventDate), max(EventDate) FROM hits_10m;
SELECT AdvEngineID, count(*) FROM hits_10m WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
-- мощная фильтрация. После фильтрации почти ничего не остаётся, но делаем ещё агрегацию.;
SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10;
-- агрегация, среднее количество ключей.;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m GROUP BY RegionID ORDER BY count(*) DESC LIMIT 10;
-- агрегация, среднее количество ключей, несколько агрегатных функций.;
SELECT MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
-- мощная фильтрация по строкам, затем агрегация по строкам.;
SELECT MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
-- мощная фильтрация по строкам, затем агрегация по паре из числа и строки.;
SELECT SearchPhrase, count(*) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- средняя фильтрация по строкам, затем агрегация по строкам, большое количество ключей.;
SELECT SearchPhrase, count(DISTINCT UserID) AS u FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
-- агрегация чуть сложнее.;
SELECT SearchEngineID, SearchPhrase, count(*) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- агрегация по числу и строке, большое количество ключей.;
SELECT UserID, count(*) FROM hits_10m GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
-- агрегация по очень большому количеству ключей, может не хватить оперативки.;
SELECT UserID, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- ещё более сложная агрегация.;
SELECT UserID, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, SearchPhrase LIMIT 10;
-- то же самое, но без сортировки.;
SELECT UserID, Minute(EventTime) AS m, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- ещё более сложная агрегация, не стоит выполнять на больших таблицах.;
SELECT UserID FROM hits_10m WHERE UserID = 123456789;
-- мощная фильтрация по столбцу типа UInt64.;
SELECT count(*) FROM hits_10m WHERE URL LIKE '%metrika%';
-- фильтрация по поиску подстроки в строке.;
SELECT SearchPhrase, MAX(URL), count(*) FROM hits_10m WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- вынимаем большие столбцы, фильтрация по строке.;
SELECT SearchPhrase, MAX(URL), MAX(Title), count(*) AS c, count(DISTINCT UserID) FROM hits_10m WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- чуть больше столбцы.;
SELECT * FROM hits_10m WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
-- плохой запрос - вынимаем все столбцы.;
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
-- большая сортировка.;
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
-- большая сортировка по строкам.;
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
-- большая сортировка по кортежу.;
SELECT CounterID, avg(length(URL)) AS l, count(*) FROM hits_10m WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
-- считаем средние длины URL для крупных счётчиков.;
SELECT SUBSTRING(SUBSTRING(Referer, POSITION('//' IN Referer) + 2), 1, GREATEST(0, POSITION('/' IN SUBSTRING(Referer, POSITION('//' IN Referer) + 2)) - 1)) AS k, avg(length(Referer)) AS l, count(*) AS c, MAX(Referer) FROM hits_10m WHERE Referer != '' GROUP BY k HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
-- то же самое, но с разбивкой по доменам.;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_10m;
-- много тупых агрегатных функций.;
SELECT SearchEngineID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY count(*) DESC LIMIT 10;
-- сложная агрегация, для больших таблиц может не хватить оперативки.;
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
-- агрегация по двум полям, которая ничего не агрегирует. Для больших таблиц выполнить не получится.;
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
-- то же самое, но ещё и без фильтрации.;
SELECT URL, count(*) FROM hits_10m GROUP BY URL ORDER BY count(*) DESC LIMIT 10;
-- агрегация по URL.;
SELECT 1, URL, count(*) FROM hits_10m GROUP BY 1, URL ORDER BY count(*) DESC LIMIT 10;
-- агрегация по URL и числу.;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) FROM hits_10m GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY count(*) DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN SearchEngineID = 0 AND AdvEngineID = 0 THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = 6202628419148573758 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100000;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = 6202628419148573758 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT EventTime - INTERVAL SECOND(EventTime) SECOND AS Minute, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;

View File

@ -1,20 +0,0 @@
#!/usr/bin/env bash
QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3
cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
sync
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
echo -n "["
for i in $(seq 1 $TRIES); do
RES=$(mysql -u root -h 127.0.0.1 -P 3306 --database=test -t -vvv -e "$query" 2>&1 | grep ' set ' | grep -oP '\d+\.\d+')
[[ "$?" == "0" ]] && echo -n "$RES" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done
echo "],"
done

View File

@ -1,141 +0,0 @@
Note: column store in MemSQL was introduced in Feb 2014.
http://www.memsql.com/download/
http://docs.memsql.com/docs/latest/setup/setup_onprem.html
wget http://download.memsql.com/8d9f4c4d99a547baa40ba097b171bd15/memsql-3.2.x86_64.deb
scp memsql-3.2.x86_64.deb example05e:~
ssh example05e
sudo dpkg -i memsql-3.2.x86_64.deb
sudo mkdir /opt/memsql-data/
sudo cp -r /var/lib/memsql/data/* /opt/memsql-data/
sudo rm -rf /var/lib/memsql/data
sudo ln -s /opt/memsql-data /var/lib/memsql/data
sudo chown -R memsql /opt/memsql-data
sudo chown -R memsql /var/lib/memsql/data
sudo service memsql start
mysql -u root -h 127.0.0.1 -P 3306 --prompt="memsql> "
CREATE DATABASE test;
USE test;
CREATE TABLE hits_10m
(
WatchID BIGINT,
JavaEnable SMALLINT,
Title VARCHAR(1400),
GoodEvent SMALLINT,
EventTime TIMESTAMP,
EventDate DATE,
CounterID BIGINT,
ClientIP BIGINT,
RegionID BIGINT,
UserID BIGINT,
CounterClass TINYINT,
OS SMALLINT,
UserAgent SMALLINT,
URL VARCHAR(7800),
Referer VARCHAR(3125),
Refresh TINYINT,
RefererCategoryID INT,
RefererRegionID BIGINT,
URLCategoryID INT,
URLRegionID BIGINT,
ResolutionWidth INT,
ResolutionHeight INT,
ResolutionDepth SMALLINT,
FlashMajor SMALLINT,
FlashMinor SMALLINT,
FlashMinor2 VARCHAR(256),
NetMajor SMALLINT,
NetMinor SMALLINT,
UserAgentMajor INT,
UserAgentMinor CHAR(2),
CookieEnable SMALLINT,
JavascriptEnable SMALLINT,
IsMobile SMALLINT,
MobilePhone SMALLINT,
MobilePhoneModel VARCHAR(80),
Params VARCHAR(2925),
IPNetworkID BIGINT,
TraficSourceID SMALLINT,
SearchEngineID INT,
SearchPhrase VARCHAR(2008),
AdvEngineID SMALLINT,
IsArtifical SMALLINT,
WindowClientWidth INT,
WindowClientHeight INT,
ClientTimeZone INTEGER,
ClientEventTime TIMESTAMP,
SilverlightVersion1 SMALLINT,
SilverlightVersion2 SMALLINT,
SilverlightVersion3 BIGINT,
SilverlightVersion4 INT,
PageCharset VARCHAR(80),
CodeVersion BIGINT,
IsLink SMALLINT,
IsDownload SMALLINT,
IsNotBounce SMALLINT,
FUniqID BIGINT,
OriginalURL VARCHAR(8181),
HID BIGINT,
IsOldCounter SMALLINT,
IsEvent SMALLINT,
IsParameter SMALLINT,
DontCountHits SMALLINT,
WithHash SMALLINT,
HitColor CHAR(1),
LocalEventTime TIMESTAMP,
Age SMALLINT,
Sex SMALLINT,
Income SMALLINT,
Interests INT,
Robotness SMALLINT,
RemoteIP BIGINT,
WindowName INT,
OpenerName INT,
HistoryLength SMALLINT,
BrowserLanguage CHAR(2),
BrowserCountry CHAR(2),
SocialNetwork VARCHAR(128),
SocialAction VARCHAR(128),
HTTPError INT,
SendTiming BIGINT,
DNSTiming BIGINT,
ConnectTiming BIGINT,
ResponseStartTiming BIGINT,
ResponseEndTiming BIGINT,
FetchTiming BIGINT,
SocialSourceNetworkID SMALLINT,
SocialSourcePage VARCHAR(256),
ParamPrice BIGINT,
ParamOrderID VARCHAR(80),
ParamCurrency CHAR(3),
ParamCurrencyID INT,
OpenstatServiceName VARCHAR(80),
OpenstatCampaignID VARCHAR(512),
OpenstatAdID VARCHAR(80),
OpenstatSourceID VARCHAR(256),
UTMSource VARCHAR(256),
UTMMedium VARCHAR(256),
UTMCampaign VARCHAR(407),
UTMContent VARCHAR(256),
UTMTerm VARCHAR(437),
FromTag VARCHAR(428),
HasGCLID SMALLINT,
RefererHash BIGINT,
URLHash BIGINT,
CLID BIGINT,
INDEX ColumnStoreIndex USING CLUSTERED COLUMNSTORE (CounterID, EventDate, UserID, EventTime)
);
Table creation takes about 15 seconds.
LOAD DATA INFILE '/opt/dumps/hits_10m_corrected.tsv' INTO TABLE hits_10m;
12 min 24.51 sec
13422 rows/sec.
data size: 1 613 773 528 bytes.

View File

@ -1,43 +0,0 @@
SELECT count(*) FROM hits_10m;
SELECT count(*) FROM hits_10m WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m;
SELECT sum(UserID) FROM hits_10m;
SELECT count(DISTINCT UserID) FROM hits_10m;
SELECT count(DISTINCT SearchPhrase) FROM hits_10m;
SELECT min(EventDate), max(EventDate) FROM hits_10m;
SELECT AdvEngineID, count(*) FROM hits_10m WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m GROUP BY RegionID ORDER BY count(*) DESC LIMIT 10;
SELECT MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(*) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT SearchPhrase, count(DISTINCT UserID) AS u FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, count(*) FROM hits_10m GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, Minute(EventTime) AS m, SearchPhrase, count(*) FROM hits_10m GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM hits_10m WHERE UserID = 123456789;
SELECT count(*) FROM hits_10m WHERE URL LIKE '%metrika%';
SELECT SearchPhrase, MAX(URL), count(*) FROM hits_10m WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT SearchPhrase, MAX(URL), MAX(Title), count(*) AS c, count(DISTINCT UserID) FROM hits_10m WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT * FROM hits_10m WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(length(URL)) AS l, count(*) FROM hits_10m WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT SUBSTRING(SUBSTRING(Referer, POSITION('//' IN Referer) + 2), 1, GREATEST(0, POSITION('/' IN SUBSTRING(Referer, POSITION('//' IN Referer) + 2)) - 1)) AS k, avg(length(Referer)) AS l, count(*) AS c, MAX(Referer) FROM hits_10m WHERE Referer != '' GROUP BY k HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_10m;
SELECT SearchEngineID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY count(*) DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
SELECT URL, count(*) FROM hits_10m GROUP BY URL ORDER BY count(*) DESC LIMIT 10;
SELECT 1, URL, count(*) FROM hits_10m GROUP BY 1, URL ORDER BY count(*) DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) FROM hits_10m GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY count(*) DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN SearchEngineID = 0 AND AdvEngineID = 0 THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = 6202628419148573758 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100000;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = 6202628419148573758 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT EventTime - INTERVAL SECOND(EventTime) SECOND AS Minute, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +0,0 @@
#!/bin/bash
grep -v -P '^#' queries.sql | sed -e 's/{table}/hits/' | while read query; do
echo 3 | sudo tee /proc/sys/vm/drop_caches
echo "$query";
for i in {1..3}; do
./send-query "$query" 2>&1 | grep -P '\d+ tuple|clk: |unknown|overflow|error';
done;
done;

View File

@ -1,356 +0,0 @@
Go to https://www.monetdb.org/
Dowload now.
Latest binary releases.
Ubuntu & Debian.
https://www.monetdb.org/downloads/deb/
Go to the server where you want to install MonetDB.
```
$ sudo mcedit /etc/apt/sources.list.d/monetdb.list
```
Write:
```
deb https://dev.monetdb.org/downloads/deb/ bionic monetdb
```
```
$ wget --output-document=- https://www.monetdb.org/downloads/MonetDB-GPG-KEY | sudo apt-key add -
$ sudo apt update
$ sudo apt install monetdb5-sql monetdb-client
$ sudo systemctl enable monetdbd
$ sudo systemctl start monetdbd
$ sudo usermod -a -G monetdb $USER
```
Logout and login back to your server.
Tutorial:
https://www.monetdb.org/Documentation/UserGuide/Tutorial
Creating the database:
```
$ sudo mkdir /opt/monetdb
$ sudo chmod 777 /opt/monetdb
$ monetdbd create /opt/monetdb
$ monetdbd start /opt/monetdb
cannot remove socket files
```
Now you have to stop MonetDB, copy the contents of `/var/monetdb5` to `/opt/monetdb` and replace the `/var/monetdb5` with symlink to `/opt/monetdb`. This is necessary, because I don't have free space in `/var` and creation of database in `/opt` did not succeed.
Start MonetDB again.
```
$ sudo systemctl start monetdbd
```
```
$ monetdb create test
created database in maintenance mode: test
$ monetdb release test
taken database out of maintenance mode: test
```
Run client:
```
$ mclient -u monetdb -d test
```
Type password: monetdb
```
CREATE TABLE hits
(
"WatchID" BIGINT,
"JavaEnable" TINYINT,
"Title" TEXT,
"GoodEvent" SMALLINT,
"EventTime" TIMESTAMP,
"EventDate" Date,
"CounterID" INTEGER,
"ClientIP" INTEGER,
"RegionID" INTEGER,
"UserID" BIGINT,
"CounterClass" TINYINT,
"OS" TINYINT,
"UserAgent" TINYINT,
"URL" TEXT,
"Referer" TEXT,
"Refresh" TINYINT,
"RefererCategoryID" SMALLINT,
"RefererRegionID" INTEGER,
"URLCategoryID" SMALLINT,
"URLRegionID" INTEGER,
"ResolutionWidth" SMALLINT,
"ResolutionHeight" SMALLINT,
"ResolutionDepth" TINYINT,
"FlashMajor" TINYINT,
"FlashMinor" TINYINT,
"FlashMinor2" TEXT,
"NetMajor" TINYINT,
"NetMinor" TINYINT,
"UserAgentMajor" SMALLINT,
"UserAgentMinor" TEXT,
"CookieEnable" TINYINT,
"JavascriptEnable" TINYINT,
"IsMobile" TINYINT,
"MobilePhone" TINYINT,
"MobilePhoneModel" TEXT,
"Params" TEXT,
"IPNetworkID" INTEGER,
"TraficSourceID" TINYINT,
"SearchEngineID" SMALLINT,
"SearchPhrase" TEXT,
"AdvEngineID" TINYINT,
"IsArtifical" TINYINT,
"WindowClientWidth" SMALLINT,
"WindowClientHeight" SMALLINT,
"ClientTimeZone" SMALLINT,
"ClientEventTime" TIMESTAMP,
"SilverlightVersion1" TINYINT,
"SilverlightVersion2" TINYINT,
"SilverlightVersion3" INTEGER,
"SilverlightVersion4" SMALLINT,
"PageCharset" TEXT,
"CodeVersion" INTEGER,
"IsLink" TINYINT,
"IsDownload" TINYINT,
"IsNotBounce" TINYINT,
"FUniqID" BIGINT,
"OriginalURL" TEXT,
"HID" INTEGER,
"IsOldCounter" TINYINT,
"IsEvent" TINYINT,
"IsParameter" TINYINT,
"DontCountHits" TINYINT,
"WithHash" TINYINT,
"HitColor" TEXT,
"LocalEventTime" TIMESTAMP,
"Age" TINYINT,
"Sex" TINYINT,
"Income" TINYINT,
"Interests" SMALLINT,
"Robotness" TINYINT,
"RemoteIP" INTEGER,
"WindowName" INTEGER,
"OpenerName" INTEGER,
"HistoryLength" SMALLINT,
"BrowserLanguage" TEXT,
"BrowserCountry" TEXT,
"SocialNetwork" TEXT,
"SocialAction" TEXT,
"HTTPError" SMALLINT,
"SendTiming" INTEGER,
"DNSTiming" INTEGER,
"ConnectTiming" INTEGER,
"ResponseStartTiming" INTEGER,
"ResponseEndTiming" INTEGER,
"FetchTiming" INTEGER,
"SocialSourceNetworkID" TINYINT,
"SocialSourcePage" TEXT,
"ParamPrice" BIGINT,
"ParamOrderID" TEXT,
"ParamCurrency" TEXT,
"ParamCurrencyID" SMALLINT,
"OpenstatServiceName" TEXT,
"OpenstatCampaignID" TEXT,
"OpenstatAdID" TEXT,
"OpenstatSourceID" TEXT,
"UTMSource" TEXT,
"UTMMedium" TEXT,
"UTMCampaign" TEXT,
"UTMContent" TEXT,
"UTMTerm" TEXT,
"FromTag" TEXT,
"HasGCLID" TINYINT,
"RefererHash" BIGINT,
"URLHash" BIGINT,
"CLID" INTEGER
);
```
# How to prepare data
Download the 100 million rows dataset from here and insert into ClickHouse:
https://clickhouse.com/docs/en/getting-started/example-datasets/metrica/
Create the dataset from ClickHouse:
```
SELECT
toInt64(WatchID) = -9223372036854775808 ? -9223372036854775807 : toInt64(WatchID),
toInt8(JavaEnable) = -128 ? -127 : toInt8(JavaEnable),
toValidUTF8(toString(Title)),
toInt16(GoodEvent) = -32768 ? -32767 : toInt16(GoodEvent),
EventTime,
EventDate,
toInt32(CounterID) = -2147483648 ? -2147483647 : toInt32(CounterID),
toInt32(ClientIP) = -2147483648 ? -2147483647 : toInt32(ClientIP),
toInt32(RegionID) = -2147483648 ? -2147483647 : toInt32(RegionID),
toInt64(UserID) = -9223372036854775808 ? -9223372036854775807 : toInt64(UserID),
toInt8(CounterClass) = -128 ? -127 : toInt8(CounterClass),
toInt8(OS) = -128 ? -127 : toInt8(OS),
toInt8(UserAgent) = -128 ? -127 : toInt8(UserAgent),
toValidUTF8(toString(URL)),
toValidUTF8(toString(Referer)),
toInt8(Refresh) = -128 ? -127 : toInt8(Refresh),
toInt16(RefererCategoryID) = -32768 ? -32767 : toInt16(RefererCategoryID),
toInt32(RefererRegionID) = -2147483648 ? -2147483647 : toInt32(RefererRegionID),
toInt16(URLCategoryID) = -32768 ? -32767 : toInt16(URLCategoryID),
toInt32(URLRegionID) = -2147483648 ? -2147483647 : toInt32(URLRegionID),
toInt16(ResolutionWidth) = -32768 ? -32767 : toInt16(ResolutionWidth),
toInt16(ResolutionHeight) = -32768 ? -32767 : toInt16(ResolutionHeight),
toInt8(ResolutionDepth) = -128 ? -127 : toInt8(ResolutionDepth),
toInt8(FlashMajor) = -128 ? -127 : toInt8(FlashMajor),
toInt8(FlashMinor) = -128 ? -127 : toInt8(FlashMinor),
toValidUTF8(toString(FlashMinor2)),
toInt8(NetMajor) = -128 ? -127 : toInt8(NetMajor),
toInt8(NetMinor) = -128 ? -127 : toInt8(NetMinor),
toInt16(UserAgentMajor) = -32768 ? -32767 : toInt16(UserAgentMajor),
toValidUTF8(toString(UserAgentMinor)),
toInt8(CookieEnable) = -128 ? -127 : toInt8(CookieEnable),
toInt8(JavascriptEnable) = -128 ? -127 : toInt8(JavascriptEnable),
toInt8(IsMobile) = -128 ? -127 : toInt8(IsMobile),
toInt8(MobilePhone) = -128 ? -127 : toInt8(MobilePhone),
toValidUTF8(toString(MobilePhoneModel)),
toValidUTF8(toString(Params)),
toInt32(IPNetworkID) = -2147483648 ? -2147483647 : toInt32(IPNetworkID),
toInt8(TraficSourceID) = -128 ? -127 : toInt8(TraficSourceID),
toInt16(SearchEngineID) = -32768 ? -32767 : toInt16(SearchEngineID),
toValidUTF8(toString(SearchPhrase)),
toInt8(AdvEngineID) = -128 ? -127 : toInt8(AdvEngineID),
toInt8(IsArtifical) = -128 ? -127 : toInt8(IsArtifical),
toInt16(WindowClientWidth) = -32768 ? -32767 : toInt16(WindowClientWidth),
toInt16(WindowClientHeight) = -32768 ? -32767 : toInt16(WindowClientHeight),
toInt16(ClientTimeZone) = -32768 ? -32767 : toInt16(ClientTimeZone),
ClientEventTime,
toInt8(SilverlightVersion1) = -128 ? -127 : toInt8(SilverlightVersion1),
toInt8(SilverlightVersion2) = -128 ? -127 : toInt8(SilverlightVersion2),
toInt32(SilverlightVersion3) = -2147483648 ? -2147483647 : toInt32(SilverlightVersion3),
toInt16(SilverlightVersion4) = -32768 ? -32767 : toInt16(SilverlightVersion4),
toValidUTF8(toString(PageCharset)),
toInt32(CodeVersion) = -2147483648 ? -2147483647 : toInt32(CodeVersion),
toInt8(IsLink) = -128 ? -127 : toInt8(IsLink),
toInt8(IsDownload) = -128 ? -127 : toInt8(IsDownload),
toInt8(IsNotBounce) = -128 ? -127 : toInt8(IsNotBounce),
toInt64(FUniqID) = -9223372036854775808 ? -9223372036854775807 : toInt64(FUniqID),
toValidUTF8(toString(OriginalURL)),
toInt32(HID) = -2147483648 ? -2147483647 : toInt32(HID),
toInt8(IsOldCounter) = -128 ? -127 : toInt8(IsOldCounter),
toInt8(IsEvent) = -128 ? -127 : toInt8(IsEvent),
toInt8(IsParameter) = -128 ? -127 : toInt8(IsParameter),
toInt8(DontCountHits) = -128 ? -127 : toInt8(DontCountHits),
toInt8(WithHash) = -128 ? -127 : toInt8(WithHash),
toValidUTF8(toString(HitColor)),
LocalEventTime,
toInt8(Age) = -128 ? -127 : toInt8(Age),
toInt8(Sex) = -128 ? -127 : toInt8(Sex),
toInt8(Income) = -128 ? -127 : toInt8(Income),
toInt16(Interests) = -32768 ? -32767 : toInt16(Interests),
toInt8(Robotness) = -128 ? -127 : toInt8(Robotness),
toInt32(RemoteIP) = -2147483648 ? -2147483647 : toInt32(RemoteIP),
toInt32(WindowName) = -2147483648 ? -2147483647 : toInt32(WindowName),
toInt32(OpenerName) = -2147483648 ? -2147483647 : toInt32(OpenerName),
toInt16(HistoryLength) = -32768 ? -32767 : toInt16(HistoryLength),
toValidUTF8(toString(BrowserLanguage)),
toValidUTF8(toString(BrowserCountry)),
toValidUTF8(toString(SocialNetwork)),
toValidUTF8(toString(SocialAction)),
toInt16(HTTPError) = -32768 ? -32767 : toInt16(HTTPError),
toInt32(SendTiming) = -2147483648 ? -2147483647 : toInt32(SendTiming),
toInt32(DNSTiming) = -2147483648 ? -2147483647 : toInt32(DNSTiming),
toInt32(ConnectTiming) = -2147483648 ? -2147483647 : toInt32(ConnectTiming),
toInt32(ResponseStartTiming) = -2147483648 ? -2147483647 : toInt32(ResponseStartTiming),
toInt32(ResponseEndTiming) = -2147483648 ? -2147483647 : toInt32(ResponseEndTiming),
toInt32(FetchTiming) = -2147483648 ? -2147483647 : toInt32(FetchTiming),
toInt8(SocialSourceNetworkID) = -128 ? -127 : toInt8(SocialSourceNetworkID),
toValidUTF8(toString(SocialSourcePage)),
toInt64(ParamPrice) = -9223372036854775808 ? -9223372036854775807 : toInt64(ParamPrice),
toValidUTF8(toString(ParamOrderID)),
toValidUTF8(toString(ParamCurrency)),
toInt16(ParamCurrencyID) = -32768 ? -32767 : toInt16(ParamCurrencyID),
toValidUTF8(toString(OpenstatServiceName)),
toValidUTF8(toString(OpenstatCampaignID)),
toValidUTF8(toString(OpenstatAdID)),
toValidUTF8(toString(OpenstatSourceID)),
toValidUTF8(toString(UTMSource)),
toValidUTF8(toString(UTMMedium)),
toValidUTF8(toString(UTMCampaign)),
toValidUTF8(toString(UTMContent)),
toValidUTF8(toString(UTMTerm)),
toValidUTF8(toString(FromTag)),
toInt8(HasGCLID) = -128 ? -127 : toInt8(HasGCLID),
toInt64(RefererHash) = -9223372036854775808 ? -9223372036854775807 : toInt64(RefererHash),
toInt64(URLHash) = -9223372036854775808 ? -9223372036854775807 : toInt64(URLHash),
toInt32(CLID) = -2147483648 ? -2147483647 : toInt32(CLID)
FROM hits_100m_obfuscated
INTO OUTFILE '/home/milovidov/example_datasets/hits_100m_obfuscated_monetdb.tsv'
FORMAT TSV;
```
Note that MonetDB does not support the most negative numbers like -128. And we have to convert them by adding one.
It makes impossible to store the values of 64bit identifiers in BIGINT.
Maybe it's a trick to optimize NULLs?
Upload the data:
```
$ mclient -u monetdb -d test
```
Type password: monetdb
```
COPY INTO hits FROM '/home/milovidov/example_datasets/hits_100m_obfuscated_monetdb.tsv' USING DELIMITERS '\t';
```
It takes 28 minutes 02 seconds on a server (Linux Ubuntu, Xeon E5-2560v2, 32 logical CPU, 128 GiB RAM, 8xHDD RAID-5, 40 TB).
It is roughly 60 000 rows per second.
Validate the data:
```
SELECT count(*) FROM hits;
```
Create an index:
```
CREATE INDEX hits_idx ON hits ("CounterID", "EventDate");
```
(it takes 5 seconds)
Run the benchmark:
```
./benchmark.sh | tee log.txt
```
You can find the log in `log.txt` file.
Postprocess data:
```
grep clk log.txt | tr -d '\r' | awk '{ if ($3 == "ms") { print $2 / 1000; } else if ($3 == "sec") { print $2 } else { print } }'
```
Then replace values with "min" (minutes) timing manually and save to `tmp.txt`.
Then process to JSON format:
```
awk '{
if (i % 3 == 0) { a = $1 }
else if (i % 3 == 1) { b = $1 }
else if (i % 3 == 2) { c = $1; print "[" a ", " b ", " c "]," };
++i; }' < tmp.txt
```
And paste to `/website/benchmark/dbms/results/005_monetdb.json` in the repository.

View File

@ -1,341 +0,0 @@
3
SELECT count(*) FROM hits;
1 tuple
clk: 1.262 ms
1 tuple
clk: 1.420 ms
1 tuple
clk: 1.190 ms
3
SELECT count(*) FROM hits WHERE "AdvEngineID" <> 0;
1 tuple
clk: 1.530 sec
1 tuple
clk: 1.489 sec
1 tuple
clk: 1.490 sec
3
SELECT sum("AdvEngineID"), count(*), avg("ResolutionWidth") FROM hits;
1 tuple
clk: 597.512 ms
1 tuple
clk: 579.383 ms
1 tuple
clk: 598.220 ms
3
SELECT sum("UserID") FROM hits;
overflow in calculation.
clk: 568.003 ms
overflow in calculation.
clk: 554.572 ms
overflow in calculation.
clk: 552.076 ms
3
SELECT COUNT(DISTINCT "UserID") FROM hits;
1 tuple
clk: 6.688 sec
1 tuple
clk: 6.689 sec
1 tuple
clk: 6.652 sec
3
SELECT COUNT(DISTINCT "SearchPhrase") FROM hits;
1 tuple
clk: 15.702 sec
1 tuple
clk: 17.189 sec
1 tuple
clk: 15.514 sec
3
SELECT min("EventDate"), max("EventDate") FROM hits;
1 tuple
clk: 697.770 ms
1 tuple
clk: 711.870 ms
1 tuple
clk: 697.177 ms
3
SELECT "AdvEngineID", count(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY count(*) DESC;
18 tuples
clk: 1.536 sec
18 tuples
clk: 1.505 sec
18 tuples
clk: 1.492 sec
3
SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" ORDER BY u DESC LIMIT 10;
10 tuples
clk: 9.965 sec
10 tuples
clk: 10.106 sec
10 tuples
clk: 10.136 sec
3
SELECT "RegionID", sum("AdvEngineID"), count(*) AS c, avg("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10;
10 tuples
clk: 8.329 sec
10 tuples
clk: 8.601 sec
10 tuples
clk: 8.039 sec
3
SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
10 tuples
clk: 3.385 sec
10 tuples
clk: 3.321 sec
10 tuples
clk: 3.326 sec
3
SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
10 tuples
clk: 3.510 sec
10 tuples
clk: 3.431 sec
10 tuples
clk: 3.382 sec
3
SELECT "SearchPhrase", count(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
10 tuples
clk: 10.891 sec
10 tuples
clk: 11.483 sec
10 tuples
clk: 10.352 sec
3
SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10;
10 tuples
clk: 15.711 sec
10 tuples
clk: 15.444 sec
10 tuples
clk: 15.503 sec
3
SELECT "SearchEngineID", "SearchPhrase", count(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC LIMIT 10;
10 tuples
clk: 11.433 sec
10 tuples
clk: 11.399 sec
10 tuples
clk: 11.285 sec
3
SELECT "UserID", count(*) FROM hits GROUP BY "UserID" ORDER BY count(*) DESC LIMIT 10;
10 tuples
clk: 7.184 sec
10 tuples
clk: 7.015 sec
10 tuples
clk: 6.849 sec
3
SELECT "UserID", "SearchPhrase", count(*) FROM hits GROUP BY "UserID", "SearchPhrase" ORDER BY count(*) DESC LIMIT 10;
10 tuples
clk: 29.096 sec
10 tuples
clk: 28.328 sec
10 tuples
clk: 29.247 sec
3
SELECT "UserID", "SearchPhrase", count(*) FROM hits GROUP BY "UserID", "SearchPhrase" LIMIT 10;
10 tuples
clk: 29.457 sec
10 tuples
clk: 29.364 sec
10 tuples
clk: 29.269 sec
3
SELECT "UserID", extract(minute FROM "EventTime") AS m, "SearchPhrase", count(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER BY count(*) DESC LIMIT 10;
10 tuples
clk: 47.141 sec
10 tuples
clk: 46.495 sec
10 tuples
clk: 46.472 sec
3
SELECT "UserID" FROM hits WHERE "UserID" = -6101065172474983726;
0 tuples
clk: 783.332 ms
0 tuples
clk: 771.157 ms
0 tuples
clk: 783.082 ms
3
SELECT count(*) FROM hits WHERE "URL" LIKE '%metrika%';
1 tuple
clk: 3.963 sec
1 tuple
clk: 3.930 sec
1 tuple
clk: 3.964 sec
3
SELECT "SearchPhrase", min("URL"), count(*) AS c FROM hits WHERE "URL" LIKE '%metrika%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
10 tuples
clk: 3.925 sec
10 tuples
clk: 3.817 sec
10 tuples
clk: 3.802 sec
3
SELECT "SearchPhrase", min("URL"), min("Title"), count(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Яндекс%' AND "URL" NOT LIKE '%.yandex.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
10 tuples
clk: 6.067 sec
10 tuples
clk: 6.120 sec
10 tuples
clk: 6.012 sec
3
SELECT * FROM hits WHERE "URL" LIKE '%metrika%' ORDER BY "EventTime" LIMIT 10;
10 tuples !87 columns dropped, 29 fields truncated!
clk: 4.251 sec
10 tuples !87 columns dropped, 29 fields truncated!
clk: 4.190 sec
10 tuples !87 columns dropped, 29 fields truncated!
clk: 4.379 sec
3
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime" LIMIT 10;
10 tuples
clk: 6.699 sec
10 tuples
clk: 6.718 sec
10 tuples
clk: 6.802 sec
3
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10;
10 tuples
clk: 6.887 sec
10 tuples
clk: 6.838 sec
10 tuples
clk: 6.844 sec
3
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime", "SearchPhrase" LIMIT 10;
10 tuples
clk: 6.806 sec
10 tuples
clk: 6.878 sec
10 tuples
clk: 6.807 sec
3
SELECT "CounterID", avg(length("URL")) AS l, count(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
25 tuples
clk: 1:01 min
25 tuples
clk: 55.553 sec
25 tuples
clk: 56.188 sec
3
SELECT sys.getdomain("Referer") AS key, avg(length("Referer")) AS l, count(*) AS c, min("Referer") FROM hits WHERE "Referer" <> '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
clk: 1:00 min
clk: 1:00 min
clk: 1:00 min
3
SELECT sum("ResolutionWidth"), sum("ResolutionWidth" + 1), sum("ResolutionWidth" + 2), sum("ResolutionWidth" + 3), sum("ResolutionWidth" + 4), sum("ResolutionWidth" + 5), sum("ResolutionWidth" + 6), sum("ResolutionWidth" + 7), sum("ResolutionWidth" + 8), sum("ResolutionWidth" + 9), sum("ResolutionWidth" + 10), sum("ResolutionWidth" + 11), sum("ResolutionWidth" + 12), sum("ResolutionWidth" + 13), sum("ResolutionWidth" + 14), sum("ResolutionWidth" + 15), sum("ResolutionWidth" + 16), sum("ResolutionWidth" + 17), sum("ResolutionWidth" + 18), sum("ResolutionWidth" + 19), sum("ResolutionWidth" + 20), sum("ResolutionWidth" + 21), sum("ResolutionWidth" + 22), sum("ResolutionWidth" + 23), sum("ResolutionWidth" + 24), sum("ResolutionWidth" + 25), sum("ResolutionWidth" + 26), sum("ResolutionWidth" + 27), sum("ResolutionWidth" + 28), sum("ResolutionWidth" + 29), sum("ResolutionWidth" + 30), sum("ResolutionWidth" + 31), sum("ResolutionWidth" + 32), sum("ResolutionWidth" + 33), sum("ResolutionWidth" + 34), sum("ResolutionWidth" + 35), sum("ResolutionWidth" + 36), sum("ResolutionWidth" + 37), sum("ResolutionWidth" + 38), sum("ResolutionWidth" + 39), sum("ResolutionWidth" + 40), sum("ResolutionWidth" + 41), sum("ResolutionWidth" + 42), sum("ResolutionWidth" + 43), sum("ResolutionWidth" + 44), sum("ResolutionWidth" + 45), sum("ResolutionWidth" + 46), sum("ResolutionWidth" + 47), sum("ResolutionWidth" + 48), sum("ResolutionWidth" + 49), sum("ResolutionWidth" + 50), sum("ResolutionWidth" + 51), sum("ResolutionWidth" + 52), sum("ResolutionWidth" + 53), sum("ResolutionWidth" + 54), sum("ResolutionWidth" + 55), sum("ResolutionWidth" + 56), sum("ResolutionWidth" + 57), sum("ResolutionWidth" + 58), sum("ResolutionWidth" + 59), sum("ResolutionWidth" + 60), sum("ResolutionWidth" + 61), sum("ResolutionWidth" + 62), sum("ResolutionWidth" + 63), sum("ResolutionWidth" + 64), sum("ResolutionWidth" + 65), sum("ResolutionWidth" + 66), sum("ResolutionWidth" + 67), sum("ResolutionWidth" + 68), sum("ResolutionWidth" + 69), sum("ResolutionWidth" + 70), sum("ResolutionWidth" + 71), sum("ResolutionWidth" + 72), sum("ResolutionWidth" + 73), sum("ResolutionWidth" + 74), sum("ResolutionWidth" + 75), sum("ResolutionWidth" + 76), sum("ResolutionWidth" + 77), sum("ResolutionWidth" + 78), sum("ResolutionWidth" + 79), sum("ResolutionWidth" + 80), sum("ResolutionWidth" + 81), sum("ResolutionWidth" + 82), sum("ResolutionWidth" + 83), sum("ResolutionWidth" + 84), sum("ResolutionWidth" + 85), sum("ResolutionWidth" + 86), sum("ResolutionWidth" + 87), sum("ResolutionWidth" + 88), sum("ResolutionWidth" + 89) FROM hits;
1 tuple !77 columns dropped!
clk: 6.221 sec
1 tuple !77 columns dropped!
clk: 6.170 sec
1 tuple !77 columns dropped!
clk: 6.382 sec
3
SELECT "SearchEngineID", "ClientIP", count(*) AS c, sum("Refresh"), avg("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10;
10 tuples
clk: 5.684 sec
10 tuples
clk: 5.585 sec
10 tuples
clk: 5.463 sec
3
SELECT "WatchID", "ClientIP", count(*) AS c, sum("Refresh"), avg("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
10 tuples
clk: 6.281 sec
10 tuples
clk: 6.574 sec
10 tuples
clk: 6.243 sec
3
SELECT "WatchID", "ClientIP", count(*) AS c, sum("Refresh"), avg("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
10 tuples
clk: 44.641 sec
10 tuples
clk: 41.904 sec
10 tuples
clk: 43.218 sec
3
SELECT "URL", count(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10;
10 tuples
clk: 1:24 min
10 tuples
clk: 1:31 min
10 tuples
clk: 1:24 min
3
SELECT 1, "URL", count(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10;
10 tuples
clk: 1:24 min
10 tuples
clk: 1:25 min
10 tuples
clk: 1:24 min
3
SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, count(*) AS c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10;
10 tuples
clk: 26.438 sec
10 tuples
clk: 26.033 sec
10 tuples
clk: 26.147 sec
3
SELECT "URL", count(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "Refresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY "PageViews" DESC LIMIT 10;
10 tuples
clk: 4.825 sec
10 tuples
clk: 4.618 sec
10 tuples
clk: 4.623 sec
3
SELECT "Title", count(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "Refresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY "PageViews" DESC LIMIT 10;
10 tuples
clk: 4.380 sec
10 tuples
clk: 4.418 sec
10 tuples
clk: 4.413 sec
3
SELECT "URL", count(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "Refresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY "PageViews" DESC LIMIT 1000;
1000 tuples
clk: 4.259 sec
1000 tuples
clk: 4.195 sec
1000 tuples
clk: 4.195 sec
3
SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, count(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "Refresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END, "URL" ORDER BY "PageViews" DESC LIMIT 1000;
1000 tuples
clk: 3.233 sec
1000 tuples
clk: 3.180 sec
1000 tuples
clk: 3.181 sec
3
SELECT "URLHash", "EventDate", count(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "Refresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 686716256552154761 GROUP BY "URLHash", "EventDate" ORDER BY "PageViews" DESC LIMIT 100;
0 tuples
clk: 2.656 sec
0 tuples
clk: 2.557 sec
0 tuples
clk: 2.561 sec
3
SELECT "WindowClientWidth", "WindowClientHeight", count(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "Refresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 686716256552154761 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY "PageViews" DESC LIMIT 10000;
0 tuples
clk: 4.161 sec
0 tuples
clk: 4.243 sec
0 tuples
clk: 4.166 sec
3
SELECT DATE_TRUNC('minute', "EventTime") AS "Minute", count(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-02' AND "Refresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', "EventTime") ORDER BY DATE_TRUNC('minute', "EventTime");
0 tuples
clk: 4.199 sec
0 tuples
clk: 4.211 sec
0 tuples
clk: 4.190 sec

View File

@ -1,43 +0,0 @@
SELECT count(*) FROM {table};
SELECT count(*) FROM {table} WHERE "AdvEngineID" <> 0;
SELECT sum("AdvEngineID"), count(*), avg("ResolutionWidth") FROM {table};
SELECT sum("UserID") FROM {table};
SELECT COUNT(DISTINCT "UserID") FROM {table};
SELECT COUNT(DISTINCT "SearchPhrase") FROM {table};
SELECT min("EventDate"), max("EventDate") FROM {table};
SELECT "AdvEngineID", count(*) FROM {table} WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY count(*) DESC;
SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM {table} GROUP BY "RegionID" ORDER BY u DESC LIMIT 10;
SELECT "RegionID", sum("AdvEngineID"), count(*) AS c, avg("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM {table} GROUP BY "RegionID" ORDER BY c DESC LIMIT 10;
SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM {table} WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM {table} WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
SELECT "SearchPhrase", count(*) AS c FROM {table} WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM {table} WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10;
SELECT "SearchEngineID", "SearchPhrase", count(*) AS c FROM {table} WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC LIMIT 10;
SELECT "UserID", count(*) FROM {table} GROUP BY "UserID" ORDER BY count(*) DESC LIMIT 10;
SELECT "UserID", "SearchPhrase", count(*) FROM {table} GROUP BY "UserID", "SearchPhrase" ORDER BY count(*) DESC LIMIT 10;
SELECT "UserID", "SearchPhrase", count(*) FROM {table} GROUP BY "UserID", "SearchPhrase" LIMIT 10;
SELECT "UserID", extract(minute FROM "EventTime") AS m, "SearchPhrase", count(*) FROM {table} GROUP BY "UserID", m, "SearchPhrase" ORDER BY count(*) DESC LIMIT 10;
SELECT "UserID" FROM {table} WHERE "UserID" = -6101065172474983726;
SELECT count(*) FROM {table} WHERE "URL" LIKE '%metrika%';
SELECT "SearchPhrase", min("URL"), count(*) AS c FROM {table} WHERE "URL" LIKE '%metrika%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
SELECT "SearchPhrase", min("URL"), min("Title"), count(*) AS c, COUNT(DISTINCT "UserID") FROM {table} WHERE "Title" LIKE '%Яндекс%' AND "URL" NOT LIKE '%.yandex.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
SELECT * FROM {table} WHERE "URL" LIKE '%metrika%' ORDER BY "EventTime" LIMIT 10;
SELECT "SearchPhrase" FROM {table} WHERE "SearchPhrase" <> '' ORDER BY "EventTime" LIMIT 10;
SELECT "SearchPhrase" FROM {table} WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10;
SELECT "SearchPhrase" FROM {table} WHERE "SearchPhrase" <> '' ORDER BY "EventTime", "SearchPhrase" LIMIT 10;
SELECT "CounterID", avg(length("URL")) AS l, count(*) AS c FROM {table} WHERE "URL" <> '' GROUP BY "CounterID" HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT sys.getdomain("Referer") AS key, avg(length("Referer")) AS l, count(*) AS c, min("Referer") FROM {table} WHERE "Referer" <> '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum("ResolutionWidth"), sum("ResolutionWidth" + 1), sum("ResolutionWidth" + 2), sum("ResolutionWidth" + 3), sum("ResolutionWidth" + 4), sum("ResolutionWidth" + 5), sum("ResolutionWidth" + 6), sum("ResolutionWidth" + 7), sum("ResolutionWidth" + 8), sum("ResolutionWidth" + 9), sum("ResolutionWidth" + 10), sum("ResolutionWidth" + 11), sum("ResolutionWidth" + 12), sum("ResolutionWidth" + 13), sum("ResolutionWidth" + 14), sum("ResolutionWidth" + 15), sum("ResolutionWidth" + 16), sum("ResolutionWidth" + 17), sum("ResolutionWidth" + 18), sum("ResolutionWidth" + 19), sum("ResolutionWidth" + 20), sum("ResolutionWidth" + 21), sum("ResolutionWidth" + 22), sum("ResolutionWidth" + 23), sum("ResolutionWidth" + 24), sum("ResolutionWidth" + 25), sum("ResolutionWidth" + 26), sum("ResolutionWidth" + 27), sum("ResolutionWidth" + 28), sum("ResolutionWidth" + 29), sum("ResolutionWidth" + 30), sum("ResolutionWidth" + 31), sum("ResolutionWidth" + 32), sum("ResolutionWidth" + 33), sum("ResolutionWidth" + 34), sum("ResolutionWidth" + 35), sum("ResolutionWidth" + 36), sum("ResolutionWidth" + 37), sum("ResolutionWidth" + 38), sum("ResolutionWidth" + 39), sum("ResolutionWidth" + 40), sum("ResolutionWidth" + 41), sum("ResolutionWidth" + 42), sum("ResolutionWidth" + 43), sum("ResolutionWidth" + 44), sum("ResolutionWidth" + 45), sum("ResolutionWidth" + 46), sum("ResolutionWidth" + 47), sum("ResolutionWidth" + 48), sum("ResolutionWidth" + 49), sum("ResolutionWidth" + 50), sum("ResolutionWidth" + 51), sum("ResolutionWidth" + 52), sum("ResolutionWidth" + 53), sum("ResolutionWidth" + 54), sum("ResolutionWidth" + 55), sum("ResolutionWidth" + 56), sum("ResolutionWidth" + 57), sum("ResolutionWidth" + 58), sum("ResolutionWidth" + 59), sum("ResolutionWidth" + 60), sum("ResolutionWidth" + 61), sum("ResolutionWidth" + 62), sum("ResolutionWidth" + 63), sum("ResolutionWidth" + 64), sum("ResolutionWidth" + 65), sum("ResolutionWidth" + 66), sum("ResolutionWidth" + 67), sum("ResolutionWidth" + 68), sum("ResolutionWidth" + 69), sum("ResolutionWidth" + 70), sum("ResolutionWidth" + 71), sum("ResolutionWidth" + 72), sum("ResolutionWidth" + 73), sum("ResolutionWidth" + 74), sum("ResolutionWidth" + 75), sum("ResolutionWidth" + 76), sum("ResolutionWidth" + 77), sum("ResolutionWidth" + 78), sum("ResolutionWidth" + 79), sum("ResolutionWidth" + 80), sum("ResolutionWidth" + 81), sum("ResolutionWidth" + 82), sum("ResolutionWidth" + 83), sum("ResolutionWidth" + 84), sum("ResolutionWidth" + 85), sum("ResolutionWidth" + 86), sum("ResolutionWidth" + 87), sum("ResolutionWidth" + 88), sum("ResolutionWidth" + 89) FROM {table};
SELECT "SearchEngineID", "ClientIP", count(*) AS c, sum("Refresh"), avg("ResolutionWidth") FROM {table} WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10;
SELECT "WatchID", "ClientIP", count(*) AS c, sum("Refresh"), avg("ResolutionWidth") FROM {table} WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
SELECT "WatchID", "ClientIP", count(*) AS c, sum("Refresh"), avg("ResolutionWidth") FROM {table} GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
SELECT "URL", count(*) AS c FROM {table} GROUP BY "URL" ORDER BY c DESC LIMIT 10;
SELECT 1, "URL", count(*) AS c FROM {table} GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10;
SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, count(*) AS c FROM {table} GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10;
SELECT "URL", count(*) AS "PageViews" FROM {table} WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "Refresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY "PageViews" DESC LIMIT 10;
SELECT "Title", count(*) AS "PageViews" FROM {table} WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "Refresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY "PageViews" DESC LIMIT 10;
SELECT "URL", count(*) AS "PageViews" FROM {table} WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "Refresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY "PageViews" DESC LIMIT 1000;
SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, count(*) AS "PageViews" FROM {table} WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "Refresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END, "URL" ORDER BY "PageViews" DESC LIMIT 1000;
SELECT "URLHash", "EventDate", count(*) AS "PageViews" FROM {table} WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "Refresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 686716256552154761 GROUP BY "URLHash", "EventDate" ORDER BY "PageViews" DESC LIMIT 100;
SELECT "WindowClientWidth", "WindowClientHeight", count(*) AS "PageViews" FROM {table} WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "Refresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 686716256552154761 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY "PageViews" DESC LIMIT 10000;
SELECT DATE_TRUNC('minute', "EventTime") AS "Minute", count(*) AS "PageViews" FROM {table} WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-02' AND "Refresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', "EventTime") ORDER BY DATE_TRUNC('minute', "EventTime");

View File

@ -1,19 +0,0 @@
#!/usr/bin/expect
# Set timeout
set timeout 600
# Get arguments
set query [lindex $argv 0]
spawn mclient -u monetdb -d test --timer=clock
expect "password:"
send "monetdb\r"
expect "sql>"
send "$query;\r"
expect "sql>"
send "\\q\r"
expect eof

File diff suppressed because it is too large Load Diff

View File

@ -1,17 +0,0 @@
#!/bin/bash
grep -v -P '^#' queries.sql | sed -e 's/{table}/hits/' | while read query; do
echo 3 | sudo tee /proc/sys/vm/drop_caches
sudo systemctl restart omnisci_server
for i in {1..1000}; do
/opt/omnisci/bin/omnisql -t -p HyperInteractive <<< "SELECT 1;" 2>&1 | grep -q '1 rows returned' && break;
sleep 0.1;
done
sleep 10;
echo "$query";
for i in {1..3}; do
/opt/omnisci/bin/omnisql -t -p HyperInteractive <<< "$query" 2>&1 | grep -P 'Exception:|Execution time:';
done;
done;

View File

@ -1,332 +0,0 @@
# Instruction to run benchmark for OmniSci on web-analytics dataset
OmniSci (former name "MapD") is open-source (open-core) in-memory analytical DBMS with support for GPU processing.
It can run on CPU without GPU as well. It can show competitive performance on simple queries (like - simple aggregation on a single column).
# How to install
https://docs.omnisci.com/installation-and-configuration/installation/installing-on-ubuntu
# Caveats
- Dataset (at least needed columns) must fit in memory.
- It does not support data compression (only dictionary encoding for strings).
- First query execution is very slow because uncompressed data is read from disk.
- It does not support index for quick range queries.
- It does not support NOT NULL for data types.
- It does not support BLOB.
- No support for UNSIGNED data type (it's Ok according to SQL standard).
- Lack of string processing functions.
- Strings are limited to 32767 bytes.
- GROUP BY on text data type is supported only if it has dictionary encoding.
`Exception: Cannot group by string columns which are not dictionary encoded`
- Some aggregate functions are not supported for strings at all.
`Aggregate on TEXT is not supported yet.`
- Sometimes I hit a bug when query is run in infinite loop and does not finish (after retry it's finished successfully).
- One query executed in hours even with retries.
- Sorting is slow and disabled with default settings for large resultsets.
`Exception: Sorting the result would be too slow`
`Cast from dictionary-encoded string to none-encoded would be slow`
- There is approximate count distinct function but the precision is not documented.
To enable sorting of large resultsets, see:
https://stackoverflow.com/questions/62977734/omnissci-sorting-the-result-would-be-too-slow
The list of known issues is here:
https://github.com/omnisci/omniscidb/issues?q=is%3Aissue+author%3Aalexey-milovidov
# How to prepare data
Download the 100 million rows dataset from here and insert into ClickHouse:
https://clickhouse.com/docs/en/getting-started/example-datasets/metrica/
Convert the CREATE TABLE query:
```
clickhouse-client --query "SHOW CREATE TABLE hits_100m" --format TSVRaw |
tr '`' '"' |
sed -r -e '
s/U?Int64/BIGINT/;
s/U?Int32/INTEGER/;
s/U?Int16/SMALLINT/;
s/U?Int8/TINYINT/;
s/DateTime/TIMESTAMP ENCODING FIXED(32)/;
s/ Date/ DATE ENCODING DAYS(16)/;
s/FixedString\(2\)/TEXT ENCODING DICT(16)/;
s/FixedString\(3\)/TEXT ENCODING DICT/;
s/FixedString\(\d+\)/TEXT ENCODING DICT/;
s/String/TEXT ENCODING DICT/;'
```
And cut `ENGINE` part.
The resulting CREATE TABLE query:
```
CREATE TABLE hits
(
"WatchID" BIGINT,
"JavaEnable" TINYINT,
"Title" TEXT ENCODING DICT,
"GoodEvent" SMALLINT,
"EventTime" TIMESTAMP ENCODING FIXED(32),
"EventDate" ENCODING DAYS(16) Date,
"CounterID" INTEGER,
"ClientIP" INTEGER,
"RegionID" INTEGER,
"UserID" BIGINT,
"CounterClass" TINYINT,
"OS" TINYINT,
"UserAgent" TINYINT,
"URL" TEXT ENCODING DICT,
"Referer" TEXT ENCODING DICT,
"Refresh" TINYINT,
"RefererCategoryID" SMALLINT,
"RefererRegionID" INTEGER,
"URLCategoryID" SMALLINT,
"URLRegionID" INTEGER,
"ResolutionWidth" SMALLINT,
"ResolutionHeight" SMALLINT,
"ResolutionDepth" TINYINT,
"FlashMajor" TINYINT,
"FlashMinor" TINYINT,
"FlashMinor2" TEXT ENCODING DICT,
"NetMajor" TINYINT,
"NetMinor" TINYINT,
"UserAgentMajor" SMALLINT,
"UserAgentMinor" TEXT ENCODING DICT(16),
"CookieEnable" TINYINT,
"JavascriptEnable" TINYINT,
"IsMobile" TINYINT,
"MobilePhone" TINYINT,
"MobilePhoneModel" TEXT ENCODING DICT,
"Params" TEXT ENCODING DICT,
"IPNetworkID" INTEGER,
"TraficSourceID" TINYINT,
"SearchEngineID" SMALLINT,
"SearchPhrase" TEXT ENCODING DICT,
"AdvEngineID" TINYINT,
"IsArtifical" TINYINT,
"WindowClientWidth" SMALLINT,
"WindowClientHeight" SMALLINT,
"ClientTimeZone" SMALLINT,
"ClientEventTime" TIMESTAMP ENCODING FIXED(32),
"SilverlightVersion1" TINYINT,
"SilverlightVersion2" TINYINT,
"SilverlightVersion3" INTEGER,
"SilverlightVersion4" SMALLINT,
"PageCharset" TEXT ENCODING DICT,
"CodeVersion" INTEGER,
"IsLink" TINYINT,
"IsDownload" TINYINT,
"IsNotBounce" TINYINT,
"FUniqID" BIGINT,
"OriginalURL" TEXT ENCODING DICT,
"HID" INTEGER,
"IsOldCounter" TINYINT,
"IsEvent" TINYINT,
"IsParameter" TINYINT,
"DontCountHits" TINYINT,
"WithHash" TINYINT,
"HitColor" TEXT ENCODING DICT(8),
"LocalEventTime" TIMESTAMP ENCODING FIXED(32),
"Age" TINYINT,
"Sex" TINYINT,
"Income" TINYINT,
"Interests" SMALLINT,
"Robotness" TINYINT,
"RemoteIP" INTEGER,
"WindowName" INTEGER,
"OpenerName" INTEGER,
"HistoryLength" SMALLINT,
"BrowserLanguage" TEXT ENCODING DICT(16),
"BrowserCountry" TEXT ENCODING DICT(16),
"SocialNetwork" TEXT ENCODING DICT,
"SocialAction" TEXT ENCODING DICT,
"HTTPError" SMALLINT,
"SendTiming" INTEGER,
"DNSTiming" INTEGER,
"ConnectTiming" INTEGER,
"ResponseStartTiming" INTEGER,
"ResponseEndTiming" INTEGER,
"FetchTiming" INTEGER,
"SocialSourceNetworkID" TINYINT,
"SocialSourcePage" TEXT ENCODING DICT,
"ParamPrice" BIGINT,
"ParamOrderID" TEXT ENCODING DICT,
"ParamCurrency" TEXT ENCODING DICT,
"ParamCurrencyID" SMALLINT,
"OpenstatServiceName" TEXT ENCODING DICT,
"OpenstatCampaignID" TEXT ENCODING DICT,
"OpenstatAdID" TEXT ENCODING DICT,
"OpenstatSourceID" TEXT ENCODING DICT,
"UTMSource" TEXT ENCODING DICT,
"UTMMedium" TEXT ENCODING DICT,
"UTMCampaign" TEXT ENCODING DICT,
"UTMContent" TEXT ENCODING DICT,
"UTMTerm" TEXT ENCODING DICT,
"FromTag" TEXT ENCODING DICT,
"HasGCLID" TINYINT,
"RefererHash" BIGINT,
"URLHash" BIGINT,
"CLID" INTEGER
);
```
Convert the dataset, prepare the list of fields for SELECT:
```
clickhouse-client --query "SHOW CREATE TABLE hits_100m" --format TSVRaw |
tr '`' '"' |
sed -r -e '
s/"(\w+)" U?Int([0-9]+)/toInt\2(\1)/;
s/"(\w+)" (Fixed)?String(\([0-9]+\))?/toValidUTF8(toString(\1))/;
s/"(\w+)" \w+/\1/'
```
The resulting SELECT query for data preparation:
```
SELECT
toInt64(WatchID),
toInt8(JavaEnable),
toValidUTF8(toString(Title)),
toInt16(GoodEvent),
EventTime,
EventDate,
toInt32(CounterID),
toInt32(ClientIP),
toInt32(RegionID),
toInt64(UserID),
toInt8(CounterClass),
toInt8(OS),
toInt8(UserAgent),
toValidUTF8(toString(URL)),
toValidUTF8(toString(Referer)),
toInt8(Refresh),
toInt16(RefererCategoryID),
toInt32(RefererRegionID),
toInt16(URLCategoryID),
toInt32(URLRegionID),
toInt16(ResolutionWidth),
toInt16(ResolutionHeight),
toInt8(ResolutionDepth),
toInt8(FlashMajor),
toInt8(FlashMinor),
toValidUTF8(toString(FlashMinor2)),
toInt8(NetMajor),
toInt8(NetMinor),
toInt16(UserAgentMajor),
toValidUTF8(toString(UserAgentMinor)),
toInt8(CookieEnable),
toInt8(JavascriptEnable),
toInt8(IsMobile),
toInt8(MobilePhone),
toValidUTF8(toString(MobilePhoneModel)),
toValidUTF8(toString(Params)),
toInt32(IPNetworkID),
toInt8(TraficSourceID),
toInt16(SearchEngineID),
toValidUTF8(toString(SearchPhrase)),
toInt8(AdvEngineID),
toInt8(IsArtifical),
toInt16(WindowClientWidth),
toInt16(WindowClientHeight),
toInt16(ClientTimeZone),
ClientEventTime,
toInt8(SilverlightVersion1),
toInt8(SilverlightVersion2),
toInt32(SilverlightVersion3),
toInt16(SilverlightVersion4),
toValidUTF8(toString(PageCharset)),
toInt32(CodeVersion),
toInt8(IsLink),
toInt8(IsDownload),
toInt8(IsNotBounce),
toInt64(FUniqID),
toValidUTF8(toString(OriginalURL)),
toInt32(HID),
toInt8(IsOldCounter),
toInt8(IsEvent),
toInt8(IsParameter),
toInt8(DontCountHits),
toInt8(WithHash),
toValidUTF8(toString(HitColor)),
LocalEventTime,
toInt8(Age),
toInt8(Sex),
toInt8(Income),
toInt16(Interests),
toInt8(Robotness),
toInt32(RemoteIP),
toInt32(WindowName),
toInt32(OpenerName),
toInt16(HistoryLength),
toValidUTF8(toString(BrowserLanguage)),
toValidUTF8(toString(BrowserCountry)),
toValidUTF8(toString(SocialNetwork)),
toValidUTF8(toString(SocialAction)),
toInt16(HTTPError),
toInt32(SendTiming),
toInt32(DNSTiming),
toInt32(ConnectTiming),
toInt32(ResponseStartTiming),
toInt32(ResponseEndTiming),
toInt32(FetchTiming),
toInt8(SocialSourceNetworkID),
toValidUTF8(toString(SocialSourcePage)),
toInt64(ParamPrice),
toValidUTF8(toString(ParamOrderID)),
toValidUTF8(toString(ParamCurrency)),
toInt16(ParamCurrencyID),
toValidUTF8(toString(OpenstatServiceName)),
toValidUTF8(toString(OpenstatCampaignID)),
toValidUTF8(toString(OpenstatAdID)),
toValidUTF8(toString(OpenstatSourceID)),
toValidUTF8(toString(UTMSource)),
toValidUTF8(toString(UTMMedium)),
toValidUTF8(toString(UTMCampaign)),
toValidUTF8(toString(UTMContent)),
toValidUTF8(toString(UTMTerm)),
toValidUTF8(toString(FromTag)),
toInt8(HasGCLID),
toInt64(RefererHash),
toInt64(URLHash),
toInt32(CLID)
FROM hits_100m_obfuscated
INTO OUTFILE '/home/milovidov/example_datasets/hits_100m_obfuscated.csv'
FORMAT CSV;
```
Upload data to OmniSci:
```
/opt/omnisci/bin/omnisql -t -p HyperInteractive
```
Run CREATE TABLE statement, then run:
```
COPY hits FROM '/home/milovidov/example_datasets/hits_100m_obfuscated.csv' WITH (HEADER = 'false');
```
Data loading took
```
336639 ms
```
on a server (Linux Ubuntu, Xeon E5-2560v2, 32 logical CPU, 128 GiB RAM, 8xHDD RAID-5, 40 TB).
Run benchmark:
```
./benchmark.sh
```
Prepare the result to paste into JSON:
```
grep -oP 'Total time: \d+' log.txt |
grep -oP '\d+' |
awk '{
if (i % 3 == 0) { a = $1 }
else if (i % 3 == 1) { b = $1 }
else if (i % 3 == 2) { c = $1; print "[" a / 1000 ", " b / 1000 ", " c / 1000 "]," };
++i; }'
```
And fill out `[null, null, null]` for missing runs.

View File

@ -1,210 +0,0 @@
3
SELECT count(*) FROM hits;
Execution time: 23471 ms, Total time: 23471 ms
Execution time: 42 ms, Total time: 43 ms
Execution time: 35 ms, Total time: 35 ms
3
SELECT count(*) FROM hits WHERE AdvEngineID != 0;
Execution time: 17328 ms, Total time: 17329 ms
Execution time: 58 ms, Total time: 59 ms
Execution time: 57 ms, Total time: 59 ms
3
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits;
Execution time: 17309 ms, Total time: 17310 ms
Execution time: 115 ms, Total time: 115 ms
Execution time: 129 ms, Total time: 130 ms
3
SELECT sum(UserID) FROM hits;
Execution time: 26091 ms, Total time: 26091 ms
Execution time: 88 ms, Total time: 89 ms
Execution time: 71 ms, Total time: 72 ms
3
SELECT APPROX_COUNT_DISTINCT(UserID) FROM hits;
Execution time: 21720 ms, Total time: 21720 ms
Execution time: 364 ms, Total time: 364 ms
Execution time: 344 ms, Total time: 345 ms
3
SELECT APPROX_COUNT_DISTINCT(SearchPhrase) FROM hits;
Execution time: 19314 ms, Total time: 19315 ms
Execution time: 385 ms, Total time: 386 ms
Execution time: 382 ms, Total time: 382 ms
3
SELECT min(EventDate), max(EventDate) FROM hits;
Execution time: 19431 ms, Total time: 19432 ms
Execution time: 130 ms, Total time: 131 ms
Execution time: 147 ms, Total time: 148 ms
3
SELECT AdvEngineID, count(*) FROM hits WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
Execution time: 20660 ms, Total time: 20661 ms
Execution time: 63 ms, Total time: 64 ms
Execution time: 88 ms, Total time: 89 ms
3
SELECT RegionID, APPROX_COUNT_DISTINCT(UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
Execution time: 21364 ms, Total time: 21472 ms
Execution time: 1387 ms, Total time: 1504 ms
Execution time: 1443 ms, Total time: 1505 ms
3
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), APPROX_COUNT_DISTINCT(UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
Execution time: 22205 ms, Total time: 22285 ms
Execution time: 1590 ms, Total time: 1655 ms
Execution time: 1591 ms, Total time: 1658 ms
3
SELECT MobilePhoneModel, APPROX_COUNT_DISTINCT(UserID) AS u FROM hits WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
Execution time: 22343 ms, Total time: 22344 ms
Execution time: 122 ms, Total time: 123 ms
Execution time: 117 ms, Total time: 118 ms
3
SELECT MobilePhone, MobilePhoneModel, APPROX_COUNT_DISTINCT(UserID) AS u FROM hits WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
Execution time: 21681 ms, Total time: 21695 ms
Execution time: 299 ms, Total time: 310 ms
Execution time: 275 ms, Total time: 292 ms
3
SELECT SearchPhrase, count(*) AS c FROM hits WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Execution time: 23346 ms, Total time: 23360 ms
Execution time: 613 ms, Total time: 631 ms
Execution time: 606 ms, Total time: 624 ms
3
SELECT SearchPhrase, APPROX_COUNT_DISTINCT(UserID) AS u FROM hits WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
Execution time: 66014 ms, Total time: 68618 ms
Execution time: 44309 ms, Total time: 47296 ms
Execution time: 44019 ms, Total time: 46866 ms
3
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
Execution time: 25853 ms, Total time: 25984 ms
Execution time: 2590 ms, Total time: 2728 ms
Execution time: 2652 ms, Total time: 2789 ms
3
SELECT UserID, count(*) FROM hits GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
Execution time: 26581 ms, Total time: 26953 ms
Execution time: 5843 ms, Total time: 6158 ms
Execution time: 5970 ms, Total time: 6286 ms
3
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
Execution time: 33007 ms, Total time: 33581 ms
Execution time: 9943 ms, Total time: 10509 ms
Execution time: 9470 ms, Total time: 10047 ms
3
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
Execution time: 39009 ms, Total time: 39575 ms
Execution time: 8151 ms, Total time: 8785 ms
Execution time: 8037 ms, Total time: 8665 ms
3
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
Execution time: 56207 ms, Total time: 57764 ms
Execution time: 26653 ms, Total time: 28199 ms
Execution time: 25614 ms, Total time: 27336 ms
3
SELECT UserID FROM hits WHERE UserID = -6101065172474983726;
Execution time: 18975 ms, Total time: 18976 ms
Execution time: 136 ms, Total time: 136 ms
Execution time: 136 ms, Total time: 136 ms
3
SELECT count(*) FROM hits WHERE URL LIKE '%metrika%';
Execution time: 32444 ms, Total time: 32445 ms
Execution time: 125 ms, Total time: 126 ms
Execution time: 134 ms, Total time: 136 ms
3
SELECT SearchPhrase, min(URL), count(*) AS c FROM hits WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Exception: Aggregate on TEXT is not supported yet.
Exception: Aggregate on TEXT is not supported yet.
Exception: Aggregate on TEXT is not supported yet.
3
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, APPROX_COUNT_DISTINCT(UserID) FROM hits WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Exception: Aggregate on TEXT is not supported yet.
Exception: Aggregate on TEXT is not supported yet.
Exception: Aggregate on TEXT is not supported yet.
3
SELECT * FROM hits WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
Execution time: 96163 ms, Total time: 96166 ms
Execution time: 312 ms, Total time: 314 ms
Execution time: 303 ms, Total time: 305 ms
3
SELECT SearchPhrase FROM hits WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
Execution time: 27493 ms, Total time: 27494 ms
Execution time: 216 ms, Total time: 216 ms
Execution time: 221 ms, Total time: 222 ms
3
SELECT SearchPhrase FROM hits WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
Execution time: 38230 ms, Total time: 38308 ms
Execution time: 17175 ms, Total time: 17256 ms
Execution time: 17225 ms, Total time: 17310 ms
3
SELECT SearchPhrase FROM hits WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
Execution time: 115614 ms, Total time: 115714 ms
Execution time: 95944 ms, Total time: 96041 ms
Execution time: 94274 ms, Total time: 94383 ms
3
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM hits WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25;
Execution time: 31775 ms, Total time: 31779 ms
Execution time: 2643 ms, Total time: 2647 ms
Execution time: 2933 ms, Total time: 2937 ms
3
SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM hits WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25;
Exception: Exception occurred: org.apache.calcite.runtime.CalciteContextException: From line 1, column 8 to line 1, column 36: No match found for function signature domainWithoutWWW(<CHARACTER>)
Exception: Exception occurred: org.apache.calcite.runtime.CalciteContextException: From line 1, column 8 to line 1, column 36: No match found for function signature domainWithoutWWW(<CHARACTER>)
Exception: Exception occurred: org.apache.calcite.runtime.CalciteContextException: From line 1, column 8 to line 1, column 36: No match found for function signature domainWithoutWWW(<CHARACTER>)
3
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits;
Execution time: 28853 ms, Total time: 28854 ms
Execution time: 5654 ms, Total time: 5655 ms
Execution time: 5579 ms, Total time: 5581 ms
3
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("Refresh"), avg(ResolutionWidth) FROM hits WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
Execution time: 31694 ms, Total time: 31925 ms
Execution time: 3872 ms, Total time: 4142 ms
Execution time: 3928 ms, Total time: 4162 ms
3
SELECT WatchID, ClientIP, count(*) AS c, sum("Refresh"), avg(ResolutionWidth) FROM hits WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
Execution time: 43690 ms, Total time: 44297 ms
Execution time: 8221 ms, Total time: 8825 ms
Execution time: 8115 ms, Total time: 8711 ms
3
SELECT URL, count(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;
Execution time: 29669 ms, Total time: 29715 ms
Execution time: 1623 ms, Total time: 1669 ms
Execution time: 1534 ms, Total time: 1586 ms
3
SELECT 1, URL, count(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
Execution time: 34860 ms, Total time: 35201 ms
Execution time: 7075 ms, Total time: 7414 ms
Execution time: 7164 ms, Total time: 7567 ms
3
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
Execution time: 26467 ms, Total time: 26724 ms
Execution time: 5740 ms, Total time: 6026 ms
Execution time: 5667 ms, Total time: 5920 ms
3
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "Refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
Execution time: 31899 ms, Total time: 31908 ms
Execution time: 1141 ms, Total time: 1154 ms
Execution time: 1155 ms, Total time: 1168 ms
3
SELECT Title, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "Refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
Execution time: 27991 ms, Total time: 27997 ms
Execution time: 719 ms, Total time: 724 ms
Execution time: 737 ms, Total time: 744 ms
3
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
Execution time: 34651 ms, Total time: 34661 ms
Execution time: 1182 ms, Total time: 1200 ms
Execution time: 1142 ms, Total time: 1159 ms
3
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
Execution time: 30130 ms, Total time: 30136 ms
Execution time: 461 ms, Total time: 467 ms
Execution time: 445 ms, Total time: 451 ms
3
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
Execution time: 19989 ms, Total time: 19991 ms
Execution time: 326 ms, Total time: 327 ms
Execution time: 325 ms, Total time: 326 ms
3
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
Execution time: 18658 ms, Total time: 18660 ms
Execution time: 265 ms, Total time: 266 ms
Execution time: 254 ms, Total time: 255 ms
3
SELECT DATE_TRUNC(minute, EventTime) AS "Minute", count(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "Refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC(minute, EventTime) ORDER BY DATE_TRUNC(minute, EventTime);
Execution time: 25225 ms, Total time: 25227 ms
Execution time: 210 ms, Total time: 212 ms
Execution time: 199 ms, Total time: 200 ms

View File

@ -1,43 +0,0 @@
SELECT count(*) FROM {table};
SELECT count(*) FROM {table} WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM {table};
SELECT sum(UserID) FROM {table};
SELECT APPROX_COUNT_DISTINCT(UserID) FROM {table};
SELECT APPROX_COUNT_DISTINCT(SearchPhrase) FROM {table};
SELECT min(EventDate), max(EventDate) FROM {table};
SELECT AdvEngineID, count(*) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT RegionID, APPROX_COUNT_DISTINCT(UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), APPROX_COUNT_DISTINCT(UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, APPROX_COUNT_DISTINCT(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, APPROX_COUNT_DISTINCT(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, APPROX_COUNT_DISTINCT(UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, count(*) FROM {table} GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM {table} WHERE UserID = -6101065172474983726;
SELECT count(*) FROM {table} WHERE URL LIKE '%metrika%';
SELECT SearchPhrase, min(URL), count(*) AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, APPROX_COUNT_DISTINCT(UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25;
SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("Refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("Refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
#SELECT WatchID, ClientIP, count(*) AS c, sum("Refresh"), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
SELECT 1, URL, count(*) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM {table} GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "Refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "Refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "Refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT DATE_TRUNC(minute, EventTime) AS "Minute", count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "Refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC(minute, EventTime) ORDER BY DATE_TRUNC(minute, EventTime);

View File

@ -1,12 +0,0 @@
#!/bin/bash
grep -v -P '^#' queries.sql | sed -e 's/{table}/hits_100m_pg/' | while read query; do
echo 3 | sudo tee /proc/sys/vm/drop_caches
echo "$query";
for i in {1..3}; do
# For some reason JIT does not work on my machine
sudo -u postgres psql tutorial -t -c 'set jit = off' -c '\timing' -c "$query" | grep 'Time' | tee --append log
done;
done;

View File

@ -1,142 +0,0 @@
Create a table in PostgreSQL:
```
CREATE TABLE hits_100m_pg
(
WatchID BIGINT NOT NULL,
JavaEnable SMALLINT NOT NULL,
Title TEXT NOT NULL,
GoodEvent SMALLINT NOT NULL,
EventTime TIMESTAMP NOT NULL,
EventDate Date NOT NULL,
CounterID INTEGER NOT NULL,
ClientIP INTEGER NOT NULL,
RegionID INTEGER NOT NULL,
UserID BIGINT NOT NULL,
CounterClass SMALLINT NOT NULL,
OS SMALLINT NOT NULL,
UserAgent SMALLINT NOT NULL,
URL TEXT NOT NULL,
Referer TEXT NOT NULL,
Refresh SMALLINT NOT NULL,
RefererCategoryID SMALLINT NOT NULL,
RefererRegionID INTEGER NOT NULL,
URLCategoryID SMALLINT NOT NULL,
URLRegionID INTEGER NOT NULL,
ResolutionWidth SMALLINT NOT NULL,
ResolutionHeight SMALLINT NOT NULL,
ResolutionDepth SMALLINT NOT NULL,
FlashMajor SMALLINT NOT NULL,
FlashMinor SMALLINT NOT NULL,
FlashMinor2 TEXT NOT NULL,
NetMajor SMALLINT NOT NULL,
NetMinor SMALLINT NOT NULL,
UserAgentMajor SMALLINT NOT NULL,
UserAgentMinor CHAR(2) NOT NULL,
CookieEnable SMALLINT NOT NULL,
JavascriptEnable SMALLINT NOT NULL,
IsMobile SMALLINT NOT NULL,
MobilePhone SMALLINT NOT NULL,
MobilePhoneModel TEXT NOT NULL,
Params TEXT NOT NULL,
IPNetworkID INTEGER NOT NULL,
TraficSourceID SMALLINT NOT NULL,
SearchEngineID SMALLINT NOT NULL,
SearchPhrase TEXT NOT NULL,
AdvEngineID SMALLINT NOT NULL,
IsArtifical SMALLINT NOT NULL,
WindowClientWidth SMALLINT NOT NULL,
WindowClientHeight SMALLINT NOT NULL,
ClientTimeZone SMALLINT NOT NULL,
ClientEventTime TIMESTAMP NOT NULL,
SilverlightVersion1 SMALLINT NOT NULL,
SilverlightVersion2 SMALLINT NOT NULL,
SilverlightVersion3 INTEGER NOT NULL,
SilverlightVersion4 SMALLINT NOT NULL,
PageCharset TEXT NOT NULL,
CodeVersion INTEGER NOT NULL,
IsLink SMALLINT NOT NULL,
IsDownload SMALLINT NOT NULL,
IsNotBounce SMALLINT NOT NULL,
FUniqID BIGINT NOT NULL,
OriginalURL TEXT NOT NULL,
HID INTEGER NOT NULL,
IsOldCounter SMALLINT NOT NULL,
IsEvent SMALLINT NOT NULL,
IsParameter SMALLINT NOT NULL,
DontCountHits SMALLINT NOT NULL,
WithHash SMALLINT NOT NULL,
HitColor CHAR NOT NULL,
LocalEventTime TIMESTAMP NOT NULL,
Age SMALLINT NOT NULL,
Sex SMALLINT NOT NULL,
Income SMALLINT NOT NULL,
Interests SMALLINT NOT NULL,
Robotness SMALLINT NOT NULL,
RemoteIP INTEGER NOT NULL,
WindowName INTEGER NOT NULL,
OpenerName INTEGER NOT NULL,
HistoryLength SMALLINT NOT NULL,
BrowserLanguage TEXT NOT NULL,
BrowserCountry TEXT NOT NULL,
SocialNetwork TEXT NOT NULL,
SocialAction TEXT NOT NULL,
HTTPError SMALLINT NOT NULL,
SendTiming INTEGER NOT NULL,
DNSTiming INTEGER NOT NULL,
ConnectTiming INTEGER NOT NULL,
ResponseStartTiming INTEGER NOT NULL,
ResponseEndTiming INTEGER NOT NULL,
FetchTiming INTEGER NOT NULL,
SocialSourceNetworkID SMALLINT NOT NULL,
SocialSourcePage TEXT NOT NULL,
ParamPrice BIGINT NOT NULL,
ParamOrderID TEXT NOT NULL,
ParamCurrency TEXT NOT NULL,
ParamCurrencyID SMALLINT NOT NULL,
OpenstatServiceName TEXT NOT NULL,
OpenstatCampaignID TEXT NOT NULL,
OpenstatAdID TEXT NOT NULL,
OpenstatSourceID TEXT NOT NULL,
UTMSource TEXT NOT NULL,
UTMMedium TEXT NOT NULL,
UTMCampaign TEXT NOT NULL,
UTMContent TEXT NOT NULL,
UTMTerm TEXT NOT NULL,
FromTag TEXT NOT NULL,
HasGCLID SMALLINT NOT NULL,
RefererHash BIGINT NOT NULL,
URLHash BIGINT NOT NULL,
CLID INTEGER NOT NULL
);
```
Create a dump from ClickHouse:
```
SELECT WatchID::Int64, JavaEnable, replaceAll(replaceAll(replaceAll(toValidUTF8(Title), '\0', ''), '"', ''), '\\', ''), GoodEvent, EventTime, EventDate, CounterID::Int32, ClientIP::Int32, RegionID::Int32,
UserID::Int64, CounterClass, OS, UserAgent, replaceAll(replaceAll(replaceAll(toValidUTF8(URL), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(Referer), '\0', ''), '"', ''), '\\', ''), Refresh, RefererCategoryID::Int16, RefererRegionID::Int32,
URLCategoryID::Int16, URLRegionID::Int32, ResolutionWidth::Int16, ResolutionHeight::Int16, ResolutionDepth, FlashMajor, FlashMinor,
FlashMinor2, NetMajor, NetMinor, UserAgentMajor::Int16, replaceAll(replaceAll(replaceAll(toValidUTF8(UserAgentMinor::String), '\0', ''), '"', ''), '\\', ''), CookieEnable, JavascriptEnable, IsMobile, MobilePhone,
replaceAll(replaceAll(replaceAll(toValidUTF8(MobilePhoneModel), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(Params), '\0', ''), '"', ''), '\\', ''), IPNetworkID::Int32, TraficSourceID, SearchEngineID::Int16, replaceAll(replaceAll(replaceAll(toValidUTF8(SearchPhrase), '\0', ''), '"', ''), '\\', ''),
AdvEngineID, IsArtifical, WindowClientWidth::Int16, WindowClientHeight::Int16, ClientTimeZone, ClientEventTime,
SilverlightVersion1, SilverlightVersion2, SilverlightVersion3::Int32, SilverlightVersion4::Int16, replaceAll(replaceAll(replaceAll(toValidUTF8(PageCharset), '\0', ''), '"', ''), '\\', ''),
CodeVersion::Int32, IsLink, IsDownload, IsNotBounce, FUniqID::Int64, replaceAll(replaceAll(replaceAll(toValidUTF8(OriginalURL), '\0', ''), '"', ''), '\\', ''), HID::Int32, IsOldCounter, IsEvent,
IsParameter, DontCountHits, WithHash, replaceAll(replaceAll(replaceAll(toValidUTF8(HitColor::String), '\0', ''), '"', ''), '\\', ''), LocalEventTime, Age, Sex, Income, Interests::Int16, Robotness, RemoteIP::Int32,
WindowName, OpenerName, HistoryLength, replaceAll(replaceAll(replaceAll(toValidUTF8(BrowserLanguage::String), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(BrowserCountry::String), '\0', ''), '"', ''), '\\', ''),
replaceAll(replaceAll(replaceAll(toValidUTF8(SocialNetwork), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(SocialAction), '\0', ''), '"', ''), '\\', ''),
HTTPError, least(SendTiming, 30000), least(DNSTiming, 30000), least(ConnectTiming, 30000), least(ResponseStartTiming, 30000),
least(ResponseEndTiming, 30000), least(FetchTiming, 30000), SocialSourceNetworkID,
replaceAll(replaceAll(replaceAll(toValidUTF8(SocialSourcePage), '\0', ''), '"', ''), '\\', ''), ParamPrice, replaceAll(replaceAll(replaceAll(toValidUTF8(ParamOrderID), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(ParamCurrency::String), '\0', ''), '"', ''), '\\', ''),
ParamCurrencyID::Int16, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID,
UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, RefererHash::Int64, URLHash::Int64, CLID::Int32
FROM hits_100m_obfuscated
INTO OUTFILE 'dump.tsv'
FORMAT TSV
```
Insert data into PostgreSQL:
```
\copy hits_100m_pg FROM 'dump.tsv';
```

View File

@ -1,129 +0,0 @@
Time: 122020.258 ms (02:02.020)
Time: 5060.281 ms (00:05.060)
Time: 5052.692 ms (00:05.053)
Time: 129594.172 ms (02:09.594)
Time: 8079.623 ms (00:08.080)
Time: 7866.964 ms (00:07.867)
Time: 129584.717 ms (02:09.585)
Time: 8276.161 ms (00:08.276)
Time: 8153.295 ms (00:08.153)
Time: 123707.890 ms (02:03.708)
Time: 6835.297 ms (00:06.835)
Time: 6607.039 ms (00:06.607)
Time: 166640.676 ms (02:46.641)
Time: 75401.239 ms (01:15.401)
Time: 73526.027 ms (01:13.526)
Time: 272715.750 ms (04:32.716)
Time: 182721.613 ms (03:02.722)
Time: 182880.525 ms (03:02.881)
Time: 127108.191 ms (02:07.108)
Time: 6542.913 ms (00:06.543)
Time: 6339.887 ms (00:06.340)
Time: 127339.314 ms (02:07.339)
Time: 8376.381 ms (00:08.376)
Time: 7831.872 ms (00:07.832)
Time: 179176.439 ms (02:59.176)
Time: 58559.297 ms (00:58.559)
Time: 58139.265 ms (00:58.139)
Time: 182019.101 ms (03:02.019)
Time: 58435.027 ms (00:58.435)
Time: 58130.994 ms (00:58.131)
Time: 132449.502 ms (02:12.450)
Time: 11203.104 ms (00:11.203)
Time: 11048.435 ms (00:11.048)
Time: 128445.641 ms (02:08.446)
Time: 11602.145 ms (00:11.602)
Time: 11418.356 ms (00:11.418)
Time: 162831.387 ms (02:42.831)
Time: 41510.710 ms (00:41.511)
Time: 41682.899 ms (00:41.683)
Time: 171898.965 ms (02:51.899)
Time: 47379.274 ms (00:47.379)
Time: 47429.908 ms (00:47.430)
Time: 161607.811 ms (02:41.608)
Time: 41674.409 ms (00:41.674)
Time: 40854.340 ms (00:40.854)
Time: 175247.929 ms (02:55.248)
Time: 46721.776 ms (00:46.722)
Time: 46507.631 ms (00:46.508)
Time: 335961.271 ms (05:35.961)
Time: 248535.866 ms (04:08.536)
Time: 247383.678 ms (04:07.384)
Time: 132852.983 ms (02:12.853)
Time: 14939.304 ms (00:14.939)
Time: 14607.525 ms (00:14.608)
Time: 243461.844 ms (04:03.462)
Time: 157307.904 ms (02:37.308)
Time: 155093.101 ms (02:35.093)
Time: 122090.761 ms (02:02.091)
Time: 6411.266 ms (00:06.411)
Time: 6308.178 ms (00:06.308)
Time: 126584.819 ms (02:06.585)
Time: 8836.471 ms (00:08.836)
Time: 8532.176 ms (00:08.532)
Time: 125225.097 ms (02:05.225)
Time: 10236.910 ms (00:10.237)
Time: 9849.757 ms (00:09.850)
Time: 139140.064 ms (02:19.140)
Time: 21797.859 ms (00:21.798)
Time: 21559.214 ms (00:21.559)
Time: 124757.485 ms (02:04.757)
Time: 8728.403 ms (00:08.728)
Time: 8714.130 ms (00:08.714)
Time: 120687.258 ms (02:00.687)
Time: 8366.245 ms (00:08.366)
Time: 8146.856 ms (00:08.147)
Time: 122327.148 ms (02:02.327)
Time: 8698.359 ms (00:08.698)
Time: 8480.807 ms (00:08.481)
Time: 123958.614 ms (02:03.959)
Time: 8595.931 ms (00:08.596)
Time: 8241.773 ms (00:08.242)
Time: 128982.905 ms (02:08.983)
Time: 11252.783 ms (00:11.253)
Time: 10957.931 ms (00:10.958)
Time: 208455.385 ms (03:28.455)
Time: 102530.897 ms (01:42.531)
Time: 102049.298 ms (01:42.049)
Time: 131268.420 ms (02:11.268)
Time: 21094.466 ms (00:21.094)
Time: 20934.610 ms (00:20.935)
Time: 164084.134 ms (02:44.084)
Time: 77418.547 ms (01:17.419)
Time: 75422.290 ms (01:15.422)
Time: 174800.022 ms (02:54.800)
Time: 87859.594 ms (01:27.860)
Time: 85733.954 ms (01:25.734)
Time: 419357.463 ms (06:59.357)
Time: 339047.269 ms (05:39.047)
Time: 334808.230 ms (05:34.808)
Time: 475011.901 ms (07:55.012)
Time: 344406.246 ms (05:44.406)
Time: 347197.731 ms (05:47.198)
Time: 464657.732 ms (07:44.658)
Time: 332084.079 ms (05:32.084)
Time: 330921.322 ms (05:30.921)
Time: 152490.615 ms (02:32.491)
Time: 30954.343 ms (00:30.954)
Time: 31379.062 ms (00:31.379)
Time: 128539.127 ms (02:08.539)
Time: 12802.672 ms (00:12.803)
Time: 12494.088 ms (00:12.494)
Time: 125850.120 ms (02:05.850)
Time: 10318.773 ms (00:10.319)
Time: 9953.030 ms (00:09.953)
Time: 126602.092 ms (02:06.602)
Time: 8935.571 ms (00:08.936)
Time: 8711.184 ms (00:08.711)
Time: 133222.456 ms (02:13.222)
Time: 11848.869 ms (00:11.849)
Time: 11752.640 ms (00:11.753)
Time: 126950.067 ms (02:06.950)
Time: 11260.892 ms (00:11.261)
Time: 10943.649 ms (00:10.944)
Time: 128451.171 ms (02:08.451)
Time: 10984.980 ms (00:10.985)
Time: 10770.609 ms (00:10.771)
Time: 124621.000 ms (02:04.621)
Time: 8885.466 ms (00:08.885)
Time: 8857.296 ms (00:08.857)

View File

@ -1,43 +0,0 @@
SELECT count(*) FROM {table};
SELECT count(*) FROM {table} WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM {table};
SELECT sum(UserID) FROM {table};
SELECT COUNT(DISTINCT UserID) FROM {table};
SELECT COUNT(DISTINCT SearchPhrase) FROM {table};
SELECT min(EventDate), max(EventDate) FROM {table};
SELECT AdvEngineID, count(*) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, count(*) FROM {table} GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM {table} WHERE UserID = -6101065172474983726;
SELECT count(*) FROM {table} WHERE URL LIKE '%metrika%';
SELECT SearchPhrase, min(URL), count(*) AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
SELECT 1, URL, count(*) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM {table} GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT DATE_TRUNC('minute', EventTime) AS "Minute", count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime);

View File

@ -1,11 +0,0 @@
#!/bin/bash
grep -v -P '^#' queries.sql | sed -e 's/{table}/hits_100m_obfuscated/' | while read query; do
echo 3 | sudo tee /proc/sys/vm/drop_caches
echo "$query";
for i in {1..3}; do
sudo -u postgres psql tutorial -t -c 'set jit = off' -c '\timing' -c "$query" | grep 'Time' | tee --append log
done;
done;

View File

@ -1,215 +0,0 @@
3
SELECT count(*) FROM hits_100m_obfuscated;
Time: 3259.733 ms (00:03.260)
Time: 3135.484 ms (00:03.135)
Time: 3135.579 ms (00:03.136)
3
SELECT count(*) FROM hits_100m_obfuscated WHERE AdvEngineID != 0;
Time: 146854.557 ms (02:26.855)
Time: 6921.736 ms (00:06.922)
Time: 6619.892 ms (00:06.620)
3
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_100m_obfuscated;
Time: 146568.297 ms (02:26.568)
Time: 7481.610 ms (00:07.482)
Time: 7258.209 ms (00:07.258)
3
SELECT sum(UserID) FROM hits_100m_obfuscated;
Time: 146864.106 ms (02:26.864)
Time: 5690.024 ms (00:05.690)
Time: 5381.820 ms (00:05.382)
3
SELECT COUNT(DISTINCT UserID) FROM hits_100m_obfuscated;
Time: 227507.331 ms (03:47.507)
Time: 69165.471 ms (01:09.165)
Time: 72216.950 ms (01:12.217)
3
SELECT COUNT(DISTINCT SearchPhrase) FROM hits_100m_obfuscated;
Time: 323644.397 ms (05:23.644)
Time: 177578.740 ms (02:57.579)
Time: 175055.738 ms (02:55.056)
3
SELECT min(EventDate), max(EventDate) FROM hits_100m_obfuscated;
Time: 146147.843 ms (02:26.148)
Time: 5735.128 ms (00:05.735)
Time: 5428.638 ms (00:05.429)
3
SELECT AdvEngineID, count(*) FROM hits_100m_obfuscated WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
Time: 148658.450 ms (02:28.658)
Time: 7014.882 ms (00:07.015)
Time: 6599.736 ms (00:06.600)
3
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated GROUP BY RegionID ORDER BY u DESC LIMIT 10;
Time: 202423.122 ms (03:22.423)
Time: 54439.047 ms (00:54.439)
Time: 54800.354 ms (00:54.800)
3
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits_100m_obfuscated GROUP BY RegionID ORDER BY c DESC LIMIT 10;
Time: 201152.491 ms (03:21.152)
Time: 55875.854 ms (00:55.876)
Time: 55200.330 ms (00:55.200)
3
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
Time: 146042.603 ms (02:26.043)
Time: 9931.633 ms (00:09.932)
Time: 10037.032 ms (00:10.037)
3
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
Time: 150811.952 ms (02:30.812)
Time: 10320.230 ms (00:10.320)
Time: 9993.232 ms (00:09.993)
3
SELECT SearchPhrase, count(*) AS c FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 173071.218 ms (02:53.071)
Time: 34314.835 ms (00:34.315)
Time: 34420.919 ms (00:34.421)
3
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
Time: 172874.155 ms (02:52.874)
Time: 43704.494 ms (00:43.704)
Time: 43918.380 ms (00:43.918)
3
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 178484.822 ms (02:58.485)
Time: 36850.436 ms (00:36.850)
Time: 35789.029 ms (00:35.789)
3
SELECT UserID, count(*) FROM hits_100m_obfuscated GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
Time: 169720.759 ms (02:49.721)
Time: 24125.730 ms (00:24.126)
Time: 23782.745 ms (00:23.783)
3
SELECT UserID, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
Time: 182335.631 ms (03:02.336)
Time: 37324.563 ms (00:37.325)
Time: 37124.250 ms (00:37.124)
3
SELECT UserID, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, SearchPhrase LIMIT 10;
Time: 163799.714 ms (02:43.800)
Time: 18514.031 ms (00:18.514)
Time: 18968.524 ms (00:18.969)
3
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
Time: 294799.480 ms (04:54.799)
Time: 149592.992 ms (02:29.593)
Time: 149466.291 ms (02:29.466)
3
SELECT UserID FROM hits_100m_obfuscated WHERE UserID = -6101065172474983726;
Time: 140797.496 ms (02:20.797)
Time: 5312.321 ms (00:05.312)
Time: 5020.502 ms (00:05.021)
3
SELECT count(*) FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%';
Time: 143092.287 ms (02:23.092)
Time: 7893.874 ms (00:07.894)
Time: 7661.326 ms (00:07.661)
3
SELECT SearchPhrase, min(URL), count(*) AS c FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 143682.424 ms (02:23.682)
Time: 9249.962 ms (00:09.250)
Time: 9073.876 ms (00:09.074)
3
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM hits_100m_obfuscated WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 150965.884 ms (02:30.966)
Time: 20350.812 ms (00:20.351)
Time: 20074.939 ms (00:20.075)
3
SELECT * FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
Time: 4674.669 ms (00:04.675)
Time: 4532.389 ms (00:04.532)
Time: 4555.457 ms (00:04.555)
3
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
Time: 5.177 ms
Time: 5.031 ms
Time: 4.419 ms
3
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
Time: 141152.210 ms (02:21.152)
Time: 7492.968 ms (00:07.493)
Time: 7300.428 ms (00:07.300)
3
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
Time: 30.736 ms
Time: 5.018 ms
Time: 5.132 ms
3
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM hits_100m_obfuscated WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
Time: 144034.016 ms (02:24.034)
Time: 10701.672 ms (00:10.702)
Time: 10348.565 ms (00:10.349)
3
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www.)?([^/]+)/.*$', '1') AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM hits_100m_obfuscated WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
Time: 191575.080 ms (03:11.575)
Time: 97836.706 ms (01:37.837)
Time: 97673.219 ms (01:37.673)
3
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_100m_obfuscated;
Time: 143652.317 ms (02:23.652)
Time: 22185.656 ms (00:22.186)
Time: 21887.411 ms (00:21.887)
3
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
Time: 153481.944 ms (02:33.482)
Time: 17748.628 ms (00:17.749)
Time: 17551.116 ms (00:17.551)
3
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
Time: 167448.684 ms (02:47.449)
Time: 25902.961 ms (00:25.903)
Time: 25592.018 ms (00:25.592)
3
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
Time: 299183.443 ms (04:59.183)
Time: 145349.772 ms (02:25.350)
Time: 143214.688 ms (02:23.215)
3
SELECT URL, count(*) AS c FROM hits_100m_obfuscated GROUP BY URL ORDER BY c DESC LIMIT 10;
Time: 389851.369 ms (06:29.851)
Time: 228158.639 ms (03:48.159)
Time: 231811.118 ms (03:51.811)
3
SELECT 1, URL, count(*) AS c FROM hits_100m_obfuscated GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
Time: 407458.343 ms (06:47.458)
Time: 230125.530 ms (03:50.126)
Time: 230764.511 ms (03:50.765)
3
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits_100m_obfuscated GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
Time: 174098.556 ms (02:54.099)
Time: 23503.975 ms (00:23.504)
Time: 24322.856 ms (00:24.323)
3
SELECT URL, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
Time: 145906.025 ms (02:25.906)
Time: 10824.695 ms (00:10.825)
Time: 10484.885 ms (00:10.485)
3
SELECT Title, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
Time: 144063.711 ms (02:24.064)
Time: 8947.980 ms (00:08.948)
Time: 8608.434 ms (00:08.608)
3
SELECT URL, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
Time: 141883.596 ms (02:21.884)
Time: 7977.257 ms (00:07.977)
Time: 7673.547 ms (00:07.674)
3
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
Time: 147100.084 ms (02:27.100)
Time: 9527.812 ms (00:09.528)
Time: 9457.663 ms (00:09.458)
3
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
Time: 144585.669 ms (02:24.586)
Time: 10815.223 ms (00:10.815)
Time: 10594.707 ms (00:10.595)
3
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
Time: 145738.341 ms (02:25.738)
Time: 10592.979 ms (00:10.593)
Time: 10181.477 ms (00:10.181)
3
SELECT DATE_TRUNC('minute', EventTime) AS "Minute", count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime);
Time: 145023.796 ms (02:25.024)
Time: 8035.337 ms (00:08.035)
Time: 7865.698 ms (00:07.866)

View File

@ -1,129 +0,0 @@
Time: 1784.299 ms (00:01.784)
Time: 1223.461 ms (00:01.223)
Time: 1200.665 ms (00:01.201)
Time: 22730.141 ms (00:22.730)
Time: 1379.227 ms (00:01.379)
Time: 1361.595 ms (00:01.362)
Time: 29888.235 ms (00:29.888)
Time: 3160.611 ms (00:03.161)
Time: 3207.363 ms (00:03.207)
Time: 53922.569 ms (00:53.923)
Time: 2301.456 ms (00:02.301)
Time: 2277.009 ms (00:02.277)
Time: 45363.999 ms (00:45.364)
Time: 43765.848 ms (00:43.766)
Time: 44066.621 ms (00:44.067)
Time: 172945.633 ms (02:52.946)
Time: 136944.098 ms (02:16.944)
Time: 138268.413 ms (02:18.268)
Time: 16764.579 ms (00:16.765)
Time: 2579.907 ms (00:02.580)
Time: 2590.390 ms (00:02.590)
Time: 1498.034 ms (00:01.498)
Time: 1434.534 ms (00:01.435)
Time: 1448.123 ms (00:01.448)
Time: 113533.016 ms (01:53.533)
Time: 78465.335 ms (01:18.465)
Time: 80778.839 ms (01:20.779)
Time: 90456.388 ms (01:30.456)
Time: 87050.166 ms (01:27.050)
Time: 88426.851 ms (01:28.427)
Time: 45021.632 ms (00:45.022)
Time: 12486.342 ms (00:12.486)
Time: 12222.489 ms (00:12.222)
Time: 44246.843 ms (00:44.247)
Time: 15606.856 ms (00:15.607)
Time: 15251.554 ms (00:15.252)
Time: 29654.719 ms (00:29.655)
Time: 29441.858 ms (00:29.442)
Time: 29608.141 ms (00:29.608)
Time: 103547.383 ms (01:43.547)
Time: 104733.648 ms (01:44.734)
Time: 105779.016 ms (01:45.779)
Time: 29695.834 ms (00:29.696)
Time: 15395.447 ms (00:15.395)
Time: 15819.650 ms (00:15.820)
Time: 27841.552 ms (00:27.842)
Time: 29521.849 ms (00:29.522)
Time: 27508.521 ms (00:27.509)
Time: 56665.709 ms (00:56.666)
Time: 56459.321 ms (00:56.459)
Time: 56407.620 ms (00:56.408)
Time: 27488.888 ms (00:27.489)
Time: 25557.427 ms (00:25.557)
Time: 25634.140 ms (00:25.634)
Time: 97376.463 ms (01:37.376)
Time: 96047.902 ms (01:36.048)
Time: 99918.341 ms (01:39.918)
Time: 6294.887 ms (00:06.295)
Time: 6407.262 ms (00:06.407)
Time: 6376.369 ms (00:06.376)
Time: 40787.808 ms (00:40.788)
Time: 11206.256 ms (00:11.206)
Time: 11219.871 ms (00:11.220)
Time: 12420.227 ms (00:12.420)
Time: 12548.301 ms (00:12.548)
Time: 12468.458 ms (00:12.468)
Time: 57679.878 ms (00:57.680)
Time: 35466.123 ms (00:35.466)
Time: 35562.064 ms (00:35.562)
Time: 13551.276 ms (00:13.551)
Time: 13417.313 ms (00:13.417)
Time: 13645.287 ms (00:13.645)
Time: 150.297 ms
Time: 55.995 ms
Time: 55.796 ms
Time: 3059.796 ms (00:03.060)
Time: 3038.246 ms (00:03.038)
Time: 3041.210 ms (00:03.041)
Time: 4461.720 ms (00:04.462)
Time: 4446.691 ms (00:04.447)
Time: 4424.526 ms (00:04.425)
Time: 29275.463 ms (00:29.275)
Time: 17558.747 ms (00:17.559)
Time: 17438.621 ms (00:17.439)
Time: 203316.184 ms (03:23.316)
Time: 190037.946 ms (03:10.038)
Time: 189276.624 ms (03:09.277)
Time: 36921.542 ms (00:36.922)
Time: 36963.771 ms (00:36.964)
Time: 36660.406 ms (00:36.660)
Time: 38307.345 ms (00:38.307)
Time: 17597.355 ms (00:17.597)
Time: 17324.776 ms (00:17.325)
Time: 39857.567 ms (00:39.858)
Time: 26776.411 ms (00:26.776)
Time: 26592.819 ms (00:26.593)
Time: 162782.290 ms (02:42.782)
Time: 160722.582 ms (02:40.723)
Time: 162487.263 ms (02:42.487)
Time: 261494.290 ms (04:21.494)
Time: 263594.014 ms (04:23.594)
Time: 260436.201 ms (04:20.436)
Time: 265758.455 ms (04:25.758)
Time: 270087.523 ms (04:30.088)
Time: 266617.218 ms (04:26.617)
Time: 30677.159 ms (00:30.677)
Time: 28933.542 ms (00:28.934)
Time: 29815.271 ms (00:29.815)
Time: 19754.932 ms (00:19.755)
Time: 16851.157 ms (00:16.851)
Time: 16703.289 ms (00:16.703)
Time: 10379.500 ms (00:10.379)
Time: 10267.336 ms (00:10.267)
Time: 10287.944 ms (00:10.288)
Time: 17320.582 ms (00:17.321)
Time: 9786.410 ms (00:09.786)
Time: 9760.578 ms (00:09.761)
Time: 33487.352 ms (00:33.487)
Time: 26056.528 ms (00:26.057)
Time: 25958.258 ms (00:25.958)
Time: 28020.227 ms (00:28.020)
Time: 5609.725 ms (00:05.610)
Time: 5538.744 ms (00:05.539)
Time: 15119.473 ms (00:15.119)
Time: 5057.455 ms (00:05.057)
Time: 5063.154 ms (00:05.063)
Time: 3627.703 ms (00:03.628)
Time: 3645.232 ms (00:03.645)
Time: 3546.855 ms (00:03.547)

View File

@ -1,43 +0,0 @@
SELECT count(*) FROM {table};
SELECT count(*) FROM {table} WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM {table};
SELECT sum(UserID) FROM {table};
SELECT COUNT(DISTINCT UserID) FROM {table};
SELECT COUNT(DISTINCT SearchPhrase) FROM {table};
SELECT min(EventDate), max(EventDate) FROM {table};
SELECT AdvEngineID, count(*) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, count(*) FROM {table} GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM {table} WHERE UserID = -6101065172474983726;
SELECT count(*) FROM {table} WHERE URL LIKE '%metrika%';
SELECT SearchPhrase, min(URL), count(*) AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
SELECT 1, URL, count(*) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM {table} GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT DATE_TRUNC('minute', EventTime) AS "Minute", count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime);

File diff suppressed because it is too large Load Diff

View File

@ -1,40 +0,0 @@
Quick installation instructions
-------------------------------
Register on my.vertica.com
https://my.vertica.com/download-community-edition/
Download HP Vertica 7.1.1 Analytic Database Server, Debian or Ubuntu 14.04 version.
sudo apt-get install sysstat pstack mcelog
sudo dpkg -i vertica_7.1.1-0_amd64.deb
sudo sh -c "echo 'export TZ=Europe/Moscow' >> /home/dbadmin/.bash_profile"
# Don't specify localhost due to poor support of IPv6.
sudo /opt/vertica/sbin/install_vertica --hosts=127.0.0.1 --failure-threshold=NONE
sudo mkdir /opt/vertica-data/
sudo chown dbadmin /opt/vertica-data/
sudo su dbadmin
/opt/vertica/bin/adminTools
configuration menu
create database
name: default
empty password
both directories: /opt/vertica-data/
main menu
exit
How to prepare data
-------------------
Prepare dumps with script create_dump.sh for tables hits_10m, hits_100m, hits_1000m. It takes about 5 hours (1m41.882s, 25m11.103s, 276m36.388s).
Start vsql command line client.
/opt/vertica/bin/vsql -U dbadmin
Create tables with queries from hits_define_schema.sql.
Time to insert data:
hits_10m: 91 sec.
hits_100m: 774 sec.
hits_1000m: 13769 sec.
You need to validate number of rows with SELECT count(*).

View File

@ -1,24 +0,0 @@
#!/usr/bin/env bash
QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3
cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
sync
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
echo -n "["
for i in $(seq 1 $TRIES); do
RES=$((echo '\timing'; echo "$query") |
/opt/vertica/bin/vsql -U dbadmin |
grep -oP 'All rows formatted: [^ ]+ ms' |
ssed -R -e 's/^All rows formatted: ([\d,]+) ms$/\1/' |
tr ',' '.')
[[ "$?" == "0" ]] && echo -n "$(perl -e "print ${RES} / 1000")" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done
echo "],"
done

View File

@ -1,339 +0,0 @@
\timing
create table hits_10m
(
WatchID INTEGER,
JavaEnable INTEGER,
Title VARCHAR(1024),
GoodEvent INTEGER,
EventTime DATETIME,
EventDate DATE,
CounterID INTEGER,
ClientIP INTEGER,
RegionID INTEGER,
UserID INTEGER,
CounterClass INTEGER,
OS INTEGER,
UserAgent INTEGER,
URL VARCHAR(6072),
Referer VARCHAR(2048),
Refresh INTEGER,
RefererCategoryID INTEGER,
RefererRegionID INTEGER,
URLCategoryID INTEGER,
URLRegionID INTEGER,
ResolutionWidth INTEGER,
ResolutionHeight INTEGER,
ResolutionDepth INTEGER,
FlashMajor INTEGER,
FlashMinor INTEGER,
FlashMinor2 VARCHAR(256),
NetMajor INTEGER,
NetMinor INTEGER,
UserAgentMajor INTEGER,
UserAgentMinor CHAR(2),
CookieEnable INTEGER,
JavascriptEnable INTEGER,
IsMobile INTEGER,
MobilePhone INTEGER,
MobilePhoneModel VARCHAR(80),
Params VARCHAR(2048),
IPNetworkID INTEGER,
TraficSourceID INTEGER,
SearchEngineID INTEGER,
SearchPhrase VARCHAR(1024),
AdvEngineID INTEGER,
IsArtifical INTEGER,
WindowClientWidth INTEGER,
WindowClientHeight INTEGER,
ClientTimeZone INTEGER,
ClientEventTime DATETIME,
SilverlightVersion1 INTEGER,
SilverlightVersion2 INTEGER,
SilverlightVersion3 INTEGER,
SilverlightVersion4 INTEGER,
PageCharset VARCHAR(80),
CodeVersion INTEGER,
IsLink INTEGER,
IsDownload INTEGER,
IsNotBounce INTEGER,
FUniqID INTEGER,
OriginalURL VARCHAR(6072),
HID INTEGER,
IsOldCounter INTEGER,
IsEvent INTEGER,
IsParameter INTEGER,
DontCountHits INTEGER,
WithHash INTEGER,
HitColor CHAR(1),
LocalEventTime DATETIME,
Age INTEGER,
Sex INTEGER,
Income INTEGER,
Interests INTEGER,
Robotness INTEGER,
RemoteIP INTEGER,
WindowName INTEGER,
OpenerName INTEGER,
HistoryLength INTEGER,
BrowserLanguage CHAR(2),
BrowserCountry CHAR(2),
SocialNetwork VARCHAR(128),
SocialAction VARCHAR(128),
HTTPError INTEGER,
SendTiming INTEGER,
DNSTiming INTEGER,
ConnectTiming INTEGER,
ResponseStartTiming INTEGER,
ResponseEndTiming INTEGER,
FetchTiming INTEGER,
SocialSourceNetworkID INTEGER,
SocialSourcePage VARCHAR(128),
ParamPrice INTEGER,
ParamOrderID VARCHAR(80),
ParamCurrency CHAR(3),
ParamCurrencyID INTEGER,
OpenstatServiceName VARCHAR(80),
OpenstatCampaignID VARCHAR(80),
OpenstatAdID VARCHAR(80),
OpenstatSourceID VARCHAR(80),
UTMSource VARCHAR(256),
UTMMedium VARCHAR(256),
UTMCampaign VARCHAR(256),
UTMContent VARCHAR(256),
UTMTerm VARCHAR(256),
FromTag VARCHAR(256),
HasGCLID INTEGER,
RefererHash INTEGER,
URLHash INTEGER,
CLID INTEGER
) ORDER BY CounterID, EventDate, UserID, EventTime;
\set input_file '''/opt/dumps/hits_10m_corrected.tsv'''
COPY hits_10m FROM :input_file DELIMITER E'\t' DIRECT;
create table hits_100m
(
WatchID INTEGER,
JavaEnable INTEGER,
Title VARCHAR(1024),
GoodEvent INTEGER,
EventTime DATETIME,
EventDate DATE,
CounterID INTEGER,
ClientIP INTEGER,
RegionID INTEGER,
UserID INTEGER,
CounterClass INTEGER,
OS INTEGER,
UserAgent INTEGER,
URL VARCHAR(6072),
Referer VARCHAR(2048),
Refresh INTEGER,
RefererCategoryID INTEGER,
RefererRegionID INTEGER,
URLCategoryID INTEGER,
URLRegionID INTEGER,
ResolutionWidth INTEGER,
ResolutionHeight INTEGER,
ResolutionDepth INTEGER,
FlashMajor INTEGER,
FlashMinor INTEGER,
FlashMinor2 VARCHAR(256),
NetMajor INTEGER,
NetMinor INTEGER,
UserAgentMajor INTEGER,
UserAgentMinor CHAR(2),
CookieEnable INTEGER,
JavascriptEnable INTEGER,
IsMobile INTEGER,
MobilePhone INTEGER,
MobilePhoneModel VARCHAR(80),
Params VARCHAR(2048),
IPNetworkID INTEGER,
TraficSourceID INTEGER,
SearchEngineID INTEGER,
SearchPhrase VARCHAR(1024),
AdvEngineID INTEGER,
IsArtifical INTEGER,
WindowClientWidth INTEGER,
WindowClientHeight INTEGER,
ClientTimeZone INTEGER,
ClientEventTime DATETIME,
SilverlightVersion1 INTEGER,
SilverlightVersion2 INTEGER,
SilverlightVersion3 INTEGER,
SilverlightVersion4 INTEGER,
PageCharset VARCHAR(80),
CodeVersion INTEGER,
IsLink INTEGER,
IsDownload INTEGER,
IsNotBounce INTEGER,
FUniqID INTEGER,
OriginalURL VARCHAR(6072),
HID INTEGER,
IsOldCounter INTEGER,
IsEvent INTEGER,
IsParameter INTEGER,
DontCountHits INTEGER,
WithHash INTEGER,
HitColor CHAR(1),
LocalEventTime DATETIME,
Age INTEGER,
Sex INTEGER,
Income INTEGER,
Interests INTEGER,
Robotness INTEGER,
RemoteIP INTEGER,
WindowName INTEGER,
OpenerName INTEGER,
HistoryLength INTEGER,
BrowserLanguage CHAR(2),
BrowserCountry CHAR(2),
SocialNetwork VARCHAR(128),
SocialAction VARCHAR(128),
HTTPError INTEGER,
SendTiming INTEGER,
DNSTiming INTEGER,
ConnectTiming INTEGER,
ResponseStartTiming INTEGER,
ResponseEndTiming INTEGER,
FetchTiming INTEGER,
SocialSourceNetworkID INTEGER,
SocialSourcePage VARCHAR(128),
ParamPrice INTEGER,
ParamOrderID VARCHAR(80),
ParamCurrency CHAR(3),
ParamCurrencyID INTEGER,
OpenstatServiceName VARCHAR(80),
OpenstatCampaignID VARCHAR(80),
OpenstatAdID VARCHAR(80),
OpenstatSourceID VARCHAR(80),
UTMSource VARCHAR(256),
UTMMedium VARCHAR(256),
UTMCampaign VARCHAR(256),
UTMContent VARCHAR(256),
UTMTerm VARCHAR(256),
FromTag VARCHAR(256),
HasGCLID INTEGER,
RefererHash INTEGER,
URLHash INTEGER,
CLID INTEGER
) ORDER BY CounterID, EventDate, UserID, EventTime;
\set input_file '''/opt/dumps/hits_100m_corrected.tsv'''
COPY hits_100m FROM :input_file DELIMITER E'\t' DIRECT;
create table hits_1000m
(
WatchID INTEGER,
JavaEnable INTEGER,
Title VARCHAR(1024),
GoodEvent INTEGER,
EventTime DATETIME,
EventDate DATE,
CounterID INTEGER,
ClientIP INTEGER,
RegionID INTEGER,
UserID INTEGER,
CounterClass INTEGER,
OS INTEGER,
UserAgent INTEGER,
URL VARCHAR(6072),
Referer VARCHAR(2048),
Refresh INTEGER,
RefererCategoryID INTEGER,
RefererRegionID INTEGER,
URLCategoryID INTEGER,
URLRegionID INTEGER,
ResolutionWidth INTEGER,
ResolutionHeight INTEGER,
ResolutionDepth INTEGER,
FlashMajor INTEGER,
FlashMinor INTEGER,
FlashMinor2 VARCHAR(256),
NetMajor INTEGER,
NetMinor INTEGER,
UserAgentMajor INTEGER,
UserAgentMinor CHAR(2),
CookieEnable INTEGER,
JavascriptEnable INTEGER,
IsMobile INTEGER,
MobilePhone INTEGER,
MobilePhoneModel VARCHAR(80),
Params VARCHAR(2048),
IPNetworkID INTEGER,
TraficSourceID INTEGER,
SearchEngineID INTEGER,
SearchPhrase VARCHAR(1024),
AdvEngineID INTEGER,
IsArtifical INTEGER,
WindowClientWidth INTEGER,
WindowClientHeight INTEGER,
ClientTimeZone INTEGER,
ClientEventTime DATETIME,
SilverlightVersion1 INTEGER,
SilverlightVersion2 INTEGER,
SilverlightVersion3 INTEGER,
SilverlightVersion4 INTEGER,
PageCharset VARCHAR(80),
CodeVersion INTEGER,
IsLink INTEGER,
IsDownload INTEGER,
IsNotBounce INTEGER,
FUniqID INTEGER,
OriginalURL VARCHAR(6072),
HID INTEGER,
IsOldCounter INTEGER,
IsEvent INTEGER,
IsParameter INTEGER,
DontCountHits INTEGER,
WithHash INTEGER,
HitColor CHAR(1),
LocalEventTime DATETIME,
Age INTEGER,
Sex INTEGER,
Income INTEGER,
Interests INTEGER,
Robotness INTEGER,
RemoteIP INTEGER,
WindowName INTEGER,
OpenerName INTEGER,
HistoryLength INTEGER,
BrowserLanguage CHAR(2),
BrowserCountry CHAR(2),
SocialNetwork VARCHAR(128),
SocialAction VARCHAR(128),
HTTPError INTEGER,
SendTiming INTEGER,
DNSTiming INTEGER,
ConnectTiming INTEGER,
ResponseStartTiming INTEGER,
ResponseEndTiming INTEGER,
FetchTiming INTEGER,
SocialSourceNetworkID INTEGER,
SocialSourcePage VARCHAR(128),
ParamPrice INTEGER,
ParamOrderID VARCHAR(80),
ParamCurrency CHAR(3),
ParamCurrencyID INTEGER,
OpenstatServiceName VARCHAR(80),
OpenstatCampaignID VARCHAR(80),
OpenstatAdID VARCHAR(80),
OpenstatSourceID VARCHAR(80),
UTMSource VARCHAR(256),
UTMMedium VARCHAR(256),
UTMCampaign VARCHAR(256),
UTMContent VARCHAR(256),
UTMTerm VARCHAR(256),
FromTag VARCHAR(256),
HasGCLID INTEGER,
RefererHash INTEGER,
URLHash INTEGER,
CLID INTEGER
) ORDER BY CounterID, EventDate, UserID, EventTime;
\set input_file '''/opt/dumps/hits_1000m_corrected.tsv'''
COPY hits_1000m FROM :input_file DELIMITER E'\t' DIRECT;

View File

@ -1,43 +0,0 @@
SELECT count(*) FROM {table};
SELECT count(*) FROM {table} WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM {table};
SELECT sum_float(UserID) FROM {table};
SELECT COUNT(DISTINCT UserID) FROM {table};
SELECT COUNT(DISTINCT SearchPhrase) FROM {table};
SELECT min(EventDate), max(EventDate) FROM {table};
SELECT AdvEngineID, count(*) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM {table} GROUP BY RegionID ORDER BY count(*) DESC LIMIT 10;
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(*) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, count(*) FROM {table} GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, Minute(EventTime) AS m, SearchPhrase, count(*) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM {table} WHERE UserID = 12345678901234567890;
SELECT count(*) FROM {table} WHERE URL LIKE '%metrika%';
SELECT SearchPhrase, MAX(URL), count(*) FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT SearchPhrase, MAX(URL), MAX(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(OCTET_LENGTH(URL)) AS l, count(*) FROM {table} WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT SUBSTRB(SUBSTRB(Referer, POSITIONB(Referer, '//') + 2), 1, GREATEST(0, POSITIONB(SUBSTRB(Referer, POSITIONB(Referer, '//') + 2), '/') - 1)) AS key, avg(OCTET_LENGTH(Referer)) AS l, count(*) AS c, MAX(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
SELECT SearchEngineID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY count(*) DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
SELECT URL, count(*) FROM {table} GROUP BY URL ORDER BY count(*) DESC LIMIT 10;
SELECT 1, URL, count(*) FROM {table} GROUP BY 1, URL ORDER BY count(*) DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) FROM {table} GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY count(*) DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN SearchEngineID = 0 AND AdvEngineID = 0 THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = 6202628419148573758 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100000;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = 6202628419148573758 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT TIME_SLICE(EventTime, 1, 'MINUTE') AS Minute, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;

View File

@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54464)
SET(VERSION_REVISION 54465)
SET(VERSION_MAJOR 22)
SET(VERSION_MINOR 7)
SET(VERSION_MINOR 8)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 7000c4e0033bb9e69050ab8ef73e8e7465f78059)
SET(VERSION_DESCRIBE v22.7.1.1-testing)
SET(VERSION_STRING 22.7.1.1)
SET(VERSION_GITHASH f4f05ec786a8b8966dd0ea2a2d7e39a8c7db24f4)
SET(VERSION_DESCRIBE v22.8.1.1-testing)
SET(VERSION_STRING 22.8.1.1)
# end of autochange

View File

@ -19,6 +19,7 @@ option (ENABLE_POPCNT "Use popcnt instructions on x86_64" 1)
option (ENABLE_AVX "Use AVX instructions on x86_64" 0)
option (ENABLE_AVX2 "Use AVX2 instructions on x86_64" 0)
option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 0)
option (ENABLE_AVX512_VBMI "Use AVX512_VBMI instruction on x86_64 (depends on ENABLE_AVX512)" 0)
option (ENABLE_BMI "Use BMI instructions on x86_64" 0)
option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 0)
option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0)
@ -151,6 +152,20 @@ elseif (ARCH_AMD64)
set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
endif ()
set (TEST_FLAG "-mavx512vbmi")
set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0")
check_cxx_source_compiles("
#include <immintrin.h>
int main() {
auto a = _mm512_permutexvar_epi8(__m512i(), __m512i());
(void)a;
return 0;
}
" HAVE_AVX512_VBMI)
if (HAVE_AVX512 AND ENABLE_AVX512 AND HAVE_AVX512_VBMI AND ENABLE_AVX512_VBMI)
set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
endif ()
set (TEST_FLAG "-mbmi")
set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0")
check_cxx_source_compiles("

View File

@ -1,3 +1,19 @@
if (_CLICKHOUSE_TOOLCHAIN_FILE_LOADED)
# During first run of cmake the toolchain file will be loaded twice,
# - /usr/share/cmake-3.23/Modules/CMakeDetermineSystem.cmake
# - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake
#
# But once you already have non-empty cmake cache it will be loaded only
# once:
# - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake
#
# This has no harm except for double load of toolchain will add
# --gcc-toolchain multiple times that will not allow ccache to reuse the
# cache.
return()
endif()
set (_CLICKHOUSE_TOOLCHAIN_FILE_LOADED ON)
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
set (CMAKE_SYSTEM_NAME "Linux")

View File

@ -20,16 +20,11 @@ if (COMPILER_CLANG)
# We want to get everything out of the compiler for code quality.
add_warning(everything)
add_warning(pedantic)
no_warning(vla-extension)
no_warning(zero-length-array)
no_warning(c11-extensions)
no_warning(unused-command-line-argument)
no_warning(c++98-compat-pedantic)
no_warning(c++98-compat)
no_warning(c99-extensions)
no_warning(conversion)
no_warning(ctad-maybe-unsupported) # clang 9+, linux-only
no_warning(deprecated-dynamic-exception-spec)
no_warning(disabled-macro-expansion)
no_warning(documentation-unknown-command)
no_warning(double-promotion)
@ -38,12 +33,7 @@ if (COMPILER_CLANG)
no_warning(global-constructors)
no_warning(missing-prototypes)
no_warning(missing-variable-declarations)
no_warning(nested-anon-types)
no_warning(packed)
no_warning(padded)
no_warning(return-std-move-in-c++11) # clang 7+
no_warning(shift-sign-overflow)
no_warning(sign-conversion)
no_warning(switch-enum)
no_warning(undefined-func-template)
no_warning(unused-template)

View File

@ -157,6 +157,8 @@ endif()
add_contrib (sqlite-cmake sqlite-amalgamation)
add_contrib (s2geometry-cmake s2geometry)
add_contrib (base-x-cmake base-x)
add_contrib(c-ares-cmake c-ares)
add_contrib (qpl-cmake qpl)
# Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear

1
contrib/c-ares vendored Submodule

@ -0,0 +1 @@
Subproject commit afee6748b0b99acf4509d42fa37ac8422262f91b

View File

@ -0,0 +1,35 @@
# Choose to build static or shared library for c-ares.
if (USE_STATIC_LIBRARIES)
set(CARES_STATIC ON CACHE BOOL "" FORCE)
set(CARES_SHARED OFF CACHE BOOL "" FORCE)
else ()
set(CARES_STATIC OFF CACHE BOOL "" FORCE)
set(CARES_SHARED ON CACHE BOOL "" FORCE)
endif ()
# Disable looking for libnsl on a platforms that has gethostbyname in glibc
#
# c-ares searching for gethostbyname in the libnsl library, however in the
# version that shipped with gRPC it doing it wrong [1], since it uses
# CHECK_LIBRARY_EXISTS(), which will return TRUE even if the function exists in
# another dependent library. The upstream already contains correct macro [2],
# but it is not included in gRPC (even upstream gRPC, not the one that is
# shipped with clickhousee).
#
# [1]: https://github.com/c-ares/c-ares/blob/e982924acee7f7313b4baa4ee5ec000c5e373c30/CMakeLists.txt#L125
# [2]: https://github.com/c-ares/c-ares/blob/44fbc813685a1fa8aa3f27fcd7544faf612d376a/CMakeLists.txt#L146
#
# And because if you by some reason have libnsl [3] installed, clickhouse will
# reject to start w/o it. While this is completelly different library.
#
# [3]: https://packages.debian.org/bullseye/libnsl2
if (NOT CMAKE_SYSTEM_NAME STREQUAL "SunOS")
set(HAVE_LIBNSL OFF CACHE BOOL "" FORCE)
endif()
# Force use of c-ares inet_net_pton instead of libresolv one
set(HAVE_INET_NET_PTON OFF CACHE BOOL "" FORCE)
add_subdirectory("../c-ares/" "../c-ares/")
add_library(ch_contrib::c-ares ALIAS c-ares)

View File

@ -2,7 +2,7 @@ set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest")
add_library(_gtest "${SRC_DIR}/src/gtest-all.cc")
set_target_properties(_gtest PROPERTIES VERSION "1.0.0")
target_compile_definitions (_gtest INTERFACE GTEST_HAS_POSIX_RE=0)
target_compile_definitions (_gtest PUBLIC GTEST_HAS_POSIX_RE=0)
target_include_directories(_gtest SYSTEM PUBLIC "${SRC_DIR}/include")
target_include_directories(_gtest PRIVATE "${SRC_DIR}")

View File

@ -45,38 +45,11 @@ set(_gRPC_SSL_LIBRARIES OpenSSL::Crypto OpenSSL::SSL)
# Use abseil-cpp from ClickHouse contrib, not from gRPC third_party.
set(gRPC_ABSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
# Choose to build static or shared library for c-ares.
if (USE_STATIC_LIBRARIES)
set(CARES_STATIC ON CACHE BOOL "" FORCE)
set(CARES_SHARED OFF CACHE BOOL "" FORCE)
else ()
set(CARES_STATIC OFF CACHE BOOL "" FORCE)
set(CARES_SHARED ON CACHE BOOL "" FORCE)
endif ()
# Disable looking for libnsl on a platforms that has gethostbyname in glibc
#
# c-ares searching for gethostbyname in the libnsl library, however in the
# version that shipped with gRPC it doing it wrong [1], since it uses
# CHECK_LIBRARY_EXISTS(), which will return TRUE even if the function exists in
# another dependent library. The upstream already contains correct macro [2],
# but it is not included in gRPC (even upstream gRPC, not the one that is
# shipped with clickhousee).
#
# [1]: https://github.com/c-ares/c-ares/blob/e982924acee7f7313b4baa4ee5ec000c5e373c30/CMakeLists.txt#L125
# [2]: https://github.com/c-ares/c-ares/blob/44fbc813685a1fa8aa3f27fcd7544faf612d376a/CMakeLists.txt#L146
#
# And because if you by some reason have libnsl [3] installed, clickhouse will
# reject to start w/o it. While this is completelly different library.
#
# [3]: https://packages.debian.org/bullseye/libnsl2
if (NOT CMAKE_SYSTEM_NAME STREQUAL "SunOS")
set(HAVE_LIBNSL OFF CACHE BOOL "" FORCE)
endif()
# We don't want to build C# extensions.
set(gRPC_BUILD_CSHARP_EXT OFF)
set(_gRPC_CARES_LIBRARIES ch_contrib::c-ares)
set(gRPC_CARES_PROVIDER "clickhouse" CACHE STRING "" FORCE)
add_subdirectory("${_gRPC_SOURCE_DIR}" "${_gRPC_BINARY_DIR}")
# The contrib/grpc/CMakeLists.txt redefined the PROTOBUF_GENERATE_GRPC_CPP() function for its own purposes,

2
contrib/jemalloc vendored

@ -1 +1 @@
Subproject commit 78b58379c854a639df79beb3289351129d863d4b
Subproject commit 41a859ef7325569c6c25f92d294d45123bb81355

View File

@ -117,10 +117,11 @@ if (OS_DARWIN)
endif ()
add_library(_jemalloc ${SRCS})
# First include jemalloc-cmake files, to override anything that jemalloc has.
# (for example if you were trying to build jemalloc inside contrib/jemalloc you
# will have some files that may be out of date)
target_include_directories(_jemalloc PUBLIC include)
target_include_directories(_jemalloc SYSTEM PUBLIC include)
target_include_directories(_jemalloc PRIVATE "${LIBRARY_DIR}/include")
set (JEMALLOC_INCLUDE_PREFIX)

View File

@ -27,6 +27,10 @@
#if !defined(USE_MUSL)
#define JEMALLOC_OVERRIDE_MEMALIGN
#define JEMALLOC_OVERRIDE_VALLOC
#if defined(__linux__)
#define JEMALLOC_OVERRIDE_PVALLOC
#define JEMALLOC_OVERRIDE___LIBC_PVALLOC
#endif
#endif
/*

View File

@ -82,3 +82,9 @@ JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
void JEMALLOC_SYS_NOTHROW *je_valloc(size_t size) JEMALLOC_CXX_THROW
JEMALLOC_ATTR(malloc);
#endif
#ifdef JEMALLOC_OVERRIDE_PVALLOC
JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
void JEMALLOC_SYS_NOTHROW *je_pvalloc(size_t size) JEMALLOC_CXX_THROW
JEMALLOC_ATTR(malloc);
#endif

View File

@ -69,3 +69,9 @@ JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
void JEMALLOC_SYS_NOTHROW *jet_valloc(size_t size) JEMALLOC_CXX_THROW
JEMALLOC_ATTR(malloc);
#endif
#ifdef JEMALLOC_OVERRIDE_PVALLOC
JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
void JEMALLOC_SYS_NOTHROW *je_pvalloc(size_t size) JEMALLOC_CXX_THROW
JEMALLOC_ATTR(malloc);
#endif

View File

@ -36,6 +36,7 @@
#define CPU_SPINWAIT
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
#define HAVE_CPU_SPINWAIT 0
/* #undef HAVE_RDTSCP */
/*
* Number of significant bits in virtual addresses. This may be less than the
@ -162,6 +163,12 @@
/* Use gcc intrinsics for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_GCC */
/* JEMALLOC_PAGEID enabled page id */
/* #undef JEMALLOC_PAGEID */
/* JEMALLOC_HAVE_PRCTL checks prctl */
/* #undef JEMALLOC_HAVE_PRCTL */
/*
* JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
* segment (DSS).
@ -422,4 +429,7 @@
/* Darwin VM_MAKE_TAG support */
#define JEMALLOC_HAVE_VM_MAKE_TAG
/* If defined, realloc(ptr, 0) defaults to "free" instead of "alloc". */
/* #undef JEMALLOC_ZERO_REALLOC_DEFAULT_FREE */
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */

View File

@ -36,6 +36,7 @@
#define CPU_SPINWAIT __asm__ volatile("pause")
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
#define HAVE_CPU_SPINWAIT 1
#define HAVE_RDTSCP 1
/*
* Number of significant bits in virtual addresses. This may be less than the
@ -162,6 +163,12 @@
/* Use gcc intrinsics for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_GCC */
/* JEMALLOC_PAGEID enabled page id */
/* #undef JEMALLOC_PAGEID */
/* JEMALLOC_HAVE_PRCTL checks prctl */
/* #undef JEMALLOC_HAVE_PRCTL */
/*
* JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
* segment (DSS).
@ -422,4 +429,7 @@
/* Darwin VM_MAKE_TAG support */
#define JEMALLOC_HAVE_VM_MAKE_TAG
/* If defined, realloc(ptr, 0) defaults to "free" instead of "alloc". */
/* #undef JEMALLOC_ZERO_REALLOC_DEFAULT_FREE */
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */

View File

@ -36,6 +36,7 @@
#define CPU_SPINWAIT
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
#define HAVE_CPU_SPINWAIT 0
/* #undef HAVE_RDTSCP */
/*
* Number of significant bits in virtual addresses. This may be less than the
@ -164,6 +165,12 @@
/* Use gcc intrinsics for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_GCC */
/* JEMALLOC_PAGEID enabled page id */
/* #undef JEMALLOC_PAGEID */
/* JEMALLOC_HAVE_PRCTL checks prctl */
/* #undef JEMALLOC_HAVE_PRCTL */
/*
* JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
* segment (DSS).
@ -424,4 +431,7 @@
/* Darwin VM_MAKE_TAG support */
/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */
/* If defined, realloc(ptr, 0) defaults to "free" instead of "alloc". */
/* #undef JEMALLOC_ZERO_REALLOC_DEFAULT_FREE */
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */

View File

@ -36,6 +36,7 @@
#define CPU_SPINWAIT __asm__ volatile("pause")
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
#define HAVE_CPU_SPINWAIT 1
#define HAVE_RDTSCP 1
/*
* Number of significant bits in virtual addresses. This may be less than the
@ -164,6 +165,12 @@
/* Use gcc intrinsics for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_GCC */
/* JEMALLOC_PAGEID enabled page id */
/* #undef JEMALLOC_PAGEID */
/* JEMALLOC_HAVE_PRCTL checks prctl */
/* #undef JEMALLOC_HAVE_PRCTL */
/*
* JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
* segment (DSS).
@ -424,4 +431,7 @@
/* Darwin VM_MAKE_TAG support */
/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */
/* If defined, realloc(ptr, 0) defaults to "free" instead of "alloc". */
/* #undef JEMALLOC_ZERO_REALLOC_DEFAULT_FREE */
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */

View File

@ -38,6 +38,7 @@
#define CPU_SPINWAIT
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
#define HAVE_CPU_SPINWAIT 0
/* #undef HAVE_RDTSCP */
/*
* Number of significant bits in virtual addresses. This may be less than the
@ -164,6 +165,12 @@
/* Use gcc intrinsics for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_GCC */
/* JEMALLOC_PAGEID enabled page id */
#define JEMALLOC_PAGEID
/* JEMALLOC_HAVE_PRCTL checks prctl */
#define JEMALLOC_HAVE_PRCTL
/*
* JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
* segment (DSS).
@ -424,4 +431,7 @@
/* Darwin VM_MAKE_TAG support */
/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */
/* If defined, realloc(ptr, 0) defaults to "free" instead of "alloc". */
#define JEMALLOC_ZERO_REALLOC_DEFAULT_FREE
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */

Some files were not shown because too many files have changed in this diff Show More