Merge branch 'master' of github.com:yandex/ClickHouse

This commit is contained in:
Ivan Blinkov 2018-10-09 16:19:18 +03:00
commit 013602c269
8 changed files with 46 additions and 8 deletions

View File

@ -204,7 +204,7 @@ BlockIO InterpreterSystemQuery::execute()
throw Exception("There is no " + query.target_database + "." + query.target_table + " replicated table", throw Exception("There is no " + query.target_database + "." + query.target_table + " replicated table",
ErrorCodes::BAD_ARGUMENTS); ErrorCodes::BAD_ARGUMENTS);
break; break;
case Type::FLUSH_SYSTEM_TABLES: case Type::FLUSH_LOGS:
executeCommandsAndThrowIfError( executeCommandsAndThrowIfError(
[&] () { if (auto query_log = context.getQueryLog(false)) query_log->flush(); }, [&] () { if (auto query_log = context.getQueryLog(false)) query_log->flush(); },
[&] () { if (auto part_log = context.getPartLog("", false)) part_log->flush(); }, [&] () { if (auto part_log = context.getPartLog("", false)) part_log->flush(); },

View File

@ -65,8 +65,8 @@ const char * ASTSystemQuery::typeToString(Type type)
return "STOP REPLICATION QUEUES"; return "STOP REPLICATION QUEUES";
case Type::START_REPLICATION_QUEUES: case Type::START_REPLICATION_QUEUES:
return "START REPLICATION QUEUES"; return "START REPLICATION QUEUES";
case Type::FLUSH_SYSTEM_TABLES: case Type::FLUSH_LOGS:
return "FLUSH SYSTEM TABLES"; return "FLUSH LOGS";
default: default:
throw Exception("Unknown SYSTEM query command", ErrorCodes::BAD_TYPE_OF_FIELD); throw Exception("Unknown SYSTEM query command", ErrorCodes::BAD_TYPE_OF_FIELD);
} }

View File

@ -39,7 +39,7 @@ public:
START_REPLICATEDS_SENDS, START_REPLICATEDS_SENDS,
STOP_REPLICATION_QUEUES, STOP_REPLICATION_QUEUES,
START_REPLICATION_QUEUES, START_REPLICATION_QUEUES,
FLUSH_SYSTEM_TABLES, FLUSH_LOGS,
END END
}; };

View File

@ -34,7 +34,7 @@ DROP TABLE test.null;"
heavy_cpu_query="SELECT ignore(sum(sipHash64(hex(sipHash64(hex(sipHash64(hex(number)))))))) FROM (SELECT * FROM system.numbers_mt LIMIT 1000000)" heavy_cpu_query="SELECT ignore(sum(sipHash64(hex(sipHash64(hex(sipHash64(hex(number)))))))) FROM (SELECT * FROM system.numbers_mt LIMIT 1000000)"
$CLICKHOUSE_CLIENT $settings --max_threads=1 -q "$heavy_cpu_query" $CLICKHOUSE_CLIENT $settings --max_threads=1 -q "$heavy_cpu_query"
$CLICKHOUSE_CLIENT $settings -q "SYSTEM FLUSH SYSTEM TABLES" $CLICKHOUSE_CLIENT $settings -q "SYSTEM FLUSH LOGS"
$CLICKHOUSE_CLIENT $settings -q " $CLICKHOUSE_CLIENT $settings -q "
WITH WITH
any(query_duration_ms*1000) AS duration, any(query_duration_ms*1000) AS duration,
@ -53,7 +53,7 @@ SELECT
# Check ProfileEvents in query_thread_log # Check ProfileEvents in query_thread_log
$CLICKHOUSE_CLIENT $settings --max_threads=3 -q "$heavy_cpu_query" $CLICKHOUSE_CLIENT $settings --max_threads=3 -q "$heavy_cpu_query"
$CLICKHOUSE_CLIENT $settings -q "SYSTEM FLUSH SYSTEM TABLES" $CLICKHOUSE_CLIENT $settings -q "SYSTEM FLUSH LOGS"
query_id=`$CLICKHOUSE_CLIENT $settings -q "SELECT query_id FROM system.query_log WHERE event_date >= today()-1 AND type=2 AND query='$heavy_cpu_query' ORDER BY event_time DESC LIMIT 1"` query_id=`$CLICKHOUSE_CLIENT $settings -q "SELECT query_id FROM system.query_log WHERE event_date >= today()-1 AND type=2 AND query='$heavy_cpu_query' ORDER BY event_time DESC LIMIT 1"`
query_elapsed=`$CLICKHOUSE_CLIENT $settings -q "SELECT query_duration_ms*1000 FROM system.query_log WHERE event_date >= today()-1 AND type=2 AND query_id='$query_id' ORDER BY event_time DESC LIMIT 1"` query_elapsed=`$CLICKHOUSE_CLIENT $settings -q "SELECT query_duration_ms*1000 FROM system.query_log WHERE event_date >= today()-1 AND type=2 AND query_id='$query_id' ORDER BY event_time DESC LIMIT 1"`
threads=`$CLICKHOUSE_CLIENT $settings -q "SELECT length(thread_numbers) FROM system.query_log WHERE event_date >= today()-1 AND type=2 AND query_id='$query_id' ORDER BY event_time DESC LIMIT 1"` threads=`$CLICKHOUSE_CLIENT $settings -q "SELECT length(thread_numbers) FROM system.query_log WHERE event_date >= today()-1 AND type=2 AND query_id='$query_id' ORDER BY event_time DESC LIMIT 1"`

View File

@ -0,0 +1,28 @@
#!/usr/bin/env bash
set -e
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh
cur_name=${BASH_SOURCE[0]}
settings="$server_logs --log_queries=1 --log_query_threads=1 --log_profile_events=1 --log_query_settings=1"
# Test insert logging on each block and checkPacket() method
$CLICKHOUSE_CLIENT $settings -n -q "
DROP TABLE IF EXISTS test.merge_tree_table;
CREATE TABLE test.merge_tree_table (id UInt64, date Date, uid UInt32) ENGINE = MergeTree(date, id, 8192);"
$CLICKHOUSE_CLIENT $settings -q "INSERT INTO test.merge_tree_table SELECT (intHash64(number)) % 10000, toDate('2018-08-01'), rand() FROM system.numbers LIMIT 10000000;"
$CLICKHOUSE_CLIENT $settings -q "OPTIMIZE TABLE test.merge_tree_table FINAL;"
toching_many_parts_query="SELECT count() from (SELECT toDayOfWeek(date) as m, id, count() FROM test.merge_tree_table GROUP BY id, m ORDER BY count() DESC LIMIT 10 SETTINGS max_threads = 1)"
$CLICKHOUSE_CLIENT $settings -q "$toching_many_parts_query" &> /dev/null
$CLICKHOUSE_CLIENT $settings -q "SYSTEM FLUSH LOGS"
$CLICKHOUSE_CLIENT $settings -q "SELECT pi.Values FROM system.query_log ARRAY JOIN ProfileEvents as pi WHERE query='$toching_many_parts_query' and pi.Names = 'FileOpen' ORDER BY event_time DESC LIMIT 1;"
$CLICKHOUSE_CLIENT $settings -q "DROP TABLE IF EXISTS test.merge_tree_table;"

View File

@ -2,7 +2,7 @@
# Dictionary # Dictionary
Движок `Dictionary` отображает данные словаря как таблицу ClickHouse. Движок `Dictionary` отображает данные [словаря](../../query_language/dicts/external_dicts.md) как таблицу ClickHouse.
Рассмотрим для примера словарь `products` со следующей конфигурацией: Рассмотрим для примера словарь `products` со следующей конфигурацией:

View File

@ -1 +0,0 @@
../../en/faq/general.md

10
docs/zh/faq/general.md Normal file
View File

@ -0,0 +1,10 @@
# 常见问题
## 为什么不使用MapReduce之类的产品呢?
我们可以将MapReduce这类的系统称为分布式计算系统，其reduce操作基于分布式排序。其中最常见的开源解决方案是[Apache Hadoop](http://hadoop.apache.org)。Yandex使用他们的内部解决方案YT。
这些系统不适合在线查询，因为它们的延迟高。换句话说，它们不能用作Web接口的后端服务。这些系统对于实时数据更新是没有用的。如果操作的结果和所有中间结果（如果有的话）位于单个服务器的内存中，则分布式排序不是执行reduce操作的最佳方式，但这通常是在线查询的情况。在这种情况下，哈希表是执行reduce操作的最佳方式。优化map-reduce任务的常用方法是使用内存中的哈希表进行预聚合（部分reduce），用户手动执行此优化操作。分布式排序是运行简单map-reduce任务时性能降低的主要原因之一。
大多数MapReduce系统允许您在集群上执行任意代码。但是，声明性查询语言更适合OLAP，以便快速运行实验。例如，Hadoop包含Hive和Pig，Cloudera Impala、Shark（过时）for Spark，以及Spark SQL、Presto和Apache Drill。与专业系统相比，运行此类任务时的性能非常不理想，所以将这些系统用作Web接口的后端服务是不现实的，因为延迟相对较高。