Evgeniy Gatov 2015-03-30 18:19:51 +03:00
commit d7494a543a
63 changed files with 1640 additions and 1184 deletions

View File

@ -0,0 +1,18 @@
#!/bin/bash
QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3
cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
sync
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
echo -n "["
for i in $(seq 1 $TRIES); do
RES=$(clickhouse-client --time --format=Null --query="$query" 2>&1)
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done
echo "],"
done
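A minimal invocation sketch (the result file name is arbitrary; it assumes queries.sql sits in the working directory and the target table is already loaded):

./benchmark.sh hits_100m | tee clickhouse_results.txt

Each output row is a JSON-style array with one timing per try; null is printed for a failed run.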

View File

@ -1,366 +0,0 @@
#!/bin/bash
test_table="hits_100m"
start_date="'2013-07-01'"
early_stop_date="'2013-07-02'"
stop_date="'2013-07-31'"
counter_id=34
function run_ck_server
{
sudo sh -c " ulimit -v 54000000; /etc/init.d/clickhouse-server restart"
}
# execute queries
function execute()
{
queries=("${@}")
queries_count=${#queries[@]}
if [ -z "$TIMES" ]; then
TIMES=1
fi
index=0
comment_re='\#.*'
while [ "$index" -lt "$queries_count" ]; do
query=${queries[$index]}
if [[ $query =~ $comment_re ]]; then
echo "$query"
echo
else
sync
sudo sh -c "echo 3 > /proc/sys/vm/drop_caches"
for i in $(seq $TIMES)
do
expect -f ./expect.tcl "$query"
ret=$?
if [ "$ret" != "0" ]; then
echo "Error: $ret"
#break
fi
# restart clickhouse if failed
ps aux | grep -P '\d+ clickhouse-server'
if [ "$?" != "0" ]; then
run_ck_server
fi
done
fi
let "index = $index + 1"
echo "Ran $index queries." >&2
done
}
init_queries=(
# DB structure with array arguments
#"CREATE TABLE $test_table ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32 ) ENGINE = MergeTree(EventDate, intHash32(UserID), tuple(CounterID, EventDate, intHash32(UserID), EventTime), 8192);"
#DB structure without array arguments
#"CREATE TABLE $test_table ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32 ) ENGINE = MergeTree(EventDate, intHash32(UserID), tuple(CounterID, EventDate, intHash32(UserID), EventTime), 8192);"
#modified table without uint
"CREATE TABLE $test_table ( WatchID Int64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID Int64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID Int64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash Int64, URLHash Int64, CLID UInt32, UserIDHash UInt64 ) ENGINE = MergeTree(EventDate, intHash32(UserID), tuple(CounterID, EventDate, intHash32(UserID), EventTime), 8192);"
)
test_queries=(
"SELECT count() FROM $test_table;"
"SELECT count() FROM $test_table WHERE AdvEngineID != 0;"
"SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM $test_table;"
"SELECT sum(UserID) FROM $test_table;"
"SELECT uniq(UserID) FROM $test_table;"
"SELECT uniq(SearchPhrase) FROM $test_table;"
"SELECT min(EventDate), max(EventDate) FROM $test_table;"
"SELECT AdvEngineID, count() FROM $test_table WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count() DESC;"
"#- мощная фильтрация. После фильтрации почти ничего не остаётся, но делаем ещё агрегацию.;"
"SELECT RegionID, uniq(UserID) AS u FROM $test_table GROUP BY RegionID ORDER BY u DESC LIMIT 10;"
"#- агрегация, среднее количество ключей.;"
"SELECT RegionID, sum(AdvEngineID), count() AS c, avg(ResolutionWidth), uniq(UserID) FROM $test_table GROUP BY RegionID ORDER BY c DESC LIMIT 10;"
"#- агрегация, среднее количество ключей, несколько агрегатных функций.;"
"SELECT MobilePhoneModel, uniq(UserID) AS u FROM $test_table WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;"
"#- мощная фильтрация по строкам, затем агрегация по строкам.;"
"SELECT MobilePhone, MobilePhoneModel, uniq(UserID) AS u FROM $test_table WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;"
"#- мощная фильтрация по строкам, затем агрегация по паре из числа и строки.;"
"SELECT SearchPhrase, count() AS c FROM $test_table WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;"
"#- средняя фильтрация по строкам, затем агрегация по строкам, большое количество ключей.;"
"SELECT SearchPhrase, uniq(UserID) AS u FROM $test_table WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;"
"#- агрегация чуть сложнее.;"
"SELECT SearchEngineID, SearchPhrase, count() AS c FROM $test_table WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;"
"#- агрегация по числу и строке, большое количество ключей.;"
"SELECT UserID, count() FROM $test_table GROUP BY UserID ORDER BY count() DESC LIMIT 10;"
"#- агрегация по очень большому количеству ключей, может не хватить оперативки.;"
"SELECT UserID, SearchPhrase, count() FROM $test_table GROUP BY UserID, SearchPhrase ORDER BY count() DESC LIMIT 10;"
"#- ещё более сложная агрегация.;"
"SELECT UserID, SearchPhrase, count() FROM $test_table GROUP BY UserID, SearchPhrase LIMIT 10;"
"#- то же самое, но без сортировки.;"
"SELECT UserID, toMinute(EventTime) AS m, SearchPhrase, count() FROM $test_table GROUP BY UserID, m, SearchPhrase ORDER BY count() DESC LIMIT 10;"
"#- ещё более сложная агрегация, не стоит выполнять на больших таблицах.;"
"SELECT UserID FROM $test_table WHERE UserID = 12345678901234567890;"
"#- мощная фильтрация по столбцу типа UInt64.;"
"SELECT count() FROM $test_table WHERE URL LIKE '%metrika%';"
"#- фильтрация по поиску подстроки в строке.;"
"SELECT SearchPhrase, any(URL), count() AS c FROM $test_table WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;"
"#- вынимаем большие столбцы, фильтрация по строке.;"
"SELECT SearchPhrase, any(URL), any(Title), count() AS c, uniq(UserID) FROM $test_table WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;"
"#- чуть больше столбцы.;"
"SELECT * FROM $test_table WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;"
"#- плохой запрос - вынимаем все столбцы.;"
"SELECT SearchPhrase FROM $test_table WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;"
"#- большая сортировка.;"
"SELECT SearchPhrase FROM $test_table WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;"
"#- большая сортировка по строкам.;"
"SELECT SearchPhrase FROM $test_table WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;"
"#- большая сортировка по кортежу.;"
"SELECT CounterID, avg(length(URL)) AS l, count() AS c FROM $test_table WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25;"
"#- считаем средние длины URL для крупных счётчиков.;"
"SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count() AS c, any(Referer) FROM $test_table WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25;"
"#- то же самое, но с разбивкой по доменам.;"
"SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM $test_table;"
"#- много тупых агрегатных функций.;"
"SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM $test_table WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;"
"#- сложная агрегация, для больших таблиц может не хватить оперативки.;"
"SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM $test_table WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;"
"#- агрегация по двум полям, которая ничего не агрегирует. Для больших таблиц выполнить не получится.;"
"SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM $test_table GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;"
"#- то же самое, но ещё и без фильтрации.;"
"SELECT URL, count() AS c FROM $test_table GROUP BY URL ORDER BY c DESC LIMIT 10;"
"#- агрегация по URL.;"
"SELECT 1, URL, count() AS c FROM $test_table GROUP BY 1, URL ORDER BY c DESC LIMIT 10;"
"#- агрегация по URL и числу.;"
"SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM $test_table GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10;"
"SELECT
URL,
count() AS PageViews
FROM $test_table
WHERE
CounterID = $counter_id
AND EventDate >= toDate($start_date)
AND EventDate <= toDate($stop_date)
AND NOT DontCountHits
AND NOT Refresh
AND notEmpty(URL)
GROUP BY URL
ORDER BY PageViews DESC
LIMIT 10;"
"SELECT
Title,
count() AS PageViews
FROM $test_table
WHERE
CounterID = $counter_id
AND EventDate >= toDate($start_date)
AND EventDate <= toDate($stop_date)
AND NOT DontCountHits
AND NOT Refresh
AND notEmpty(Title)
GROUP BY Title
ORDER BY PageViews DESC
LIMIT 10;"
"SELECT
URL,
count() AS PageViews
FROM $test_table
WHERE
CounterID = $counter_id
AND EventDate >= toDate($start_date)
AND EventDate <= toDate($stop_date)
AND NOT Refresh
AND IsLink
AND NOT IsDownload
GROUP BY URL
ORDER BY PageViews DESC
LIMIT 1000;"
"SELECT
TraficSourceID,
SearchEngineID,
AdvEngineID,
((SearchEngineID = 0 AND AdvEngineID = 0) ? Referer : '') AS Src,
URL AS Dst,
count() AS PageViews
FROM $test_table
WHERE
CounterID = $counter_id
AND EventDate >= toDate($start_date)
AND EventDate <= toDate($stop_date)
AND NOT Refresh
GROUP BY
TraficSourceID,
SearchEngineID,
AdvEngineID,
Src,
Dst
ORDER BY PageViews DESC
LIMIT 1000;"
"SELECT
URLHash,
EventDate,
count() AS PageViews
FROM $test_table
WHERE
CounterID = $counter_id
AND EventDate >= toDate($start_date)
AND EventDate <= toDate($stop_date)
AND NOT Refresh
AND TraficSourceID IN (-1, 6)
AND RefererHash = halfMD5('http://example.ru/')
GROUP BY
URLHash,
EventDate
ORDER BY PageViews DESC
LIMIT 100000;"
"SELECT
WindowClientWidth,
WindowClientHeight,
count() AS PageViews
FROM $test_table
WHERE
CounterID = $counter_id
AND EventDate >= toDate($start_date)
AND EventDate <= toDate($stop_date)
AND NOT Refresh
AND NOT DontCountHits
AND URLHash = halfMD5('http://example.ru/')
GROUP BY
WindowClientWidth,
WindowClientHeight
ORDER BY PageViews DESC
LIMIT 10000;"
"SELECT
toStartOfMinute(EventTime) AS Minute,
count() AS PageViews
FROM $test_table
WHERE
CounterID = $counter_id
AND EventDate >= toDate($start_date)
AND EventDate <= toDate($early_stop_date)
AND NOT Refresh
AND NOT DontCountHits
GROUP BY
Minute
ORDER BY Minute;"
)
function test {
TIMES=3
execute "${test_queries[@]}"
}
function init {
execute "${init_queries[@]}"
}
function debug {
TIMES=3
debug_queries=(
)
execute "${debug_queries[@]}"
}
function usage {
cat <<EOF
usage: $0 options
This script runs benchmarks for ClickHouse
OPTIONS:
-h Show this message
-d Run debug queries
-i Init database
-p log_file Parse log file to columns with result
-t Run tests
EOF
}
function parse_log {
results=$(cat "$1" | grep -P 'Elapsed: \d+\.\d+ ' | awk '{print $6}')
index=1
for res in $results
do
echo -n "$res "
let "index=$index % 3"
if [ "$index" == "0" ]; then
echo
fi
let "index=$index + 1"
done
}
if [ "$#" == "0" ]; then
usage
exit 0
fi
echo "Start date" $(date)
while getopts "hitdp:" OPTION
do
case $OPTION in
h)
usage
exit 0
;;
i)
init
;;
t)
test
;;
d)
debug
;;
p)
parse_log $OPTARG
;;
?)
usage
exit 0
;;
esac
done
echo "Stop date" $(date)

View File

@ -1,4 +0,0 @@
CONF_DIR=/home/kartavyy/benchmark/clickhouse
expect_file=$CONF_DIR/expect.tcl
test_file=$CONF_DIR/queries.sql
etc_init_d_service=/etc/init.d/clickhouse-server-metrika-yandex

View File

@ -1,13 +0,0 @@
#!/bin/expect
# Set timeout
set timeout 600
# Get arguments
set query [lindex $argv 0]
spawn clickhouse-client --multiline;
expect ":) "
send "$query;\r";
expect ":) "
send "quit";

View File

@ -1,109 +1,43 @@
- SELECT count() FROM hits_10m;
- SELECT count() FROM hits_10m WHERE AdvEngineID != 0;
- SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM hits_10m;
- SELECT sum(UserID) FROM hits_10m;
- SELECT uniq(UserID) FROM hits_10m;
- SELECT uniq(SearchPhrase) FROM hits_10m;
- SELECT min(EventDate), max(EventDate) FROM hits_10m;
- SELECT AdvEngineID, count() FROM hits_10m WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count() DESC;
- -- heavy filtering: almost nothing is left after the filter, but we aggregate anyway.;
- SELECT RegionID, uniq(UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10;
- -- aggregation with a moderate number of keys.;
- SELECT RegionID, sum(AdvEngineID), count() AS c, avg(ResolutionWidth), uniq(UserID) FROM hits_10m GROUP BY RegionID ORDER BY c DESC LIMIT 10;
- -- aggregation with a moderate number of keys and several aggregate functions.;
- SELECT MobilePhoneModel, uniq(UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
- -- heavy filtering on strings, then aggregation by string.;
- SELECT MobilePhone, MobilePhoneModel, uniq(UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
- -- heavy filtering on strings, then aggregation by a (number, string) pair.;
- SELECT SearchPhrase, count() AS c FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
- -- moderate filtering on strings, then aggregation by string, large number of keys.;
- SELECT SearchPhrase, uniq(UserID) AS u FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
- -- slightly more complex aggregation.;
- SELECT SearchEngineID, SearchPhrase, count() AS c FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
- -- aggregation by a number and a string, large number of keys.;
- SELECT UserID, count() FROM hits_10m GROUP BY UserID ORDER BY count() DESC LIMIT 10;
- -- aggregation over a very large number of keys; may run out of RAM.;
- SELECT UserID, SearchPhrase, count() FROM hits_10m GROUP BY UserID, SearchPhrase ORDER BY count() DESC LIMIT 10;
- -- even more complex aggregation.;
- SELECT UserID, SearchPhrase, count() FROM hits_10m GROUP BY UserID, SearchPhrase LIMIT 10;
- -- the same, but without sorting.;
- SELECT UserID, toMinute(EventTime) AS m, SearchPhrase, count() FROM hits_10m GROUP BY UserID, m, SearchPhrase ORDER BY count() DESC LIMIT 10;
- -- even more complex aggregation; not worth running on large tables.;
- SELECT UserID FROM hits_10m WHERE UserID = 12345678901234567890;
- -- heavy filtering on a UInt64 column.;
- SELECT count() FROM hits_10m WHERE URL LIKE '%metrika%';
- -- filtering by substring search in a string.;
- SELECT SearchPhrase, any(URL), count() AS c FROM hits_10m WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
- -- pulling large columns, filtering by string.;
- SELECT SearchPhrase, any(URL), any(Title), count() AS c, uniq(UserID) FROM hits_10m WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
- -- slightly larger columns.;
- SELECT * FROM hits_10m WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
- -- a bad query: pulls all columns.;
- SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
- -- big sort.;
- SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
- -- big sort by string.;
- SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
- -- big sort by a tuple.;
- SELECT CounterID, avg(length(URL)) AS l, count() AS c FROM hits_10m WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25;
- -- computing average URL lengths for large counters.;
- SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count() AS c, any(Referer) FROM hits_10m WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25;
- -- the same, but broken down by domain.;
- SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_10m;
- -- many dumb aggregate functions.;
- SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
- -- complex aggregation; may run out of RAM on large tables.;
- SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
- -- aggregation by two fields that doesn't actually aggregate anything; won't complete on large tables.;
- SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
- -- the same, but additionally without filtering.;
- SELECT URL, count() AS c FROM hits_10m GROUP BY URL ORDER BY c DESC LIMIT 10;
- -- aggregation by URL.;
- SELECT 1, URL, count() AS c FROM hits_10m GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
- -- aggregation by URL and a number.;
- SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM hits_10m GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10;
- SELECT URL, count() AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
- SELECT Title, count() AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
- SELECT URL, count() AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
- SELECT TraficSourceID, SearchEngineID, AdvEngineID, ((SearchEngineID = 0 AND AdvEngineID = 0) ? Referer : '') AS Src, URL AS Dst, count() AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
- SELECT URLHash, EventDate, count() AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = halfMD5('http://example.ru/') GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
- SELECT WindowClientWidth, WindowClientHeight, count() AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = halfMD5('http://example.ru/') GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
- SELECT toStartOfMinute(EventTime) AS Minute, count() AS PageViews FROM hits_10m WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;
+ SELECT count() FROM {table};
+ SELECT count() FROM {table} WHERE AdvEngineID != 0;
+ SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM {table};
+ SELECT sum(UserID) FROM {table};
+ SELECT uniq(UserID) FROM {table};
+ SELECT uniq(SearchPhrase) FROM {table};
+ SELECT min(EventDate), max(EventDate) FROM {table};
+ SELECT AdvEngineID, count() FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count() DESC;
+ SELECT RegionID, uniq(UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
+ SELECT RegionID, sum(AdvEngineID), count() AS c, avg(ResolutionWidth), uniq(UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
+ SELECT MobilePhoneModel, uniq(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
+ SELECT MobilePhone, MobilePhoneModel, uniq(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
+ SELECT SearchPhrase, count() AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
+ SELECT SearchPhrase, uniq(UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
+ SELECT SearchEngineID, SearchPhrase, count() AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
+ SELECT UserID, count() FROM {table} GROUP BY UserID ORDER BY count() DESC LIMIT 10;
+ SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count() DESC LIMIT 10;
+ SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
+ SELECT UserID, toMinute(EventTime) AS m, SearchPhrase, count() FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count() DESC LIMIT 10;
+ SELECT UserID FROM {table} WHERE UserID = 12345678901234567890;
+ SELECT count() FROM {table} WHERE URL LIKE '%metrika%';
+ SELECT SearchPhrase, any(URL), count() AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
+ SELECT SearchPhrase, any(URL), any(Title), count() AS c, uniq(UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
+ SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
+ SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
+ SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
+ SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
+ SELECT CounterID, avg(length(URL)) AS l, count() AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25;
+ SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count() AS c, any(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25;
+ SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
+ SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
+ SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
+ SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
+ SELECT URL, count() AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
+ SELECT 1, URL, count() AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
+ SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM {table} GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10;
+ SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
+ SELECT Title, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
+ SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
+ SELECT TraficSourceID, SearchEngineID, AdvEngineID, ((SearchEngineID = 0 AND AdvEngineID = 0) ? Referer : '') AS Src, URL AS Dst, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
+ SELECT URLHash, EventDate, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = halfMD5('http://example.ru/') GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
+ SELECT WindowClientWidth, WindowClientHeight, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = halfMD5('http://example.ru/') GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
+ SELECT toStartOfMinute(EventTime) AS Minute, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;

View File

@ -1,18 +1,3 @@
- path=/opt/dump/dump_0.3
- db_name=hits_1b
- num=1000000000
- dump_replaced=$path/dump_"$db_name"_replaced.tsv
- dump_meshed=$path/dump_"$db_name"_meshed.tsv
- dump_meshed_utf8=$path/dump_"$db_name"_meshed_utf8.tsv
- clickhouse-client --query="SET GLOBAL max_block_size=100000"
- clickhouse-client --query="SET GLOBAL max_threads=1"
- clickhouse-client --query="SELECT toInt64(WatchID), JavaEnable, Title, GoodEvent, (EventTime < toDateTime('1971-01-01 00:00:00') ? toDateTime('1971-01-01 00:00:01') : EventTime), (EventDate < toDate('1971-01-01') ? toDate('1971-01-01') : EventDate), CounterID, ClientIP, RegionID, toInt64(UserID), CounterClass, OS, UserAgent, URL, Referer, Refresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, (ClientEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : ClientEventTime), SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce, toInt64(FUniqID), OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, (LocalEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : LocalEventTime), Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, ParamCurrency, ParamCurrencyID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, toInt64(RefererHash), toInt64(URLHash), CLID, toInt64(intHash32(UserID)) FROM hits_mt_test_1b LIMIT $num FORMAT TabSeparated" > $dump_replaced
- /etc/init.d/clickhouse-server-metrika-yandex-ulimit restart
- sudo nsort -format=maximum_size:65535 -k1 -T /opt -o $dump_meshed $dump_replaced
- cat $dump_meshed | iconv -futf8 -tutf8//IGNORE 2>/dev/null 1> $dump_meshed_utf8
+ #!/bin/bash
+ table=hits_10m; time clickhouse-client --max_bytes_before_external_sort=30000000000 --query="SELECT toInt64(WatchID), JavaEnable, Title, GoodEvent, (EventTime < toDateTime('1971-01-01 00:00:00') ? toDateTime('1971-01-01 00:00:01') : EventTime), (EventDate < toDate('1971-01-01') ? toDate('1971-01-01') : EventDate), CounterID, ClientIP, RegionID, toInt64(UserID), CounterClass, OS, UserAgent, URL, Referer, Refresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, (ClientEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : ClientEventTime), SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce, toInt64(FUniqID), OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, (LocalEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : LocalEventTime), Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, ParamCurrency, ParamCurrencyID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, toInt64(RefererHash), toInt64(URLHash), CLID FROM $table ORDER BY rand()" | corrector_utf8 > /opt/dumps/${table}_corrected.tsv

View File

@ -1,26 +0,0 @@
#!/bin/bash
if [[ $# -ne 0 ]]; then
    echo "usage: $0 takes no arguments; when free memory runs low it kills the process with the biggest memory consumption"
    exit 1
fi
while true
do
    FREE_MEMORY_MB=$(free -m | sed -n '3,3p' | awk '{print $4}')
    PID="$(ps -eF --sort -rss | sed -n '2,2p' | awk '{print $2}')"
    NAME="$(ps -eF --sort -rss | sed -n '2,2p' | awk '{print $11}')"
    SIZEGB="$(ps -eF --sort -rss | sed -n '2,2p' | awk '{print $6}')"
    SIZEGB=$(($SIZEGB/1024/1024)) # RSS is reported in KB; convert to GB
    echo "Process id = $PID Size = $SIZEGB GB Free Memory = $FREE_MEMORY_MB MB"
    if (( $FREE_MEMORY_MB < 512 ));
    then echo "Killing the process with the biggest memory consumption..."
        sudo kill -9 $PID
        echo "$(date) Killed the process with PID: $PID NAME: $NAME"
    else
        echo "Memory limit not exceeded yet"
    fi
    sleep 10
done
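A sketch of how this watchdog might be wrapped around a long benchmark run (the script name here is hypothetical):

./memory_watchdog.sh >> watchdog.log 2>&1 &
WATCHDOG_PID=$!
# ... run the benchmark ...
kill $WATCHDOG_PID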

View File

@ -1,114 +0,0 @@
import argparse
import re
import sys
def log_to_rows(filename, pattern_select, time_pattern, pattern_ignore):
    time_matcher = re.compile(time_pattern)
    select_matcher = re.compile(pattern_select, re.IGNORECASE)
    ignore_matcher = re.compile(pattern_ignore)
    f = open(filename, 'r')
    query = ''
    raw_time = ''
    for line in f:
        if ignore_matcher.match(line):
            continue
        m = select_matcher.search(line)
        if m:
            if line != query:
                query = line
                sys.stdout.write("\n")
                raw_time = raw_time + "\n"
        m = time_matcher.search(line)
        if m:
            sec = 0
            minute = 0
            ms = 0
            if 'min' in m.groupdict() and m.group('min'):
                minute = float(m.group('min').replace(',', '.'))
            if 'sec' in m.groupdict() and m.group('sec'):
                sec = float(m.group('sec').replace(',', '.'))
            if 'ms' in m.groupdict() and m.group('ms'):
                ms = float(m.group('ms').replace(',', '.'))
            sys.stdout.write(str(minute * 60 + sec + ms / 1000.) + " ")
            raw_time = raw_time + " | " + m.group('time')
    print
    print " =======raw time====== \n" + raw_time

def process_log(filename, pattern_select, time_pattern, pattern_ignore, error_pattern):
    time_matcher = re.compile(time_pattern)
    select_matcher = re.compile(pattern_select, re.IGNORECASE)
    ignore_matcher = re.compile(pattern_ignore)
    error_matcher = re.compile(error_pattern, re.IGNORECASE)
    f = open(filename, 'r')
    query = ''
    for line in f:
        if error_matcher.match(line):
            print line
            continue
        if ignore_matcher.match(line):
            continue
        m = select_matcher.search(line)
        if m:
            if line != query:
                sys.stdout.flush()
                query = line
                print "\n\n"
                print query
        m = time_matcher.search(line)
        if m:
            sys.stdout.write(m.group('time') + " ")

def main():
    parser = argparse.ArgumentParser(description="Process log files from different databases")
    parser.add_argument('log_file', metavar='log_file', help='database log file')
    parser.add_argument('db_name', metavar='db_name', help='database name, one of: clickhouse, vertica, infinidb, monetdb, infobright, hive, mysql (... more later)')
    args = parser.parse_args()
    log_file = args.log_file
    db_name = args.db_name
    time_pattern = ''
    select_pattern = r'query: select '
    ignore_pattern = r'#'
    error_pattern = r'error .*'
    if db_name == 'clickhouse':
        time_pattern = r'(?P<time>(?P<sec>\d+\.\d{3}) sec\.)'
        select_pattern = r'query\: select '
        ignore_pattern = r':\).*'
    elif db_name == 'vertica':
        time_pattern = r'(?P<time>(?P<ms>\d+\.\d+) ms\.)'
        select_pattern = r'select '
        ignore_pattern = r'(.*dbadmin=>|query:|.*Timing is on\.).*'
    elif db_name == 'infinidb':
        time_pattern = r'(?P<time>(?:(?P<min>\d+) min )?(?P<sec>\d+\.\d+) sec)'
        ignore_pattern = r'Query OK, 0 rows affected \(0\.00 sec\)'
    elif db_name == 'monetdb':
        time_pattern = r'tuples? \((?P<time>(?:(?P<min>\d+)m )?(?:(?P<sec>\d+\.?\d+)s)?(?:(?P<ms>\d+\.\d+)ms)?)\)'
    elif db_name == 'infobright':
        time_pattern = r'(?P<time>(?:(?P<min>\d+) min ){0,1}(?P<sec>\d+\.\d+) sec)'
    elif db_name == 'hive':
        time_pattern = r'Time taken\: (?P<time>(?:(?P<sec>\d+\.?\d+) seconds))'
        error_pattern = r'failed\: .*'
    elif db_name == 'mysql':
        time_pattern = r'(?P<time>(?:(?P<min>\d+) min )?(?P<sec>\d+\.\d+) sec)'
    else:
        sys.exit("unknown db_name")
    process_log(log_file, select_pattern, time_pattern, ignore_pattern, error_pattern)
    log_to_rows(log_file, select_pattern, time_pattern, ignore_pattern)

if __name__ == '__main__':
    main()
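A typical invocation might be (the log file name here is hypothetical):

python parse_log.py clickhouse_bench.log clickhouse

process_log first prints each matched query followed by its raw timings; log_to_rows then re-prints the timings converted to seconds, one row per query, ready for pasting into a spreadsheet.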

View File

@ -0,0 +1,40 @@
Quick installation instructions
-------------------------------
Register on my.vertica.com
https://my.vertica.com/download-community-edition/
Download HP Vertica 7.1.1 Analytic Database Server, Debian or Ubuntu 14.04 version.
sudo apt-get install sysstat pstack mcelog
sudo dpkg -i vertica_7.1.1-0_amd64.deb
sudo sh -c "echo 'export TZ=Europe/Moscow' >> /home/dbadmin/.bash_profile"
sudo /opt/vertica/sbin/install_vertica --hosts=127.0.0.1 --failure-threshold=NONE
sudo mkdir /opt/vertica-data/
sudo chown dbadmin /opt/vertica-data/
sudo su dbadmin
/opt/vertica/bin/adminTools
configuration menu
create database
name: default
empty password
both directories: /opt/vertica-data/
main menu
exit
PS. Note that Vertica doesn't support IPv6.
How to prepare data
-------------------
Prepare dumps with the create_dump.sh script for the tables hits_10m, hits_100m and hits_1000m. This takes about 5 hours (1m41.882s, 25m11.103s and 276m36.388s respectively).
Start the vsql command-line client.
Create the tables with the queries from hits_define_schema.sql.
Time to insert data:
hits_10m: 91 sec.
hits_100m: 774 sec.
hits_1000m:
Validate the number of rows with SELECT count(*).
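For example, the row-count check can be run non-interactively (a sketch, assuming the dbadmin user and the tables created above):

/opt/vertica/bin/vsql -U dbadmin -c "SELECT count(*) FROM hits_100m;"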

View File

@ -0,0 +1,24 @@
#!/bin/bash
QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3
cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
sync
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
echo -n "["
for i in $(seq 1 $TRIES); do
RES=$((echo '\timing'; echo "$query") |
/opt/vertica/bin/vsql -U dbadmin |
grep -oP 'All rows formatted: [^ ]+ ms' |
ssed -R -e 's/^All rows formatted: ([\d,]+) ms$/\1/' |
tr ',' '.')
[[ "$?" == "0" ]] && echo -n "$(perl -e "print ${RES} / 1000")" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done
echo "],"
done
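Invocation mirrors the ClickHouse wrapper above (a sketch; it assumes vsql can log in as dbadmin without a password, per the empty-password database created in the README):

./benchmark.sh hits_100m | tee vertica_results.txt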

View File

@ -1,21 +0,0 @@
#!/bin/expect
# Set timeout
set timeout 600
# Get arguments
set query [lindex $argv 0]
spawn vsql -eU dbadmin
expect "dbadmin=>"
send "\\timing\r"
expect "dbadmin=>"
send "$query\r"
expect "dbadmin=>"
send "\\q\r"
expect eof

View File

@ -1,6 +1,6 @@
\timing
- create table hits_10m_meshed
+ create table hits_10m
(
WatchID INTEGER,
JavaEnable INTEGER,
@ -106,17 +106,14 @@ create table hits_10m_meshed
HasGCLID INTEGER,
RefererHash INTEGER,
URLHash INTEGER,
- CLID INTEGER,
- UserIDHash INTEGER
- ) ORDER BY CounterID, EventDate, UserIDHash, EventTime;
+ CLID INTEGER
+ ) ORDER BY CounterID, EventDate, UserID, EventTime;
- \set input_file '''/opt/dump/dump_0.3/dump_hits_10m_meshed.tsv'''
- COPY hits_10m_meshed FROM :input_file DELIMITER E'\t' DIRECT;
+ \set input_file '''/opt/dumps/hits_10m_corrected.tsv'''
+ COPY hits_10m FROM :input_file DELIMITER E'\t' DIRECT;
- create table hits_100m_meshed
+ create table hits_100m
(
WatchID INTEGER,
JavaEnable INTEGER,
@ -222,17 +219,14 @@ create table hits_100m_meshed
HasGCLID INTEGER,
RefererHash INTEGER,
URLHash INTEGER,
- CLID INTEGER,
- UserIDHash INTEGER
- ) ORDER BY CounterID, EventDate, UserIDHash, EventTime;
+ CLID INTEGER
+ ) ORDER BY CounterID, EventDate, UserID, EventTime;
- \set input_file '''/opt/dump/dump_0.3/dump_hits_100m_meshed.tsv'''
- COPY hits_100m_meshed FROM :input_file DELIMITER E'\t' DIRECT;
+ \set input_file '''/opt/dumps/hits_100m_corrected.tsv'''
+ COPY hits_100m FROM :input_file DELIMITER E'\t' DIRECT;
- create table hits_1b_meshed
+ create table hits_1000m
(
WatchID INTEGER,
JavaEnable INTEGER,
@ -338,10 +332,8 @@ create table hits_1b_meshed
HasGCLID INTEGER,
RefererHash INTEGER,
URLHash INTEGER,
- CLID INTEGER,
- UserIDHash INTEGER
- ) ORDER BY CounterID, EventDate, UserIDHash, EventTime;
+ CLID INTEGER
+ ) ORDER BY CounterID, EventDate, UserID, EventTime;
- \set input_file '''/opt/dump/dump_0.3/dump_hits_1b_meshed.tsv'''
- COPY hits_1b_meshed FROM :input_file DELIMITER E'\t' DIRECT;
+ \set input_file '''/opt/dumps/hits_1000m_corrected.tsv'''
+ COPY hits_1000m FROM :input_file DELIMITER E'\t' DIRECT;

View File

@ -1,111 +1,43 @@
SELECT count(*) FROM hits_100m_meshed; SELECT count(*) FROM {table};
SELECT count(*) FROM hits_100m_meshed WHERE AdvEngineID != 0; SELECT count(*) FROM {table} WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_100m_meshed; SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM {table};
SELECT sum_float(UserID) FROM hits_100m_meshed; SELECT sum_float(UserID) FROM {table};
SELECT count(DISTINCT UserID) FROM hits_100m_meshed; SELECT count(DISTINCT UserID) FROM {table};
SELECT count(DISTINCT SearchPhrase) FROM hits_100m_meshed; SELECT count(DISTINCT SearchPhrase) FROM {table};
SELECT min(EventDate), max(EventDate) FROM hits_100m_meshed; SELECT min(EventDate), max(EventDate) FROM {table};
SELECT AdvEngineID, count(*) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT AdvEngineID, count(*) FROM hits_100m_meshed WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC; SELECT RegionID, count(DISTINCT UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
-- мощная фильтрация. После фильтрации почти ничего не остаётся, но делаем ещё агрегацию.; SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM {table} GROUP BY RegionID ORDER BY count(*) DESC LIMIT 10;
SELECT MobilePhoneModel, count(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_100m_meshed GROUP BY RegionID ORDER BY u DESC LIMIT 10; SELECT MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
-- агрегация, среднее количество ключей.; SELECT SearchPhrase, count(*) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT SearchPhrase, count(DISTINCT UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_100m_meshed GROUP BY RegionID ORDER BY count(*) DESC LIMIT 10; SELECT SearchEngineID, SearchPhrase, count(*) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- агрегация, среднее количество ключей, несколько агрегатных функций.; SELECT UserID, count(*) FROM {table} GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_100m_meshed WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
-- мощная фильтрация по строкам, затем агрегация по строкам.; SELECT UserID, Minute(EventTime) AS m, SearchPhrase, count(*) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM {table} WHERE UserID = 12345678901234567890;
SELECT MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_100m_meshed WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; SELECT count(*) FROM {table} WHERE URL LIKE '%metrika%';
-- мощная фильтрация по строкам, затем агрегация по паре из числа и строки.; SELECT SearchPhrase, MAX(URL), count(*) FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT SearchPhrase, MAX(URL), MAX(Title), count(*) AS c, count(DISTINCT UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT SearchPhrase, count(*) FROM hits_100m_meshed WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10; SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
-- средняя фильтрация по строкам, затем агрегация по строкам, большое количество ключей.; SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase, count(DISTINCT UserID) AS u FROM hits_100m_meshed WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
-- агрегация чуть сложнее.; SELECT CounterID, avg(length(URL)) AS l, count(*) FROM {table} WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT SUBSTRING(SUBSTRING(Referer, POSITION('//' IN Referer) + 2), 1, GREATEST(0, POSITION('/' IN SUBSTRING(Referer, POSITION('//' IN Referer) + 2)) - 1)) AS key, avg(length(Referer)) AS l, count(*) AS c, MAX(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT SearchEngineID, SearchPhrase, count(*) FROM hits_100m_meshed WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY count(*) DESC LIMIT 10; SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
-- aggregation by a number and a string, a large number of keys.;
SELECT SearchEngineID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY count(*) DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, count(*) FROM hits_100m_meshed GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
-- aggregation by a very large number of keys; may run out of RAM.;
SELECT URL, count(*) FROM {table} GROUP BY URL ORDER BY count(*) DESC LIMIT 10;
SELECT 1, URL, count(*) FROM {table} GROUP BY 1, URL ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM hits_100m_meshed GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) FROM {table} GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY count(*) DESC LIMIT 10;
-- even more complex aggregation.;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM hits_100m_meshed GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
-- the same, but without sorting.;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN SearchEngineID = 0 AND AdvEngineID = 0 THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = 6202628419148573758 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100000;
SELECT UserID, Minute(EventTime) AS m, SearchPhrase, count(*) FROM hits_100m_meshed GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = 6202628419148573758 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
-- even more complex aggregation; should not be run on large tables.;
SELECT TIME_SLICE(EventTime, 1, 'MINUTE') AS Minute, count(*) AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;
SELECT UserID FROM hits_100m_meshed WHERE UserID = 12345678901234567890;
-- heavy filtering on a UInt64 column.;
SELECT count(*) FROM hits_100m_meshed WHERE URL LIKE '%metrika%';
-- filtering by substring search in a string.;
SELECT SearchPhrase, MAX(URL), count(*) FROM hits_100m_meshed WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- fetching large columns, filtering by a string.;
SELECT SearchPhrase, MAX(URL), MAX(Title), count(*) AS c, count(DISTINCT UserID) FROM hits_100m_meshed WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY count(*) DESC LIMIT 10;
-- slightly larger columns.;
SELECT * FROM hits_100m_meshed WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
-- a bad query: fetching all columns.;
SELECT SearchPhrase FROM hits_100m_meshed WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
-- a big sort.;
SELECT SearchPhrase FROM hits_100m_meshed WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
-- a big sort by strings.;
SELECT SearchPhrase FROM hits_100m_meshed WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
-- a big sort by a tuple.;
SELECT CounterID, avg(length(URL)) AS l, count(*) FROM hits_100m_meshed WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
-- computing average URL lengths for large counters.;
SELECT SUBSTRING(SUBSTRING(Referer, POSITION('//' IN Referer) + 2), 1, GREATEST(0, POSITION('/' IN SUBSTRING(Referer, POSITION('//' IN Referer) + 2)) - 1)) AS key, avg(length(Referer)) AS l, count(*) AS c, MAX(Referer) FROM hits_10m_meshed WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
-- the same, but broken down by domain.;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_100m_meshed;
-- many dumb aggregate functions.;
SELECT SearchEngineID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_100m_meshed WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY count(*) DESC LIMIT 10;
-- complex aggregation; large tables may run out of RAM.;
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_100m_meshed WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
-- aggregation by two fields that aggregates nothing. Will not run on large tables.;
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_100m_meshed GROUP BY WatchID, ClientIP ORDER BY count(*) DESC LIMIT 10;
-- the same, but also without filtering.;
SELECT URL, count(*) FROM hits_100m_meshed GROUP BY URL ORDER BY count(*) DESC LIMIT 10;
-- aggregation by URL.;
SELECT 1, URL, count(*) FROM hits_100m_meshed GROUP BY 1, URL ORDER BY count(*) DESC LIMIT 10;
-- aggregation by URL and a number.;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) FROM hits_100m_meshed GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY count(*) DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits_100m_meshed WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM hits_100m_meshed WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits_100m_meshed WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN SearchEngineID = 0 AND AdvEngineID = 0 THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits_100m_meshed WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_100m_meshed WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = 6202628419148573758 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100000;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_100m_meshed WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = 6202628419148573758 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT TIME_SLICE(EventTime, 1, 'MINUTE') AS Minute, count(*) AS PageViews FROM hits_100m_meshed WHERE CounterID = 34 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;

View File

@ -98,11 +98,11 @@ private:
     for (size_t i = 0; i < nested_pools.size(); ++i)
     {
         if (load_balancing == LoadBalancing::NEAREST_HOSTNAME)
-            nested_pools[i].priority = hostname_differences[i];
+            nested_pools[i].state.priority = hostname_differences[i];
         else if (load_balancing == LoadBalancing::RANDOM)
-            nested_pools[i].priority = 0;
+            nested_pools[i].state.priority = 0;
         else if (load_balancing == LoadBalancing::IN_ORDER)
-            nested_pools[i].priority = i;
+            nested_pools[i].state.priority = i;
         else
             throw Exception("Unknown load_balancing_mode: " + toString(static_cast<int>(load_balancing)), ErrorCodes::LOGICAL_ERROR);
     }
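
These modes only assign priorities; a replica is then chosen by minimal priority. Below is a minimal sketch of that selection, with illustrative names (ReplicaState and pickReplica are not from this codebase), assuming the pool later shuffles or round-robins among equal priorities:

#include <algorithm>
#include <cstddef>
#include <vector>

struct ReplicaState
{
    size_t priority;   /// 0 for RANDOM, hostname distance for NEAREST_HOSTNAME, index for IN_ORDER
    size_t index;      /// position of the nested pool
};

/// The replica with the smallest priority wins; equal priorities are interchangeable,
/// which is what makes the RANDOM mode (all zeros) uniform once ties are shuffled.
/// `replicas` must be non-empty.
size_t pickReplica(const std::vector<ReplicaState> & replicas)
{
    return std::min_element(replicas.begin(), replicas.end(),
        [](const ReplicaState & a, const ReplicaState & b) { return a.priority < b.priority; })->index;
}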

View File

@ -2,7 +2,6 @@
 #include <stdint.h>
 #include <string.h>
-#include <algorithm>

 /** Substring search in a string using the Volnitsky algorithm:
@ -40,6 +39,22 @@ private:
     bool fallback;    /// Whether the fallback algorithm should be used.

+    /// Fallback algorithm.
+    static const char * naive_memmem(const char * haystack, size_t haystack_size, const char * needle, size_t needle_size)
+    {
+        const char * pos = haystack;
+        const char * end = haystack + haystack_size;
+        while (nullptr != (pos = reinterpret_cast<const char *>(memchr(pos, needle[0], end - pos))) && pos + needle_size <= end)
+        {
+            if (0 == memcmp(pos, needle, needle_size))
+                return pos;
+            else
+                ++pos;
+        }
+        return end;
+    }
+
 public:
     /** haystack_size_hint - the expected total size of haystack over all calls to search. Optional.
      *  If it is set small enough, the fallback algorithm will be used,
@ -83,8 +98,7 @@ public:
         }
         if (fallback || haystack_size <= needle_size)
         {
-            /// Strangely enough, std::search works much faster than memmem from eglibc.
-            return std::search(haystack, haystack_end, needle, needle_end);
+            return naive_memmem(haystack, haystack_size, needle, needle_size);
         }
         /// We will "apply" the needle to the haystack and compare the n-gram from the end of the needle.
@ -106,7 +120,7 @@ public:
         }
         /// The remaining tail.
-        return std::search(pos - step + 1, haystack_end, needle, needle_end);
+        return naive_memmem(pos - step + 1, haystack_end - (pos - step + 1), needle, needle_size);
     }
     const unsigned char * search(const unsigned char * haystack, size_t haystack_size) const
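
A hedged usage sketch, based only on the calls visible later in this diff: the searcher is constructed once per needle (with a total-haystack size hint) and then scanned across a large buffer, and, like naive_memmem above, search() returns the end pointer when there is no match.

#include <cstddef>
#include <iostream>

/// Assuming the Volnitsky class declared in this header is available.
void findAll(const unsigned char * haystack, size_t haystack_size,
             const char * needle, size_t needle_size)
{
    /// One searcher per needle; the hint is the total size to be scanned.
    Volnitsky searcher(needle, needle_size, haystack_size);

    const unsigned char * pos = haystack;
    const unsigned char * end = haystack + haystack_size;

    while (pos < end && end != (pos = searcher.search(pos, end - pos)))
    {
        std::cout << "match at offset " << (pos - haystack) << "\n";
        ++pos;   /// continue scanning after this match
    }
}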

View File

@ -72,7 +72,7 @@
 #define DBMS_DISTRIBUTED_DIRECTORY_MONITOR_SLEEP_TIME_MS 100

 /// The boundary on which blocks must be aligned for asynchronous file operations.
-#define DEFAULT_AIO_FILE_BLOCK_SIZE 512
+#define DEFAULT_AIO_FILE_BLOCK_SIZE 4096

 #define ALWAYS_INLINE __attribute__((__always_inline__))
 #define NO_INLINE __attribute__((__noinline__))

View File

@ -283,7 +283,6 @@ namespace ErrorCodes
 AIO_COMPLETION_ERROR,
 AIO_READ_ERROR,
 AIO_WRITE_ERROR,
-AIO_UNALIGNED_SIZE_ERROR,
 INDEX_NOT_USED,
 POCO_EXCEPTION = 1000,

View File

@ -6,6 +6,7 @@
 #include <statdaemons/Exception.h>
 #include <Poco/SharedPtr.h>

+namespace Poco { class Logger; }

 namespace DB
 {
@ -28,6 +29,7 @@ ExceptionPtr cloneCurrentException();
  * Can be used in destructors in a catch (...) block.
  */
 void tryLogCurrentException(const char * log_name);
+void tryLogCurrentException(Poco::Logger * logger);

 void rethrowFirstException(Exceptions & exceptions);

View File

@ -32,6 +32,8 @@ public:
         return res.str();
     }

+    RowInputStreamPtr & getRowInput() { return row_input; }

 protected:
     Block readImpl() override;

View File

@ -29,6 +29,35 @@ private:
     bool with_names;
     bool with_types;
     DataTypes data_types;

+    /// For convenient diagnostics in case of an error.
+    size_t row_num = 0;
+
+    /// How many bytes were read, not counting those still in the buffer.
+    size_t bytes_read_at_start_of_buffer_on_current_row = 0;
+    size_t bytes_read_at_start_of_buffer_on_prev_row = 0;
+
+    BufferBase::Position pos_of_current_row = nullptr;
+    BufferBase::Position pos_of_prev_row = nullptr;
+
+    /** In case of an exception during parsing, this function is called.
+      * It re-parses the last two rows and prints detailed information about what is happening.
+      */
+    void printDiagnosticInfo(WriteBuffer & out);
+
+    void updateDiagnosticInfo()
+    {
+        ++row_num;
+
+        bytes_read_at_start_of_buffer_on_prev_row = bytes_read_at_start_of_buffer_on_current_row;
+        bytes_read_at_start_of_buffer_on_current_row = istr.count() - istr.offset();
+
+        pos_of_prev_row = pos_of_current_row;
+        pos_of_current_row = istr.position();
+    }
+
+    bool parseRowAndPrintDiagnosticInfo(WriteBuffer & out, size_t max_length_of_column_name, size_t max_length_of_data_type_name);
 };

 }
} }

View File

@ -134,7 +134,7 @@ struct ConvertImpl<DataTypeDateTime, DataTypeDate, Name>
 };

-/** A special case for converting UInt32 or UInt64 to Date.
+/** A special case for converting (U)Int32 or (U)Int64 to Date.
  *  If the number is less than 65536, it is interpreted as a DayNum, and if it is greater, as a unix timestamp.
  *  It is somewhat illogical that we essentially put two different functions into one.
  *  But it allows us to support the common case,
@ -142,7 +142,7 @@ struct ConvertImpl<DataTypeDateTime, DataTypeDate, Name>
  *  (otherwise such usage would be a common mistake).
  */
 template <typename FromDataType, typename Name>
-struct ConvertImplUInt32Or64ToDate
+struct ConvertImpl32Or64ToDate
 {
     typedef typename FromDataType::FieldType FromFieldType;
     typedef DataTypeDate::FieldType ToFieldType;
@ -186,11 +186,10 @@ struct ConvertImplUInt32Or64ToDate
     }
 };

-template <typename Name>
-struct ConvertImpl<DataTypeUInt32, DataTypeDate, Name> : ConvertImplUInt32Or64ToDate<DataTypeUInt32, Name> {};
-template <typename Name>
-struct ConvertImpl<DataTypeUInt64, DataTypeDate, Name> : ConvertImplUInt32Or64ToDate<DataTypeUInt64, Name> {};
+template <typename Name> struct ConvertImpl<DataTypeUInt32, DataTypeDate, Name> : ConvertImpl32Or64ToDate<DataTypeUInt32, Name> {};
+template <typename Name> struct ConvertImpl<DataTypeUInt64, DataTypeDate, Name> : ConvertImpl32Or64ToDate<DataTypeUInt64, Name> {};
+template <typename Name> struct ConvertImpl<DataTypeInt32, DataTypeDate, Name> : ConvertImpl32Or64ToDate<DataTypeInt32, Name> {};
+template <typename Name> struct ConvertImpl<DataTypeInt64, DataTypeDate, Name> : ConvertImpl32Or64ToDate<DataTypeInt64, Name> {};

 /** Converting numbers, dates, dates-with-times to strings: via formatting.
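
A minimal standalone sketch of the 65536 threshold described in the comment above (the date formatting here is plain libc, not the library's DataTypeDate):

#include <cstdint>
#include <ctime>
#include <iostream>

int main()
{
    auto interpret = [](uint64_t x)
    {
        if (x < 65536)
            std::cout << x << " -> DayNum (days since 1970-01-01)\n";
        else
        {
            time_t t = static_cast<time_t>(x);   /// anything >= 65536 is a unix timestamp
            char buf[11];
            std::strftime(buf, sizeof(buf), "%Y-%m-%d", std::gmtime(&t));
            std::cout << x << " -> unix timestamp, date " << buf << "\n";
        }
    };

    interpret(16000);        /// a DayNum: a day in late 2013
    interpret(1372636800);   /// a timestamp: 2013-07-01 00:00:00 UTC
}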

View File

@ -160,11 +160,16 @@ struct PositionUTF8Impl
 /// Translates a LIKE expression into an re2 regexp. For example: abc%def -> ^abc.*def$
 inline String likePatternToRegexp(const String & pattern)
 {
-    String res = "^";
+    String res;
     res.reserve(pattern.size() * 2);
     const char * pos = pattern.data();
     const char * end = pos + pattern.size();
+
+    if (pos < end && *pos == '%')
+        ++pos;
+    else
+        res = "^";
+
     while (pos < end)
     {
         switch (*pos)
@ -174,7 +179,10 @@ inline String likePatternToRegexp(const String & pattern)
                 res += *pos;
                 break;
             case '%':
-                res += ".*";
+                if (pos + 1 != end)
+                    res += ".*";
+                else
+                    return res;
                 break;
             case '_':
                 res += ".";
@ -347,6 +355,7 @@ struct MatchImpl
         /// The current index in the array of strings.
         size_t i = 0;

+        /// TODO The searcher should be made shared across all calls of the function.
         Volnitsky searcher(strstr_pattern.data(), strstr_pattern.size(), end - pos);

         /// We will search for the next occurrence in all strings at once.
@ -369,14 +378,87 @@ struct MatchImpl
             ++i;
         }

+        /// The tail, in which there can be no substrings.
         memset(&res[i], revert, (res.size() - i) * sizeof(res[0]));
     }
     else
     {
-        const auto & regexp = Regexps::get<like, true>(pattern);
         size_t size = offsets.size();
-        for (size_t i = 0; i < size; ++i)
-            res[i] = revert ^ regexp->match(reinterpret_cast<const char *>(&data[i != 0 ? offsets[i - 1] : 0]), (i != 0 ? offsets[i] - offsets[i - 1] : offsets[0]) - 1);
+
+        const auto & regexp = Regexps::get<like, true>(pattern);
+
+        std::string required_substring;
+        bool is_trivial;
+        bool required_substring_is_prefix;   /// for anchored execution of the regexp.
+
+        regexp->getAnalyzeResult(required_substring, is_trivial, required_substring_is_prefix);
+
+        if (required_substring.empty())
+        {
+            size_t prev_offset = 0;
+            for (size_t i = 0; i < size; ++i)
+            {
+                res[i] = revert ^ regexp->getRE2()->Match(
+                    re2_st::StringPiece(reinterpret_cast<const char *>(&data[prev_offset]), offsets[i] - prev_offset - 1),
+                    0, offsets[i] - prev_offset - 1, re2_st::RE2::UNANCHORED, nullptr, 0);
+
+                prev_offset = offsets[i];
+            }
+        }
+        else
+        {
+            /// NOTE This almost duplicates the likePatternIsStrstr case.
+
+            const UInt8 * begin = &data[0];
+            const UInt8 * pos = begin;
+            const UInt8 * end = pos + data.size();
+
+            /// The current index in the array of strings.
+            size_t i = 0;
+
+            Volnitsky searcher(required_substring.data(), required_substring.size(), end - pos);
+
+            /// We will search for the next occurrence in all strings at once.
+            while (pos < end && end != (pos = searcher.search(pos, end - pos)))
+            {
+                /// Determine which index it belongs to.
+                while (begin + offsets[i] < pos)
+                {
+                    res[i] = revert;
+                    ++i;
+                }
+
+                /// Check that the occurrence does not cross string boundaries.
+                if (pos + strstr_pattern.size() < begin + offsets[i])
+                {
+                    /// And if it does not, check with the regexp if necessary.
+                    if (is_trivial)
+                        res[i] = !revert;
+                    else
+                    {
+                        const char * str_data = reinterpret_cast<const char *>(&data[i != 0 ? offsets[i - 1] : 0]);
+                        size_t str_size = (i != 0 ? offsets[i] - offsets[i - 1] : offsets[0]) - 1;
+
+                        if (required_substring_is_prefix)
+                            res[i] = revert ^ regexp->getRE2()->Match(
+                                re2_st::StringPiece(str_data, str_size),
+                                reinterpret_cast<const char *>(pos) - str_data, str_size, re2_st::RE2::ANCHOR_START, nullptr, 0);
+                        else
+                            res[i] = revert ^ regexp->getRE2()->Match(
+                                re2_st::StringPiece(str_data, str_size),
+                                0, str_size, re2_st::RE2::UNANCHORED, nullptr, 0);
+                    }
+                }
+                else
+                    res[i] = revert;
+
+                pos = begin + offsets[i];
+                ++i;
+            }
+
+            memset(&res[i], revert, (res.size() - i) * sizeof(res[0]));
+        }
     }
 }

View File

@ -99,6 +99,9 @@ protected:
      */
     Buffer working_buffer;

+    /// The number of bytes to ignore from the starting position of the working_buffer buffer.
+    size_t working_buffer_offset = 0;

     /// Read/write position.
     Position pos;

View File

@ -47,7 +47,8 @@ public:
         if (!res)
             working_buffer.resize(0);

-        pos = working_buffer.begin();
+        pos = working_buffer.begin() + working_buffer_offset;
+        working_buffer_offset = 0;
         return res;
     }

View File

@ -12,8 +12,7 @@
 namespace DB
 {

 /** A class for asynchronous reading of data.
- *  All sizes and offsets must be multiples of DEFAULT_AIO_FILE_BLOCK_SIZE bytes.
  */
 class ReadBufferAIO : public BufferWithOwnMemory<ReadBuffer>
 {
@ -44,15 +43,16 @@ private:
     BufferWithOwnMemory<ReadBuffer> fill_buffer;

     iocb request;
-    std::vector<iocb *> request_ptrs;
-    std::vector<io_event> events;
-    AIOContext aio_context;
+    std::vector<iocb *> request_ptrs{&request};
+    std::vector<io_event> events{1};
+    AIOContext aio_context{1};

     const std::string filename;

     size_t max_bytes_read = std::numeric_limits<size_t>::max();
     size_t total_bytes_read = 0;
+    size_t requested_byte_count = 0;
     off_t pos_in_file = 0;
     int fd = -1;

View File

@ -2,11 +2,13 @@
 #include <DB/IO/WriteBuffer.h>
 #include <DB/IO/BufferWithOwnMemory.h>
+#include <DB/Core/Defines.h>

 #include <statdaemons/AIO.h>

 #include <string>
 #include <unistd.h>
 #include <fcntl.h>
+#include <sys/uio.h>

 namespace DB
 {
@ -46,15 +48,29 @@ private:
     BufferWithOwnMemory<WriteBuffer> flush_buffer;

     iocb request;
-    std::vector<iocb *> request_ptrs;
-    std::vector<io_event> events;
-    AIOContext aio_context;
+    std::vector<iocb *> request_ptrs{&request};
+    std::vector<io_event> events{1};
+    AIOContext aio_context{1};
+
+    iovec iov[3];
+
+    Memory memory_page{DEFAULT_AIO_FILE_BLOCK_SIZE, DEFAULT_AIO_FILE_BLOCK_SIZE};

     const std::string filename;

+    off_t bytes_to_write = 0;
+    off_t truncation_count = 0;
+
+    /// The current position in the file.
     off_t pos_in_file = 0;
+    /// The maximum position reached in the file.
+    off_t max_pos_in_file = 0;
+
+    /// The file descriptor for writing.
     int fd = -1;
+    /// The file descriptor for reading. Used for unaligned writes.
+    int fd2 = -1;

     /// The asynchronous write operation has not yet completed.
     bool is_pending_write = false;

View File

@ -263,6 +263,9 @@ public:
     const Dictionaries & getDictionaries() const;
     const ExternalDictionaries & getExternalDictionaries() const;

+    void tryCreateDictionaries(bool throw_on_error = false) const;
+    void tryCreateExternalDictionaries(bool throw_on_error = false) const;

     InterserverIOHandler & getInterserverIOHandler() { return shared->interserver_io_handler; }

     /// How other servers can access this one to download replicated data.

View File

@ -31,8 +31,10 @@ private:
-    void handleException() const
+    void handleException(const bool throw_on_error) const
     {
+        const auto exception_ptr = std::current_exception();
+
         try
         {
             throw;
@ -40,18 +42,19 @@ private:
         catch (const Poco::Exception & e)
         {
             LOG_ERROR(log, "Cannot load dictionary! You must resolve this manually. " << e.displayText());
-            return;
         }
         catch (...)
         {
             LOG_ERROR(log, "Cannot load dictionary! You must resolve this manually.");
-            return;
         }
+
+        if (throw_on_error)
+            std::rethrow_exception(exception_ptr);
     }

     /// Updates the dictionaries.
-    void reloadImpl()
+    void reloadImpl(const bool throw_on_error = false)
     {
         /** If the dictionaries cannot be updated, we nevertheless do not throw an exception (the old dictionaries are used).
          *  If there are no old valid dictionaries, then, when using functions that depend on them,
@ -61,42 +64,53 @@ private:
         LOG_INFO(log, "Loading dictionaries.");

+        auto & config = Poco::Util::Application::instance().config();
+
         bool was_exception = false;

-        try
+        if (config.has(TechDataHierarchy::required_key))
         {
-            MultiVersion<TechDataHierarchy>::Version new_tech_data_hierarchy = new TechDataHierarchy;
-            tech_data_hierarchy.set(new_tech_data_hierarchy);
-        }
-        catch (...)
-        {
-            handleException();
-            was_exception = true;
+            try
+            {
+                auto new_tech_data_hierarchy = std::make_unique<TechDataHierarchy>();
+                tech_data_hierarchy.set(new_tech_data_hierarchy.release());
+            }
+            catch (...)
+            {
+                handleException(throw_on_error);
+                was_exception = true;
+            }
         }

-        try
-        {
-            MultiVersion<RegionsHierarchies>::Version new_regions_hierarchies = new RegionsHierarchies;
-            new_regions_hierarchies->reload();
-            regions_hierarchies.set(new_regions_hierarchies);
-        }
-        catch (...)
+        if (config.has(RegionsHierarchies::required_key))
         {
-            handleException();
-            was_exception = true;
+            try
+            {
+                auto new_regions_hierarchies = std::make_unique<RegionsHierarchies>();
+                new_regions_hierarchies->reload();
+                regions_hierarchies.set(new_regions_hierarchies.release());
+            }
+            catch (...)
+            {
+                handleException(throw_on_error);
+                was_exception = true;
+            }
         }

-        try
+        if (config.has(RegionsNames::required_key))
         {
-            MultiVersion<RegionsNames>::Version new_regions_names = new RegionsNames;
-            new_regions_names->reload();
-            regions_names.set(new_regions_names);
-        }
-        catch (...)
-        {
-            handleException();
-            was_exception = true;
+            try
+            {
+                auto new_regions_names = std::make_unique<RegionsNames>();
+                new_regions_names->reload();
+                regions_names.set(new_regions_names.release());
+            }
+            catch (...)
+            {
+                handleException(throw_on_error);
+                was_exception = true;
+            }
         }

         if (!was_exception)
@ -119,10 +133,10 @@ private:
 public:
     /// The dictionaries will be updated in a separate thread, every reload_period seconds.
-    Dictionaries(int reload_period_ = 3600)
+    Dictionaries(const bool throw_on_error, const int reload_period_ = 3600)
         : reload_period(reload_period_), log(&Logger::get("Dictionaries"))
     {
-        reloadImpl();
+        reloadImpl(throw_on_error);
         reloading_thread = std::thread([this] { reloadPeriodically(); });
     }

View File

@ -59,8 +59,8 @@ private:
     std::unordered_map<std::string, Poco::Timestamp> last_modification_times;

-    void reloadImpl();
-    void reloadFromFile(const std::string & config_path);
+    void reloadImpl(bool throw_on_error = false);
+    void reloadFromFile(const std::string & config_path, bool throw_on_error);

     void reloadPeriodically()
     {
@ -82,10 +82,10 @@ private:
 public:
     /// The dictionaries will be updated in a separate thread, every reload_period seconds.
-    ExternalDictionaries(Context & context)
+    ExternalDictionaries(Context & context, const bool throw_on_error)
         : context(context), log(&Logger::get("ExternalDictionaries"))
     {
-        reloadImpl();
+        reloadImpl(throw_on_error);
         reloading_thread = std::thread{&ExternalDictionaries::reloadPeriodically, this};
     }

View File

@ -293,7 +293,7 @@ public:
      */
     void execute(Block & block, const ColumnNumbers & arguments, size_t result, bool negative) const;

-    std::string describe()
+    std::string describe() const
     {
         if (!ordered_set_elements)
             return "{}";
@ -312,7 +312,7 @@ public:
     }

     /// checks whether the Set contains elements for the given index range
-    BoolMask mayBeTrueInRange(const Range & range);
+    BoolMask mayBeTrueInRange(const Range & range) const;

     size_t getTotalRowCount() const { return data.getTotalRowCount(); }
     size_t getTotalByteCount() const { return data.getTotalByteCount(); }

View File

@ -86,7 +86,7 @@ struct Settings
     /** Whether query compilation is enabled. */ \
     M(SettingBool, compile, false) \
     /** The number of structurally identical queries before their compilation is initiated. */ \
-    M(SettingUInt64, min_count_to_compile, 0) \
+    M(SettingUInt64, min_count_to_compile, 3) \
     /** The number of keys at which two-level aggregation starts being used. 0 - never use it. */ \
     M(SettingUInt64, group_by_two_level_threshold, 100000) \
     \
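
A toy sketch of what min_count_to_compile means (the helper is illustrative, not the server's compiler cache): compilation is only triggered once a structurally identical query has been seen that many times, so one-off queries never pay the compilation cost.

#include <cstdint>
#include <string>
#include <unordered_map>

/// Count queries by a key describing their structure; return true once the
/// threshold is reached and the compiled variant should be produced.
bool shouldCompile(std::unordered_map<std::string, uint64_t> & counts,
                   const std::string & query_structure_key,
                   uint64_t min_count_to_compile = 3)
{
    return ++counts[query_structure_key] >= min_count_to_compile;
}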

View File

@ -262,6 +262,16 @@ public:
 };

 class ASTSet;

+/** A condition on the index.
+  *
+  *  Consists of conditions on the key's membership in all possible ranges or sets,
+  *  as well as logical AND/OR/NOT connectives over these conditions.
+  *
+  *  Builds a reverse polish notation of these conditions
+  *  and can evaluate (interpret) its satisfiability over key ranges.
+  */
 class PKCondition
 {
 public:
@ -270,22 +280,20 @@
     /// Whether the condition is satisfiable in the key range.
     /// left_pk and right_pk must contain all the fields from sort_descr, in the corresponding order.
-    bool mayBeTrueInRange(const Field * left_pk, const Field * right_pk);
+    bool mayBeTrueInRange(const Field * left_pk, const Field * right_pk) const;

     /// Whether the condition is satisfiable in a semi-infinite (not bounded on the right) key range.
     /// left_pk must contain all the fields from sort_descr, in the corresponding order.
-    bool mayBeTrueAfter(const Field * left_pk);
+    bool mayBeTrueAfter(const Field * left_pk) const;

-    bool alwaysTrue()
-    {
-        return rpn.size() == 1 && rpn[0].function == RPNElement::FUNCTION_UNKNOWN;
-    }
+    /// Checks that the index cannot be used.
+    bool alwaysUnknown() const;

     /// Impose an additional condition: the value in the column `column` must be in the range `range`.
     /// Returns whether there is such a column in the primary key.
     bool addCondition(const String & column, const Range & range);

-    String toString();
+    String toString() const;

 private:
     /// The expression is stored as reverse polish notation (RPN).
     struct RPNElement
@ -310,7 +318,7 @@ private:
         RPNElement(Function function_, size_t key_column_, const Range & range_)
             : function(function_), range(range_), key_column(key_column_) {}

-        String toString();
+        String toString() const;

         Function function;
@ -320,13 +328,13 @@ private:
         /// For FUNCTION_IN_SET
         ASTPtr in_function;

-        ASTSet * inFunctionToSet();
+        const ASTSet * inFunctionToSet() const;
     };

     typedef std::vector<RPNElement> RPN;
     typedef std::map<String, size_t> ColumnIndices;

-    bool mayBeTrueInRange(const Field * left_pk, const Field * right_pk, bool right_bounded);
+    bool mayBeTrueInRange(const Field * left_pk, const Field * right_pk, bool right_bounded) const;

     void traverseAST(ASTPtr & node, Block & block_with_constants);
     bool atomFromAST(ASTPtr & node, Block & block_with_constants, RPNElement & out);
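
A simplified sketch of how such an RPN can be interpreted over a key range (BoolMaskSketch and Op are illustrative, not the types above): every atom yields a "may be true / may be false" pair, FUNCTION_UNKNOWN yields (true, true), and the connectives fold pairs on a stack. This is also why a single UNKNOWN element means the index cannot be used, as alwaysUnknown() checks.

#include <stdexcept>
#include <utility>
#include <vector>

struct BoolMaskSketch
{
    bool can_be_true;
    bool can_be_false;
};

enum class Op { ATOM_TRUE, ATOM_FALSE, UNKNOWN, NOT, AND, OR };

BoolMaskSketch evaluateRPN(const std::vector<Op> & rpn)
{
    std::vector<BoolMaskSketch> stack;
    for (Op op : rpn)
    {
        switch (op)
        {
            case Op::ATOM_TRUE:  stack.push_back({true, false}); break;
            case Op::ATOM_FALSE: stack.push_back({false, true}); break;
            case Op::UNKNOWN:    stack.push_back({true, true}); break;   /// the index cannot decide
            case Op::NOT:
            {
                auto & x = stack.back();
                std::swap(x.can_be_true, x.can_be_false);
                break;
            }
            case Op::AND:
            case Op::OR:
            {
                auto rhs = stack.back(); stack.pop_back();
                auto & lhs = stack.back();
                if (op == Op::AND)
                    lhs = {lhs.can_be_true && rhs.can_be_true, lhs.can_be_false || rhs.can_be_false};
                else
                    lhs = {lhs.can_be_true || rhs.can_be_true, lhs.can_be_false && rhs.can_be_false};
                break;
            }
        }
    }
    if (stack.size() != 1)
        throw std::logic_error("malformed RPN");
    return stack.back();
}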

dbms/scripts/README Normal file
View File

@ -0,0 +1,11 @@
# How to create dictionaries for region* functions:
# 1. You need access to host ███████████.yandex-team.ru.
# 2. Run the following commands:
curl 'http://███████████.yandex-team.ru/?fields=id,parent_id,type,population' | tail -n+2 > regions_hierarchy.txt
curl 'http://███████████.yandex-team.ru/?fields=id,parent_id,type,population&new_parents=977:187' | tail -n+2 > regions_hierarchy_ua.txt
curl 'http://███████████.yandex-team.ru/?fields=id,ru_name' | tail -n+2 > regions_names_ru.txt
curl 'http://███████████.yandex-team.ru/?fields=id,uk_name' | tail -n+2 > regions_names_ua.txt
curl 'http://███████████.yandex-team.ru/?fields=id,by_name' | tail -n+2 > regions_names_by.txt
curl 'http://███████████.yandex-team.ru/?fields=id,kz_name' | tail -n+2 > regions_names_kz.txt
curl 'http://███████████.yandex-team.ru/?fields=id,tr_name' | tail -n+2 > regions_names_tr.txt

View File

@ -1,31 +0,0 @@
#!/usr/bin/perl -w
use strict;
use warnings;
use geobase;
sub get_population {
my $key = shift;
my $depth = shift || 0;
return 0 if ($depth > 100);
my $current = int($Region{$key}->{zip_old} || 0); # zip_old, despite the name, contains the population of the region.
return $current if ($current);
my $sum_of_children = 0;
for my $child (@{$Region{$key}->{chld}}) {
$sum_of_children += get_population($child, $depth + 1);
}
return $sum_of_children;
}
foreach my $key (keys %Region) {
print $key . "\t"
. ($Region{$key}->{parents}[-1] || 0) . "\t"
. ($Region{$key}->{type} || 0) . "\t"
. get_population($key) . "\n";
}

View File

@ -1,25 +0,0 @@
#!/usr/bin/perl -w
use strict;
use warnings;
use geobase;
my @languages = ('ru', 'en', 'ua', 'by', 'kz', 'tr');
my @output_files = map { open(my $output, ">:encoding(UTF-8)", "regions_names_" . $_ . ".txt") || die $!; $output } @languages;
my %outputs;
@outputs{@languages} = @output_files;
foreach my $key (keys %Region) {
foreach my $lang (@languages) {
my $field = ( $lang eq 'ru' ? 'name' : $lang . '_name' );
my $name = $Region{$key}->{$field};
if ($name) {
$name =~ s/^\s+//;
$name =~ s/\s+$//;
$name =~ s/(\t|\n)/ /g;
if ($name ne '') {
print { $outputs{$lang} } $key . "\t" . $name . "\n";
}
}
}
}

View File

@ -38,6 +38,8 @@
 #include <DB/IO/ReadBufferFromIStream.h>
 #include <DB/DataStreams/AsynchronousBlockInputStream.h>
+#include <DB/DataStreams/BlockInputStreamFromRowInputStream.h>
+#include <DB/DataStreams/TabSeparatedRowInputStream.h>

 #include <DB/Parsers/ParserQuery.h>
 #include <DB/Parsers/ASTSetQuery.h>
@ -57,6 +59,7 @@
 #include <DB/Common/ExternalTable.h>
 #include <DB/Common/UnicodeBar.h>
 #include <DB/Common/formatReadable.h>
+#include <DB/Columns/ColumnString.h>

 /// http://en.wikipedia.org/wiki/ANSI_escape_code
@ -93,6 +96,7 @@ private:
 };

 bool is_interactive = true;            /// Use the readline interface, or batch mode.
+bool print_time_to_stderr = false;     /// In non-interactive mode, print the query execution time to stderr.
 bool stdin_is_not_tty = false;         /// stdin is not a terminal.

 winsize terminal_size {};              /// The terminal size - for printing the progress bar.
@ -257,6 +261,9 @@ private:
 if (is_interactive)
 {
+    if (print_time_to_stderr)
+        throw Exception("time option could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS);

     /// Disable tab completion.
     rl_bind_key('\t', rl_insert);
@ -557,6 +564,10 @@ private:
     std::cout << std::endl << std::endl;
 }
+else if (print_time_to_stderr)
+{
+    std::cerr << watch.elapsedSeconds() << "\n";
+}

 return true;
 }
@ -679,13 +690,16 @@ private:
 if (!insert->format.empty())
     current_format = insert->format;

-BlockInputStreamPtr block_std_in = new AsynchronousBlockInputStream(context.getFormatFactory().getInput(
-    current_format, buf, sample, insert_format_max_block_size, context.getDataTypeFactory()));
-block_std_in->readPrefix();
+BlockInputStreamPtr block_input = context.getFormatFactory().getInput(
+    current_format, buf, sample, insert_format_max_block_size, context.getDataTypeFactory());
+
+BlockInputStreamPtr async_block_input = new AsynchronousBlockInputStream(block_input);
+
+async_block_input->readPrefix();

 while (true)
 {
-    Block block = block_std_in->read();
+    Block block = async_block_input->read();
     connection->sendData(block);
     processed_rows += block.rows();
@ -693,7 +707,7 @@ private:
     break;
 }

-block_std_in->readSuffix();
+async_block_input->readSuffix();
 }
@ -1022,7 +1036,9 @@ public:
 ("database,d", boost::program_options::value<std::string>(), "database")
 ("multiline,m", "multiline")
 ("multiquery,n", "multiquery")
-("vertical,E", "vertical")
+("format,f", boost::program_options::value<std::string>(), "default output format")
+("vertical,E", "vertical output format, same as --format=Vertical or FORMAT Vertical or \\G at end of command")
+("time,t", "print query execution time to stderr in non-interactive mode (for benchmarks)")
 APPLY_FOR_SETTINGS(DECLARE_SETTING)
 APPLY_FOR_LIMITS(DECLARE_LIMIT)
 ;
@ -1135,8 +1151,12 @@ public:
     config().setBool("multiline", true);
 if (options.count("multiquery"))
     config().setBool("multiquery", true);
+if (options.count("format"))
+    config().setString("format", options["format"].as<std::string>());
 if (options.count("vertical"))
     config().setBool("vertical", true);
+if (options.count("time"))
+    print_time_to_stderr = true;
 }
 };

View File

@ -53,6 +53,11 @@ inline std::string demangle(const char * const mangled, int & status)
 }

 void tryLogCurrentException(const char * log_name)
+{
+    tryLogCurrentException(&Logger::get(log_name));
+}
+
+void tryLogCurrentException(Poco::Logger * logger)
 {
     try
     {
@ -62,7 +67,7 @@ void tryLogCurrentException(const char * log_name)
     {
         try
         {
-            LOG_ERROR(&Logger::get(log_name), "Code: " << e.code() << ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what()
+            LOG_ERROR(logger, "Code: " << e.code() << ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what()
                 << ", Stack trace:\n\n" << e.getStackTrace().toString());
         }
         catch (...) {}
@ -71,7 +76,7 @@ void tryLogCurrentException(const char * log_name)
     {
         try
        {
-            LOG_ERROR(&Logger::get(log_name), "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code()
+            LOG_ERROR(logger, "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code()
                 << ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what());
         }
         catch (...) {}
@ -86,7 +91,7 @@ void tryLogCurrentException(const char * log_name)
         if (status)
             name += " (demangling status: " + toString(status) + ")";

-        LOG_ERROR(&Logger::get(log_name), "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what());
+        LOG_ERROR(logger, "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what());
     }
     catch (...) {}
 }
@ -100,7 +105,7 @@ void tryLogCurrentException(const char * log_name)
     if (status)
         name += " (demangling status: " + toString(status) + ")";

-    LOG_ERROR(&Logger::get(log_name), "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name);
+    LOG_ERROR(logger, "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name);
 }
 catch (...) {}
 }

View File

@ -1,6 +1,8 @@
 #include <DB/IO/ReadHelpers.h>
+#include <DB/IO/Operators.h>

 #include <DB/DataStreams/TabSeparatedRowInputStream.h>
+#include <DB/DataTypes/DataTypesNumberFixed.h>

 namespace DB
@ -43,29 +45,327 @@ void TabSeparatedRowInputStream::readPrefix()
 }

+/** A check for a common error case: using a Windows-style line break.
+  */
+static void checkForCarriageReturn(ReadBuffer & istr)
+{
+    if (istr.position()[0] == '\r' || (istr.position() != istr.buffer().begin() && istr.position()[-1] == '\r'))
+        throw Exception("\nYou have carriage return (\\r, 0x0D, ASCII 13) at end of first row."
+            "\nIt's like your input data has DOS/Windows style line separators, that are illegal in TabSeparated format."
+            " You must transform your file to Unix format."
+            "\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r.",
+            ErrorCodes::INCORRECT_DATA);
+}
 bool TabSeparatedRowInputStream::read(Row & row)
 {
+    updateDiagnosticInfo();
+
     size_t size = data_types.size();
     row.resize(size);

-    for (size_t i = 0; i < size; ++i)
-    {
-        if (i == 0 && istr.eof())
-        {
-            row.clear();
-            return false;
-        }
-
-        data_types[i]->deserializeTextEscaped(row[i], istr);
-
-        /// skip delimiters
-        if (i + 1 == size)
-        {
-            if (!istr.eof())
-                assertString("\n", istr);
-        }
-        else
-            assertString("\t", istr);
-    }
+    try
+    {
+        for (size_t i = 0; i < size; ++i)
+        {
+            if (i == 0 && istr.eof())
+            {
+                row.clear();
+                return false;
+            }
+
+            data_types[i]->deserializeTextEscaped(row[i], istr);
+
+            /// skip delimiters
+            if (i + 1 == size)
+            {
+                if (!istr.eof())
+                {
+                    if (unlikely(row_num == 1))
+                        checkForCarriageReturn(istr);
+
+                    assertString("\n", istr);
+                }
+            }
+            else
+                assertString("\t", istr);
+        }
+    }
+    catch (Exception & e)
+    {
+        String verbose_diagnostic;
+        {
+            WriteBufferFromString diagnostic_out(verbose_diagnostic);
+            printDiagnosticInfo(diagnostic_out);
+        }
+
+        e.addMessage("\n" + verbose_diagnostic);
+        throw;
+    }

     return true;
 }
+void TabSeparatedRowInputStream::printDiagnosticInfo(WriteBuffer & out)
+{
+    /// Detailed diagnostics are only possible if the last and next-to-last rows are still in the read buffer.
+    size_t bytes_read_at_start_of_buffer = istr.count() - istr.offset();
+    if (bytes_read_at_start_of_buffer != bytes_read_at_start_of_buffer_on_prev_row)
+    {
+        out << "Could not print diagnostic info because two last rows aren't in buffer (rare case)\n";
+        return;
+    }
+
+    size_t max_length_of_column_name = 0;
+    for (size_t i = 0; i < sample.columns(); ++i)
+        if (sample.getByPosition(i).name.size() > max_length_of_column_name)
+            max_length_of_column_name = sample.getByPosition(i).name.size();
+
+    size_t max_length_of_data_type_name = 0;
+    for (size_t i = 0; i < sample.columns(); ++i)
+        if (sample.getByPosition(i).type->getName().size() > max_length_of_data_type_name)
+            max_length_of_data_type_name = sample.getByPosition(i).type->getName().size();
+
+    /// Roll the read cursor back to the beginning of the previous or current row and parse everything again. But now print detailed information.
+
+    if (pos_of_prev_row)
+    {
+        istr.position() = pos_of_prev_row;
+
+        out << "\nRow " << (row_num - 1) << ":\n";
+        if (!parseRowAndPrintDiagnosticInfo(out, max_length_of_column_name, max_length_of_data_type_name))
+            return;
+    }
+    else
+    {
+        if (!pos_of_current_row)
+        {
+            out << "Could not print diagnostic info because parsing of data hasn't started.\n";
+            return;
+        }
+
+        istr.position() = pos_of_current_row;
+    }
+
+    out << "\nRow " << row_num << ":\n";
+    parseRowAndPrintDiagnosticInfo(out, max_length_of_column_name, max_length_of_data_type_name);
+    out << "\n";
+}
+
+static void verbosePrintString(BufferBase::Position begin, BufferBase::Position end, WriteBuffer & out)
+{
+    if (end == begin)
+    {
+        out << "<EMPTY>";
+        return;
+    }
+
+    out << "\"";
+    for (auto pos = begin; pos < end; ++pos)
+    {
+        switch (*pos)
+        {
+            case '\0':  out << "<ASCII NUL>";       break;
+            case '\b':  out << "<BACKSPACE>";       break;
+            case '\f':  out << "<FORM FEED>";       break;
+            case '\n':  out << "<LINE FEED>";       break;
+            case '\r':  out << "<CARRIAGE RETURN>"; break;
+            case '\t':  out << "<TAB>";             break;
+            case '\\':  out << "<BACKSLASH>";       break;
+            case '"':   out << "<DOUBLE QUOTE>";    break;
+            case '\'':  out << "<SINGLE QUOTE>";    break;
+            default:
+            {
+                if (*pos >= 0 && *pos < 32)
+                {
+                    static const char * hex = "0123456789ABCDEF";
+                    out << "<0x" << hex[*pos / 16] << hex[*pos % 16] << ">";
+                }
+                else
+                    out << *pos;
+            }
+        }
+    }
+    out << "\"";
+}
+bool TabSeparatedRowInputStream::parseRowAndPrintDiagnosticInfo(
+    WriteBuffer & out, size_t max_length_of_column_name, size_t max_length_of_data_type_name)
+{
+    size_t size = data_types.size();
+    for (size_t i = 0; i < size; ++i)
+    {
+        if (i == 0 && istr.eof())
+        {
+            out << "<End of stream>\n";
+            return false;
+        }
+
+        out << "Column " << i << ", " << std::string((i < 10 ? 2 : i < 100 ? 1 : 0), ' ')
+            << "name: " << sample.getByPosition(i).name << ", " << std::string(max_length_of_column_name - sample.getByPosition(i).name.size(), ' ')
+            << "type: " << data_types[i]->getName() << ", " << std::string(max_length_of_data_type_name - data_types[i]->getName().size(), ' ');
+
+        auto prev_position = istr.position();
+        std::exception_ptr exception;
+
+        Field field;
+        try
+        {
+            data_types[i]->deserializeTextEscaped(field, istr);
+        }
+        catch (...)
+        {
+            exception = std::current_exception();
+        }
+
+        auto curr_position = istr.position();
+
+        if (curr_position < prev_position)
+            throw Exception("Logical error: parsing is non-deterministic.", ErrorCodes::LOGICAL_ERROR);
+
+        if (data_types[i]->isNumeric())
+        {
+            /// An empty string instead of a number.
+            if (curr_position == prev_position)
+            {
+                out << "ERROR: text ";
+                verbosePrintString(prev_position, std::min(prev_position + 10, istr.buffer().end()), out);
+                out << " is not like " << data_types[i]->getName() << "\n";
+                return false;
+            }
+        }
+
+        out << "parsed text: ";
+        verbosePrintString(prev_position, curr_position, out);
+
+        if (exception)
+        {
+            if (data_types[i]->getName() == "DateTime")
+                out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss format.\n";
+            else if (data_types[i]->getName() == "Date")
+                out << "ERROR: Date must be in YYYY-MM-DD format.\n";
+            else
+                out << "ERROR\n";
+            return false;
+        }
+
+        out << "\n";
+
+        if (data_types[i]->isNumeric())
+        {
+            if (*curr_position != '\n' && *curr_position != '\t')
+            {
+                out << "ERROR: garbage after " << data_types[i]->getName() << ": ";
+                verbosePrintString(curr_position, std::min(curr_position + 10, istr.buffer().end()), out);
+                out << "\n";
+
+                if (data_types[i]->getName() == "DateTime")
+                    out << "ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss format.\n";
+                else if (data_types[i]->getName() == "Date")
+                    out << "ERROR: Date must be in YYYY-MM-DD format.\n";
+
+                return false;
+            }
+        }
+
+        if (   (typeid_cast<const DataTypeUInt8 *>(data_types[i].get()) && field.get<UInt64>() > std::numeric_limits<UInt8>::max())
+            || (typeid_cast<const DataTypeUInt16 *>(data_types[i].get()) && field.get<UInt64>() > std::numeric_limits<UInt16>::max())
+            || (typeid_cast<const DataTypeUInt32 *>(data_types[i].get()) && field.get<UInt64>() > std::numeric_limits<UInt32>::max())
+            || (typeid_cast<const DataTypeInt8 *>(data_types[i].get())
+                && (field.get<Int64>() > std::numeric_limits<Int8>::max() || field.get<Int64>() < std::numeric_limits<Int8>::min()))
+            || (typeid_cast<const DataTypeInt16 *>(data_types[i].get())
+                && (field.get<Int64>() > std::numeric_limits<Int16>::max() || field.get<Int64>() < std::numeric_limits<Int16>::min()))
+            || (typeid_cast<const DataTypeInt32 *>(data_types[i].get())
+                && (field.get<Int64>() > std::numeric_limits<Int32>::max() || field.get<Int64>() < std::numeric_limits<Int32>::min())))
+        {
+            out << "ERROR: parsed number is out of range of data type.\n";
+            return false;
+        }
+
+        /// Delimiters
+        if (i + 1 == size)
+        {
+            if (!istr.eof())
+            {
+                try
+                {
+                    assertString("\n", istr);
+                }
+                catch (const DB::Exception &)
+                {
+                    if (*istr.position() == '\t')
+                    {
+                        out << "ERROR: Tab found where line feed is expected."
+                            " It's like your file has more columns than expected.\n"
+                            "And if your file has the right number of columns, maybe it has an unescaped tab in a value.\n";
+                    }
+                    else if (*istr.position() == '\r')
+                    {
+                        out << "ERROR: Carriage return found where line feed is expected."
+                            " It's like your file has DOS/Windows style line separators, which are illegal in TabSeparated format.\n";
+                    }
+                    else
+                    {
+                        out << "ERROR: There is no line feed. ";
+                        verbosePrintString(istr.position(), istr.position() + 1, out);
+                        out << " found instead.\n";
+                    }
+                    return false;
+                }
+            }
+        }
+        else
+        {
+            try
+            {
+                assertString("\t", istr);
+            }
+            catch (const DB::Exception &)
+            {
+                if (*istr.position() == '\n')
+                {
+                    out << "ERROR: Line feed found where tab is expected."
+                        " It's like your file has fewer columns than expected.\n"
+                        "And if your file has the right number of columns, maybe it has an unescaped backslash in a value before a tab, which causes the tab to be escaped.\n";
+                }
+                else if (*istr.position() == '\r')
+                {
+                    out << "ERROR: Carriage return found where tab is expected.\n";
+                }
+                else
+                {
+                    out << "ERROR: There is no tab. ";
+                    verbosePrintString(istr.position(), istr.position() + 1, out);
+                    out << " found instead.\n";
+                }
+                return false;
+            }
+        }
+    }
+
+    return true;

View File

@ -11,9 +11,9 @@ namespace DB
ReadBufferAIO::ReadBufferAIO(const std::string & filename_, size_t buffer_size_, int flags_, mode_t mode_, ReadBufferAIO::ReadBufferAIO(const std::string & filename_, size_t buffer_size_, int flags_, mode_t mode_,
char * existing_memory_) char * existing_memory_)
: BufferWithOwnMemory(buffer_size_, existing_memory_, DEFAULT_AIO_FILE_BLOCK_SIZE), : BufferWithOwnMemory<ReadBuffer>(buffer_size_, existing_memory_, DEFAULT_AIO_FILE_BLOCK_SIZE),
fill_buffer(BufferWithOwnMemory(buffer_size_, nullptr, DEFAULT_AIO_FILE_BLOCK_SIZE)), fill_buffer(BufferWithOwnMemory(buffer_size_, nullptr, DEFAULT_AIO_FILE_BLOCK_SIZE)),
request_ptrs{ &request }, events(1), filename(filename_) filename(filename_)
{ {
ProfileEvents::increment(ProfileEvents::FileOpen); ProfileEvents::increment(ProfileEvents::FileOpen);
@ -56,19 +56,11 @@ void ReadBufferAIO::setMaxBytes(size_t max_bytes_read_)
got_exception = true; got_exception = true;
throw Exception("Illegal attempt to set the maximum number of bytes to read from file " + filename, ErrorCodes::LOGICAL_ERROR); throw Exception("Illegal attempt to set the maximum number of bytes to read from file " + filename, ErrorCodes::LOGICAL_ERROR);
} }
if ((max_bytes_read_ % DEFAULT_AIO_FILE_BLOCK_SIZE) != 0)
{
got_exception = true;
throw Exception("Invalid maximum number of bytes to read from file " + filename, ErrorCodes::AIO_UNALIGNED_SIZE_ERROR);
}
max_bytes_read = max_bytes_read_; max_bytes_read = max_bytes_read_;
} }
off_t ReadBufferAIO::seek(off_t off, int whence) off_t ReadBufferAIO::seek(off_t off, int whence)
{ {
if ((off % DEFAULT_AIO_FILE_BLOCK_SIZE) != 0)
throw Exception("Invalid offset for ReadBufferAIO::seek", ErrorCodes::AIO_UNALIGNED_SIZE_ERROR);
waitForAIOCompletion(); waitForAIOCompletion();
off_t new_pos; off_t new_pos;
@ -152,21 +144,33 @@ bool ReadBufferAIO::nextImpl()
if (is_eof)
	return true;

+/// The number of bytes to request.
+requested_byte_count = std::min(fill_buffer.internalBuffer().size(), max_bytes_read);
+
+/// For the request, round the number of requested bytes up to the next block boundary.
+size_t effective_byte_count = requested_byte_count;
+if ((effective_byte_count % DEFAULT_AIO_FILE_BLOCK_SIZE) != 0)
+	effective_byte_count += DEFAULT_AIO_FILE_BLOCK_SIZE - (effective_byte_count % DEFAULT_AIO_FILE_BLOCK_SIZE);
+
+/// Also align the file position down to the previous block boundary.
+off_t effective_pos_in_file = pos_in_file - (pos_in_file % DEFAULT_AIO_FILE_BLOCK_SIZE);

/// Create the request.
request.aio_lio_opcode = IOCB_CMD_PREAD;
request.aio_fildes = fd;
request.aio_buf = reinterpret_cast<UInt64>(fill_buffer.internalBuffer().begin());
-request.aio_nbytes = std::min(fill_buffer.internalBuffer().size(), max_bytes_read);
-request.aio_offset = pos_in_file;
-request.aio_reqprio = 0;
+request.aio_nbytes = effective_byte_count;
+request.aio_offset = effective_pos_in_file;

/// Submit the request.
while (io_submit(aio_context.ctx, request_ptrs.size(), &request_ptrs[0]) < 0)
+{
	if (errno != EINTR)
	{
		got_exception = true;
		throw Exception("Cannot submit request for asynchronous IO on file " + filename, ErrorCodes::AIO_SUBMIT_ERROR);
	}
+}

is_pending_read = true;
return true;
@ -177,11 +181,13 @@ void ReadBufferAIO::waitForAIOCompletion()
if (is_pending_read)
{
	while (io_getevents(aio_context.ctx, events.size(), events.size(), &events[0], nullptr) < 0)
+	{
		if (errno != EINTR)
		{
			got_exception = true;
			throw Exception("Failed to wait for asynchronous IO completion on file " + filename, ErrorCodes::AIO_COMPLETION_ERROR);
		}
+	}

	is_pending_read = false;
	off_t bytes_read = events[0].res;
@ -191,23 +197,28 @@ void ReadBufferAIO::waitForAIOCompletion()
	got_exception = true;
	throw Exception("Asynchronous read error on file " + filename, ErrorCodes::AIO_READ_ERROR);
}

-if ((bytes_read % DEFAULT_AIO_FILE_BLOCK_SIZE) != 0)
-{
-	got_exception = true;
-	throw Exception("Received unaligned number of bytes from file " + filename, ErrorCodes::AIO_UNALIGNED_SIZE_ERROR);
-}

if (pos_in_file > (std::numeric_limits<off_t>::max() - bytes_read))
{
	got_exception = true;
	throw Exception("File position overflowed", ErrorCodes::LOGICAL_ERROR);
}

-pos_in_file += bytes_read;
-total_bytes_read += bytes_read;
+/// Ignore the excess bytes on the right.
+bytes_read = std::min(bytes_read, static_cast<off_t>(requested_byte_count));

if (bytes_read > 0)
	fill_buffer.buffer().resize(bytes_read);
-if ((static_cast<size_t>(bytes_read) < fill_buffer.internalBuffer().size()) || (total_bytes_read == max_bytes_read))
+if (static_cast<size_t>(bytes_read) < fill_buffer.internalBuffer().size())
	is_eof = true;

+/// Ignore the excess bytes on the left.
+working_buffer_offset = pos_in_file % DEFAULT_AIO_FILE_BLOCK_SIZE;
+bytes_read -= working_buffer_offset;
+
+pos_in_file += bytes_read;
+total_bytes_read += bytes_read;
+
+if (total_bytes_read == max_bytes_read)
+	is_eof = true;
}
}
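The pattern above lets callers read from arbitrary offsets while the kernel still sees block-aligned requests: the file position is rounded down to a block boundary, the byte count is rounded up, and the padding is discarded after completion (excess bytes on the left via working_buffer_offset, on the right via std::min against requested_byte_count). A minimal sketch of just that arithmetic, with an illustrative kBlockSize constant and hypothetical names standing in for DEFAULT_AIO_FILE_BLOCK_SIZE and the member fields:

#include <cstddef>
#include <sys/types.h>

static const size_t kBlockSize = 4096; /// stands in for DEFAULT_AIO_FILE_BLOCK_SIZE

struct AlignedRequest
{
	off_t pos;          /// file position rounded down to a block boundary
	size_t bytes;       /// byte count covering the request, rounded up to a block boundary
	size_t left_offset; /// bytes to skip at the start of the buffer after the read
};

/// Cover the user range [user_pos, user_pos + user_bytes) with an aligned request.
AlignedRequest alignRequest(off_t user_pos, size_t user_bytes)
{
	AlignedRequest req;
	req.left_offset = user_pos % kBlockSize;
	req.pos = user_pos - req.left_offset;
	req.bytes = req.left_offset + user_bytes;
	if ((req.bytes % kBlockSize) != 0)
		req.bytes += kBlockSize - (req.bytes % kBlockSize);
	return req;
}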

View File

@ -1,7 +1,6 @@
#include <DB/IO/WriteBufferAIO.h>
#include <DB/Common/ProfileEvents.h>
#include <DB/Core/ErrorCodes.h>
-#include <DB/Core/Defines.h>

#include <limits>
#include <sys/types.h>
@ -12,9 +11,9 @@ namespace DB
WriteBufferAIO::WriteBufferAIO(const std::string & filename_, size_t buffer_size_, int flags_, mode_t mode_,
	char * existing_memory_)
-	: BufferWithOwnMemory(buffer_size_, existing_memory_, DEFAULT_AIO_FILE_BLOCK_SIZE),
+	: BufferWithOwnMemory<WriteBuffer>(buffer_size_, existing_memory_, DEFAULT_AIO_FILE_BLOCK_SIZE),
	flush_buffer(BufferWithOwnMemory(buffer_size_, nullptr, DEFAULT_AIO_FILE_BLOCK_SIZE)),
-	request_ptrs{ &request }, events(1), filename(filename_)
+	filename(filename_)
{
	ProfileEvents::increment(ProfileEvents::FileOpen);
@ -29,6 +28,16 @@ WriteBufferAIO::WriteBufferAIO(const std::string & filename_, size_t buffer_size
throwFromErrno("Cannot open file " + filename, error_code); throwFromErrno("Cannot open file " + filename, error_code);
} }
ProfileEvents::increment(ProfileEvents::FileOpen);
fd2 = ::open(filename.c_str(), O_RDONLY, mode_);
if (fd2 == -1)
{
got_exception = true;
auto error_code = (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE;
throwFromErrno("Cannot open file " + filename, error_code);
}
::memset(&request, 0, sizeof(request)); ::memset(&request, 0, sizeof(request));
} }
@ -48,13 +57,12 @@ WriteBufferAIO::~WriteBufferAIO()
if (fd != -1)
	::close(fd);
+if (fd2 != -1)
+	::close(fd2);
}

off_t WriteBufferAIO::seek(off_t off, int whence)
{
-	if ((off % DEFAULT_AIO_FILE_BLOCK_SIZE) != 0)
-		throw Exception("Invalid offset for WriteBufferAIO::seek", ErrorCodes::AIO_UNALIGNED_SIZE_ERROR);

	flush();

	if (whence == SEEK_SET)
@ -89,6 +97,9 @@ off_t WriteBufferAIO::seek(off_t off, int whence)
throw Exception("WriteBufferAIO::seek expects SEEK_SET or SEEK_CUR as whence", ErrorCodes::ARGUMENT_OUT_OF_BOUND); throw Exception("WriteBufferAIO::seek expects SEEK_SET or SEEK_CUR as whence", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
} }
if (pos_in_file > max_pos_in_file)
max_pos_in_file = pos_in_file;
return pos_in_file; return pos_in_file;
} }
@ -99,9 +110,6 @@ off_t WriteBufferAIO::getPositionInFile()
void WriteBufferAIO::truncate(off_t length)
{
-	if ((length % DEFAULT_AIO_FILE_BLOCK_SIZE) != 0)
-		throw Exception("Invalid length for WriteBufferAIO::ftruncate", ErrorCodes::AIO_UNALIGNED_SIZE_ERROR);

	flush();

	int res = ::ftruncate(fd, length);
@ -139,57 +147,169 @@ void WriteBufferAIO::nextImpl()
waitForAIOCompletion();
swapBuffers();

-/// Create the request.
-request.aio_lio_opcode = IOCB_CMD_PWRITE;
-request.aio_fildes = fd;
-request.aio_buf = reinterpret_cast<UInt64>(flush_buffer.buffer().begin());
-request.aio_nbytes = flush_buffer.offset();
-request.aio_offset = pos_in_file;
-request.aio_reqprio = 0;
-
-if ((request.aio_nbytes % DEFAULT_AIO_FILE_BLOCK_SIZE) != 0)
-{
-	got_exception = true;
-	throw Exception("Illegal attempt to write unaligned data to file " + filename, ErrorCodes::AIO_UNALIGNED_SIZE_ERROR);
-}
+truncation_count = 0;
+
+/// The disk region we want to write the data to.
+const off_t region_begin = pos_in_file;
+const off_t region_end = pos_in_file + flush_buffer.offset();
+const size_t region_size = region_end - region_begin;
+
+/// The disk region we actually write the data to.
+const size_t region_left_padding = region_begin % DEFAULT_AIO_FILE_BLOCK_SIZE;
+const size_t region_right_padding = (DEFAULT_AIO_FILE_BLOCK_SIZE - (region_end % DEFAULT_AIO_FILE_BLOCK_SIZE)) % DEFAULT_AIO_FILE_BLOCK_SIZE;
+const off_t region_aligned_begin = region_begin - region_left_padding;
+const off_t region_aligned_end = region_end + region_right_padding;
+const size_t region_aligned_size = region_aligned_end - region_aligned_begin;
+
+/// The buffer of data we want to write to disk.
+const Position buffer_begin = flush_buffer.buffer().begin();
+Position buffer_end = buffer_begin + region_size;
+size_t buffer_size = buffer_end - buffer_begin;
+const size_t buffer_capacity = flush_buffer.buffer().size();
+
+/// Rearrange the buffer so that it reflects the structure of the disk region.
+size_t excess_count = 0;
+if (region_left_padding > 0)
+{
+	if ((region_left_padding + buffer_size) > buffer_capacity)
+	{
+		excess_count = region_left_padding + buffer_size - buffer_capacity;
+		::memcpy(&memory_page[0], buffer_end - excess_count, excess_count);
+		::memset(&memory_page[excess_count], 0, memory_page.size() - excess_count);
+		buffer_size = buffer_capacity;
+	}
+	else
+		buffer_size += region_left_padding;
+
+	buffer_end = buffer_begin + buffer_size;
+
+	::memmove(buffer_begin + region_left_padding, buffer_begin, buffer_size - region_left_padding);
+
+	ssize_t read_count = ::pread(fd2, buffer_begin, region_left_padding, region_aligned_begin);
+	if (read_count < 0)
+	{
+		got_exception = true;
+		throw Exception("Read error", ErrorCodes::AIO_READ_ERROR);
+	}
+
+	::memset(buffer_begin + read_count, 0, region_left_padding - read_count);
+}
+
+if (region_right_padding > 0)
+{
+	Position from;
+	if (excess_count > 0)
+		from = &memory_page[excess_count];
+	else
+		from = buffer_end;
+
+	ssize_t read_count = ::pread(fd2, from, region_right_padding, region_end);
+	if (read_count < 0)
+	{
+		got_exception = true;
+		throw Exception("Read error", ErrorCodes::AIO_READ_ERROR);
+	}
+
+	truncation_count = region_right_padding - read_count;
+
+	if (from == buffer_end)
+		::memset(from + read_count, 0, truncation_count);
+}
+
+/// Create the asynchronous write request.
+size_t i = 0;
+iov[i].iov_base = buffer_begin;
+iov[i].iov_len = ((excess_count > 0) ? buffer_capacity : region_aligned_size);
+++i;
+if (excess_count > 0)
+{
+	iov[i].iov_base = &memory_page[0];
+	iov[i].iov_len = memory_page.size();
+	++i;
+}
+
+bytes_to_write = 0;
+for (size_t j = 0; j < i; ++j)
+{
+	if ((iov[j].iov_len > std::numeric_limits<off_t>::max()) ||
+		(static_cast<off_t>(iov[j].iov_len) > (std::numeric_limits<off_t>::max() - bytes_to_write)))
+	{
+		got_exception = true;
+		throw Exception("Overflow on bytes to write", ErrorCodes::LOGICAL_ERROR);
+	}
+	bytes_to_write += iov[j].iov_len;
+}
+
+request.aio_lio_opcode = IOCB_CMD_PWRITEV;
+request.aio_fildes = fd;
+request.aio_buf = reinterpret_cast<UInt64>(iov);
+request.aio_nbytes = i;
+request.aio_offset = region_aligned_begin;

/// Submit the request.
while (io_submit(aio_context.ctx, request_ptrs.size(), &request_ptrs[0]) < 0)
+{
	if (errno != EINTR)
	{
		got_exception = true;
		throw Exception("Cannot submit request for asynchronous IO on file " + filename, ErrorCodes::AIO_SUBMIT_ERROR);
	}
+}

is_pending_write = true;
}
void WriteBufferAIO::waitForAIOCompletion()
{
-	if (is_pending_write)
-	{
-		while (io_getevents(aio_context.ctx, events.size(), events.size(), &events[0], nullptr) < 0)
-			if (errno != EINTR)
-			{
-				got_exception = true;
-				throw Exception("Failed to wait for asynchronous IO completion on file " + filename, ErrorCodes::AIO_COMPLETION_ERROR);
-			}
-
-		is_pending_write = false;
-		off_t bytes_written = events[0].res;
-
-		if ((bytes_written < 0) || (static_cast<size_t>(bytes_written) < flush_buffer.offset()))
-		{
-			got_exception = true;
-			throw Exception("Asynchronous write error on file " + filename, ErrorCodes::AIO_WRITE_ERROR);
-		}
-
-		if (pos_in_file > (std::numeric_limits<off_t>::max() - bytes_written))
-		{
-			got_exception = true;
-			throw Exception("File position overflowed", ErrorCodes::LOGICAL_ERROR);
-		}
-
-		pos_in_file += bytes_written;
-	}
+	if (!is_pending_write)
+		return;
+
+	while (io_getevents(aio_context.ctx, events.size(), events.size(), &events[0], nullptr) < 0)
+	{
+		if (errno != EINTR)
+		{
+			got_exception = true;
+			throw Exception("Failed to wait for asynchronous IO completion on file " + filename, ErrorCodes::AIO_COMPLETION_ERROR);
+		}
+	}
+
+	is_pending_write = false;
+	off_t bytes_written = events[0].res;
+
+	if (bytes_written < bytes_to_write)
+	{
+		got_exception = true;
+		throw Exception("Asynchronous write error on file " + filename, ErrorCodes::AIO_WRITE_ERROR);
+	}
+
+	bytes_written -= truncation_count;
+
+	off_t pos_offset = bytes_written - (pos_in_file - request.aio_offset);
+	if (pos_in_file > (std::numeric_limits<off_t>::max() - pos_offset))
+	{
+		got_exception = true;
+		throw Exception("File position overflowed", ErrorCodes::LOGICAL_ERROR);
+	}
+	pos_in_file += pos_offset;
+
+	if (pos_in_file > max_pos_in_file)
+		max_pos_in_file = pos_in_file;
+
+	if (truncation_count > 0)
+	{
+		/// Truncate the file to remove the excess zeros from it.
+		int res = ::ftruncate(fd, max_pos_in_file);
+		if (res == -1)
+		{
+			got_exception = true;
+			throwFromErrno("Cannot truncate file " + filename, ErrorCodes::CANNOT_TRUNCATE_FILE);
+		}
+	}
}
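The rewritten nextImpl() is a read-modify-write: the target region is widened to block boundaries, the partial blocks at both edges are pre-read (through the second descriptor fd2), the payload is written as one aligned request, and the trailing zeros are later cut off with ftruncate. A compressed sketch of the same idea, using synchronous pread/pwrite for brevity where the real code submits IOCB_CMD_PWRITEV through io_submit; kBlockSize and writeUnaligned are illustrative names, not the library's API:

#include <unistd.h>
#include <cstring>
#include <vector>
#include <cstddef>

static const size_t kBlockSize = 4096;   /// stands in for DEFAULT_AIO_FILE_BLOCK_SIZE

/// Write `size` bytes at an arbitrary `pos` through one block-aligned request.
/// Zeros written past the real end of the data (truncation_count in the real
/// code) would still have to be removed with ftruncate afterwards.
bool writeUnaligned(int fd, const char * data, size_t size, off_t pos)
{
	const size_t left = pos % kBlockSize;
	const off_t aligned_begin = pos - left;
	const off_t end = pos + size;
	const size_t right = (kBlockSize - end % kBlockSize) % kBlockSize;
	const size_t aligned_size = left + size + right;

	std::vector<char> region(aligned_size, 0);

	/// Pre-read the partial edge blocks so the bytes around the payload survive.
	if (left > 0 && ::pread(fd, &region[0], kBlockSize, aligned_begin) < 0)
		return false;
	if (right > 0 && ::pread(fd, &region[aligned_size - kBlockSize], kBlockSize,
							 aligned_begin + aligned_size - kBlockSize) < 0)
		return false;

	::memcpy(&region[left], data, size);
	return ::pwrite(fd, &region[0], aligned_size, aligned_begin)
		== static_cast<ssize_t>(aligned_size);
}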

View File

@ -14,7 +14,7 @@ namespace
{
void run();
-void prepare(std::string & directory, std::string & filename, std::string & buf);
+void prepare(size_t s, std::string & directory, std::string & filename, std::string & buf);
void die(const std::string & msg);
void run_test(unsigned int num, const std::function<bool()> func);
@ -22,14 +22,16 @@ bool test1(const std::string & filename);
bool test2(const std::string & filename, const std::string & buf);
bool test3(const std::string & filename, const std::string & buf);
bool test4(const std::string & filename, const std::string & buf);
-bool test5(const std::string & filename);
+bool test5(const std::string & filename, const std::string & buf);
bool test6(const std::string & filename, const std::string & buf);
bool test7(const std::string & filename, const std::string & buf);
-bool test8(const std::string & filename);
+bool test8(const std::string & filename, const std::string & buf);
bool test9(const std::string & filename, const std::string & buf);
bool test10(const std::string & filename, const std::string & buf);
bool test11(const std::string & filename);
bool test12(const std::string & filename, const std::string & buf);
+bool test13(const std::string & filename, const std::string & buf);
+bool test14(const std::string & filename, const std::string & buf);

void run()
{
@ -38,7 +40,12 @@ void run()
std::string directory;
std::string filename;
std::string buf;
-prepare(directory, filename, buf);
+prepare(10 * DEFAULT_AIO_FILE_BLOCK_SIZE, directory, filename, buf);
+
+std::string directory2;
+std::string filename2;
+std::string buf2;
+prepare(2 * DEFAULT_AIO_FILE_BLOCK_SIZE - 3, directory2, filename2, buf2);

const std::vector<std::function<bool()> > tests =
{
@ -46,14 +53,16 @@ void run()
std::bind(test2, std::ref(filename), std::ref(buf)),
std::bind(test3, std::ref(filename), std::ref(buf)),
std::bind(test4, std::ref(filename), std::ref(buf)),
-std::bind(test5, std::ref(filename)),
+std::bind(test5, std::ref(filename), std::ref(buf)),
std::bind(test6, std::ref(filename), std::ref(buf)),
std::bind(test7, std::ref(filename), std::ref(buf)),
-std::bind(test8, std::ref(filename)),
+std::bind(test8, std::ref(filename), std::ref(buf)),
std::bind(test9, std::ref(filename), std::ref(buf)),
std::bind(test10, std::ref(filename), std::ref(buf)),
std::bind(test11, std::ref(filename)),
-std::bind(test12, std::ref(filename), std::ref(buf))
+std::bind(test12, std::ref(filename), std::ref(buf)),
+std::bind(test13, std::ref(filename2), std::ref(buf2)),
+std::bind(test14, std::ref(filename), std::ref(buf))
};

unsigned int num = 0;
@ -64,9 +73,10 @@ void run()
}

fs::remove_all(directory);
+fs::remove_all(directory2);
}

-void prepare(std::string & directory, std::string & filename, std::string & buf)
+void prepare(size_t s, std::string & directory, std::string & filename, std::string & buf)
{
	static const std::string symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
@ -172,20 +182,22 @@ bool test4(const std::string & filename, const std::string & buf)
	return n_read == 0;
}

-bool test5(const std::string & filename)
+bool test5(const std::string & filename, const std::string & buf)
{
-	bool ok = false;
-
-	try
-	{
-		DB::ReadBufferAIO in(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE);
-		in.setMaxBytes(DEFAULT_AIO_FILE_BLOCK_SIZE >> 1);
-	}
-	catch (const DB::Exception &)
-	{
-		ok = true;
-	}
-
-	return ok;
+	std::string newbuf;
+	newbuf.resize(1 + (DEFAULT_AIO_FILE_BLOCK_SIZE >> 1));
+
+	DB::ReadBufferAIO in(filename, DEFAULT_AIO_FILE_BLOCK_SIZE);
+	in.setMaxBytes(1 + (DEFAULT_AIO_FILE_BLOCK_SIZE >> 1));
+
+	size_t count = in.read(&newbuf[0], newbuf.length());
+	if (count != newbuf.length())
+		return false;
+
+	if (newbuf != buf.substr(0, newbuf.length()))
+		return false;
+
+	return true;
}

bool test6(const std::string & filename, const std::string & buf)
@ -222,21 +234,22 @@ bool test7(const std::string & filename, const std::string & buf)
	return (newbuf == buf.substr(DEFAULT_AIO_FILE_BLOCK_SIZE));
}

-bool test8(const std::string & filename)
+bool test8(const std::string & filename, const std::string & buf)
{
-	bool ok = false;
-
-	try
-	{
-		DB::ReadBufferAIO in(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE);
-		(void) in.seek(DEFAULT_AIO_FILE_BLOCK_SIZE + 1, SEEK_CUR);
-	}
-	catch (const DB::Exception &)
-	{
-		ok = true;
-	}
-
-	return ok;
+	std::string newbuf;
+	newbuf.resize(DEFAULT_AIO_FILE_BLOCK_SIZE - 1);
+
+	DB::ReadBufferAIO in(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE);
+	(void) in.seek(DEFAULT_AIO_FILE_BLOCK_SIZE + 1, SEEK_CUR);
+	size_t count = in.read(&newbuf[0], newbuf.length());
+
+	if (count != newbuf.length())
+		return false;
+	if (newbuf != buf.substr(DEFAULT_AIO_FILE_BLOCK_SIZE + 1, newbuf.length()))
+		return false;
+
+	return true;
}

bool test9(const std::string & filename, const std::string & buf)
@ -328,6 +341,37 @@ bool test12(const std::string & filename, const std::string & buf)
	return ok;
}
bool test13(const std::string & filename, const std::string & buf)
{
std::string newbuf;
newbuf.resize(2 * DEFAULT_AIO_FILE_BLOCK_SIZE - 3);
DB::ReadBufferAIO in(filename, DEFAULT_AIO_FILE_BLOCK_SIZE);
size_t count1 = in.read(&newbuf[0], newbuf.length());
if (count1 != newbuf.length())
return false;
return true;
}
bool test14(const std::string & filename, const std::string & buf)
{
std::string newbuf;
newbuf.resize(1 + (DEFAULT_AIO_FILE_BLOCK_SIZE >> 1));
DB::ReadBufferAIO in(filename, DEFAULT_AIO_FILE_BLOCK_SIZE);
(void) in.seek(2, SEEK_SET);
in.setMaxBytes(3 + (DEFAULT_AIO_FILE_BLOCK_SIZE >> 1));
size_t count = in.read(&newbuf[0], newbuf.length());
if (count != newbuf.length())
return false;
if (newbuf != buf.substr(2, newbuf.length()))
return false;
return true;
}
}

int main()

View File

@ -19,6 +19,12 @@ bool test1();
bool test2();
bool test3();
bool test4();
+bool test5();
+bool test6();
+bool test7();
+bool test8();
+bool test9();
+bool test10();

void run()
{
@ -27,7 +33,13 @@ void run()
test1,
test2,
test3,
-test4
+test4,
+test5,
+test6,
+test7,
+test8,
+test9,
+test10
};

unsigned int num = 0;
@ -282,6 +294,324 @@ bool test4()
	return true;
}
bool test5()
{
namespace fs = boost::filesystem;
static const std::string symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
char pattern[] = "/tmp/fileXXXXXX";
char * dir = ::mkdtemp(pattern);
if (dir == nullptr)
die("Could not create directory");
const std::string directory = std::string(dir);
const std::string filename = directory + "/foo";
size_t n = 10 * DEFAULT_AIO_FILE_BLOCK_SIZE;
std::string buf;
buf.reserve(n);
for (size_t i = 0; i < n; ++i)
buf += symbols[i % symbols.length()];
{
DB::WriteBufferAIO out(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE);
if (out.getFileName() != filename)
return false;
if (out.getFD() == -1)
return false;
out.seek(1, SEEK_SET);
out.write(&buf[0], buf.length());
}
std::ifstream in(filename.c_str());
if (!in.is_open())
die("Could not open file");
std::string received{ std::istreambuf_iterator<char>(in), std::istreambuf_iterator<char>() };
in.close();
fs::remove_all(directory);
return received.substr(1) == buf;
}
bool test6()
{
namespace fs = boost::filesystem;
static const std::string symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
char pattern[] = "/tmp/fileXXXXXX";
char * dir = ::mkdtemp(pattern);
if (dir == nullptr)
die("Could not create directory");
const std::string directory = std::string(dir);
const std::string filename = directory + "/foo";
size_t n = 10 * DEFAULT_AIO_FILE_BLOCK_SIZE;
std::string buf;
buf.reserve(n);
for (size_t i = 0; i < n; ++i)
buf += symbols[i % symbols.length()];
std::string buf2 = "1111111111";
{
DB::WriteBufferAIO out(filename, 3 * DEFAULT_AIO_FILE_BLOCK_SIZE);
if (out.getFileName() != filename)
return false;
if (out.getFD() == -1)
return false;
out.seek(3, SEEK_SET);
out.write(&buf[0], buf.length());
out.seek(-2 * DEFAULT_AIO_FILE_BLOCK_SIZE, SEEK_CUR);
out.write(&buf2[0], buf2.length());
}
std::ifstream in(filename.c_str());
if (!in.is_open())
die("Could not open file");
std::string received{ std::istreambuf_iterator<char>(in), std::istreambuf_iterator<char>() };
in.close();
fs::remove_all(directory);
if (received.substr(3, 8 * DEFAULT_AIO_FILE_BLOCK_SIZE) != buf.substr(0, 8 * DEFAULT_AIO_FILE_BLOCK_SIZE))
return false;
if (received.substr(3 + 8 * DEFAULT_AIO_FILE_BLOCK_SIZE, 10) != buf2)
return false;
if (received.substr(13 + 8 * DEFAULT_AIO_FILE_BLOCK_SIZE) != buf.substr(10 + 8 * DEFAULT_AIO_FILE_BLOCK_SIZE))
return false;
return true;
}
bool test7()
{
namespace fs = boost::filesystem;
static const std::string symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
char pattern[] = "/tmp/fileXXXXXX";
char * dir = ::mkdtemp(pattern);
if (dir == nullptr)
die("Could not create directory");
const std::string directory = std::string(dir);
const std::string filename = directory + "/foo";
size_t n = DEFAULT_AIO_FILE_BLOCK_SIZE;
std::string buf;
buf.reserve(n);
for (size_t i = 0; i < n; ++i)
buf += symbols[i % symbols.length()];
std::string buf2 = "1111111111";
{
DB::WriteBufferAIO out(filename, DEFAULT_AIO_FILE_BLOCK_SIZE);
if (out.getFileName() != filename)
return false;
if (out.getFD() == -1)
return false;
out.seek(3, SEEK_SET);
out.write(&buf[0], buf.length());
out.seek(3, SEEK_CUR);
out.write(&buf2[0], buf2.length());
}
std::ifstream in(filename.c_str());
if (!in.is_open())
die("Could not open file");
std::string received{ std::istreambuf_iterator<char>(in), std::istreambuf_iterator<char>() };
if (received.length() != (6 + buf.length() + buf2.length()))
return false;
if (received.substr(0, 3) != std::string(3, '\0'))
return false;
if (received.substr(3, buf.length()) != buf)
return false;
if (received.substr(3 + buf.length(), 3) != std::string(3, '\0'))
return false;
if (received.substr(6 + buf.length()) != buf2)
return false;
in.close();
fs::remove_all(directory);
return true;
}
bool test8()
{
namespace fs = boost::filesystem;
static const std::string symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
char pattern[] = "/tmp/fileXXXXXX";
char * dir = ::mkdtemp(pattern);
if (dir == nullptr)
die("Could not create directory");
const std::string directory = std::string(dir);
const std::string filename = directory + "/foo";
std::string buf2 = "11111111112222222222";
{
// Minimal buffer size = 2 pages.
DB::WriteBufferAIO out(filename, 2 * DEFAULT_AIO_FILE_BLOCK_SIZE);
if (out.getFileName() != filename)
return false;
if (out.getFD() == -1)
return false;
out.seek(DEFAULT_AIO_FILE_BLOCK_SIZE - (buf2.length() / 2), SEEK_SET);
out.write(&buf2[0], buf2.length());
}
std::ifstream in(filename.c_str());
if (!in.is_open())
die("Could not open file");
std::string received{ std::istreambuf_iterator<char>(in), std::istreambuf_iterator<char>() };
if (received.length() != 4106)
return false;
if (received.substr(0, 4086) != std::string(4086, '\0'))
return false;
if (received.substr(4086, 20) != buf2)
return false;
in.close();
fs::remove_all(directory);
return true;
}
bool test9()
{
namespace fs = boost::filesystem;
static const std::string symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
char pattern[] = "/tmp/fileXXXXXX";
char * dir = ::mkdtemp(pattern);
if (dir == nullptr)
die("Could not create directory");
const std::string directory = std::string(dir);
const std::string filename = directory + "/foo";
std::string buf2 = "11111111112222222222";
{
// Minimal buffer size = 2 pages.
DB::WriteBufferAIO out(filename, 2 * DEFAULT_AIO_FILE_BLOCK_SIZE);
if (out.getFileName() != filename)
return false;
if (out.getFD() == -1)
return false;
out.seek(2 * DEFAULT_AIO_FILE_BLOCK_SIZE - (buf2.length() / 2), SEEK_SET);
out.write(&buf2[0], buf2.length());
}
std::ifstream in(filename.c_str());
if (!in.is_open())
die("Could not open file");
std::string received{ std::istreambuf_iterator<char>(in), std::istreambuf_iterator<char>() };
if (received.length() != 8202)
return false;
if (received.substr(0, 8182) != std::string(8182, '\0'))
return false;
if (received.substr(8182, 20) != buf2)
return false;
in.close();
fs::remove_all(directory);
return true;
}
bool test10()
{
namespace fs = boost::filesystem;
static const std::string symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
char pattern[] = "/tmp/fileXXXXXX";
char * dir = ::mkdtemp(pattern);
if (dir == nullptr)
die("Could not create directory");
const std::string directory = std::string(dir);
const std::string filename = directory + "/foo";
size_t n = 3 * DEFAULT_AIO_FILE_BLOCK_SIZE;
std::string buf;
buf.reserve(n);
for (size_t i = 0; i < n; ++i)
buf += symbols[i % symbols.length()];
std::string buf2(DEFAULT_AIO_FILE_BLOCK_SIZE + 10, '1');
{
DB::WriteBufferAIO out(filename, 2 * DEFAULT_AIO_FILE_BLOCK_SIZE);
if (out.getFileName() != filename)
return false;
if (out.getFD() == -1)
return false;
out.seek(3, SEEK_SET);
out.write(&buf[0], buf.length());
out.seek(-DEFAULT_AIO_FILE_BLOCK_SIZE, SEEK_CUR);
out.write(&buf2[0], buf2.length());
}
std::ifstream in(filename.c_str());
if (!in.is_open())
die("Could not open file");
std::string received{ std::istreambuf_iterator<char>(in), std::istreambuf_iterator<char>() };
in.close();
fs::remove_all(directory);
if (received.substr(3, 2 * DEFAULT_AIO_FILE_BLOCK_SIZE) != buf.substr(0, 2 * DEFAULT_AIO_FILE_BLOCK_SIZE))
return false;
if (received.substr(3 + 2 * DEFAULT_AIO_FILE_BLOCK_SIZE, DEFAULT_AIO_FILE_BLOCK_SIZE + 10) != buf2)
return false;
return true;
}
}

int main()

View File

@ -218,6 +218,7 @@ void Compiler::compile(
" -I /usr/share/clickhouse/headers/libs/libcityhash/" " -I /usr/share/clickhouse/headers/libs/libcityhash/"
" -I /usr/share/clickhouse/headers/libs/libcommon/include/" " -I /usr/share/clickhouse/headers/libs/libcommon/include/"
" -I /usr/share/clickhouse/headers/libs/libdouble-conversion/" " -I /usr/share/clickhouse/headers/libs/libdouble-conversion/"
" -I /usr/share/clickhouse/headers/libs/libcpuid/include/"
" -I /usr/share/clickhouse/headers/libs/libmysqlxx/include/" " -I /usr/share/clickhouse/headers/libs/libmysqlxx/include/"
" -I /usr/share/clickhouse/headers/libs/libstatdaemons/include/" " -I /usr/share/clickhouse/headers/libs/libstatdaemons/include/"
" -I /usr/share/clickhouse/headers/libs/libstats/include/" " -I /usr/share/clickhouse/headers/libs/libstats/include/"

View File

@ -494,8 +494,7 @@ const Dictionaries & Context::getDictionaries() const
{
	Poco::ScopedLock<Poco::Mutex> lock(shared->mutex);

-	if (!shared->dictionaries)
-		shared->dictionaries = new Dictionaries;
+	tryCreateDictionaries();

	return *shared->dictionaries;
}
@ -505,14 +504,25 @@ const ExternalDictionaries & Context::getExternalDictionaries() const
{
	Poco::ScopedLock<Poco::Mutex> lock(shared->mutex);

+	tryCreateExternalDictionaries();
+	return *shared->external_dictionaries;
+}
+
+void Context::tryCreateDictionaries(const bool throw_on_error) const
+{
+	if (!shared->dictionaries)
+		shared->dictionaries = new Dictionaries{throw_on_error};
+}
+
+void Context::tryCreateExternalDictionaries(const bool throw_on_error) const
+{
	if (!shared->external_dictionaries)
	{
		if (!this->global_context)
			throw Exception("Logical error: there is no global context", ErrorCodes::LOGICAL_ERROR);
-		shared->external_dictionaries = new ExternalDictionaries{*this->global_context};
+		shared->external_dictionaries = new ExternalDictionaries{*this->global_context, throw_on_error};
	}
-
-	return *shared->external_dictionaries;
}
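Both getters now funnel into one lazy-construction path that Server.cpp can also drive eagerly at startup. A minimal sketch of the pattern, with illustrative names (ContextLike, Dictionaries) rather than the real classes, and the locking simplified to a single non-recursive mutex:

#include <mutex>
#include <memory>

struct Dictionaries
{
	explicit Dictionaries(bool throw_on_error) { /* load; rethrow on failure if asked */ }
};

class ContextLike
{
public:
	Dictionaries & getDictionaries()
	{
		std::lock_guard<std::mutex> lock(mutex);
		tryCreateDictionaries(false);   /// lazy path: errors are reported elsewhere
		return *dictionaries;
	}

	/// Also callable eagerly at startup with throw_on_error = true.
	void tryCreateDictionaries(bool throw_on_error)
	{
		if (!dictionaries)
			dictionaries.reset(new Dictionaries(throw_on_error));
	}

private:
	std::mutex mutex;
	std::unique_ptr<Dictionaries> dictionaries;
};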

View File

@ -34,12 +34,12 @@ namespace
} }
} }
void ExternalDictionaries::reloadImpl() void ExternalDictionaries::reloadImpl(const bool throw_on_error)
{ {
const auto config_paths = getDictionariesConfigPaths(Poco::Util::Application::instance().config()); const auto config_paths = getDictionariesConfigPaths(Poco::Util::Application::instance().config());
for (const auto & config_path : config_paths) for (const auto & config_path : config_paths)
reloadFromFile(config_path); reloadFromFile(config_path, throw_on_error);
/// periodic update /// periodic update
for (auto & dictionary : dictionaries) for (auto & dictionary : dictionaries)
@ -109,7 +109,7 @@ void ExternalDictionaries::reloadImpl()
}
}

-void ExternalDictionaries::reloadFromFile(const std::string & config_path)
+void ExternalDictionaries::reloadFromFile(const std::string & config_path, const bool throw_on_error)
{
	const Poco::File config_file{config_path};
@ -197,8 +197,9 @@ void ExternalDictionaries::reloadFromFile(const std::string & config_path)
}
catch (...)
{
+	const auto exception_ptr = std::current_exception();
	if (!name.empty())
-		stored_exceptions.emplace(name, std::current_exception());
+		stored_exceptions.emplace(name, exception_ptr);

	try
	{
@ -219,6 +220,10 @@ void ExternalDictionaries::reloadFromFile(const std::string & config_path)
		LOG_ERROR(log, config_path << ": cannot create external dictionary '" << name
			<< "'! You must resolve this manually.");
	}

+	/// propagate exception
+	if (throw_on_error)
+		std::rethrow_exception(exception_ptr);
}
}
}
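The catch block above relies on the C++11 exception_ptr machinery: the in-flight exception is captured once with std::current_exception(), stored for later reporting, and re-raised with std::rethrow_exception() only when the caller asked for strict loading. A self-contained sketch of that pattern; loadOne and the messages are illustrative:

#include <exception>
#include <iostream>
#include <stdexcept>

void loadOne(bool throw_on_error)
{
	try
	{
		throw std::runtime_error("bad dictionary config");   /// stand-in for a failed reload
	}
	catch (...)
	{
		/// Capture once; the pointer remains valid after the catch block.
		const auto exception_ptr = std::current_exception();
		std::cerr << "cannot create dictionary; keeping the previous version\n";
		if (throw_on_error)
			std::rethrow_exception(exception_ptr);   /// strict startup: die loudly
	}
}

int main()
{
	loadOne(false);   /// logged and swallowed
	loadOne(true);    /// logged, then propagates out of main
}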

View File

@ -558,7 +558,7 @@ void Set::executeArray(const ColumnArray * key_column, ColumnUInt8::Container_t
}

-BoolMask Set::mayBeTrueInRange(const Range & range)
+BoolMask Set::mayBeTrueInRange(const Range & range) const
{
	if (!ordered_set_elements)
		throw DB::Exception("Ordered set is not created.");
@ -588,7 +588,10 @@ BoolMask Set::mayBeTrueInRange(const Range & range)
}
else
{
-	auto left_it = range.left_bounded ? std::lower_bound(ordered_set_elements->begin(), ordered_set_elements->end(), left) : ordered_set_elements->begin();
+	auto left_it = range.left_bounded
+		? std::lower_bound(ordered_set_elements->begin(), ordered_set_elements->end(), left)
+		: ordered_set_elements->begin();

	if (range.left_bounded && !range.left_included && left_it != ordered_set_elements->end() && *left_it == left)
		++left_it;
@ -599,7 +602,10 @@ BoolMask Set::mayBeTrueInRange(const Range & range)
}
else
{
-	auto right_it = range.right_bounded ? std::upper_bound(ordered_set_elements->begin(), ordered_set_elements->end(), right) : ordered_set_elements->end();
+	auto right_it = range.right_bounded
+		? std::upper_bound(ordered_set_elements->begin(), ordered_set_elements->end(), right)
+		: ordered_set_elements->end();

	if (range.right_bounded && !range.right_included && right_it != ordered_set_elements->begin() && *(right_it--) == right)
		--right_it;
@ -613,13 +619,9 @@ BoolMask Set::mayBeTrueInRange(const Range & range)
	--right_it;

	/// not a single key from the IN clause falls inside the range
	if (*right_it < *left_it)
-	{
		can_be_true = false;
-	}
	else
-	{
		can_be_true = true;
-	}
}
}
}
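The refined branches above implement an interval-versus-set test on the sorted ordered_set_elements: lower_bound finds the first element not below the left edge, upper_bound the first element above the right edge, and the range can match the IN set only if something lies between them. A minimal sketch for a closed interval [left, right], assuming int elements:

#include <algorithm>
#include <vector>

/// True iff some element of the sorted vector lies in the closed range [left, right].
bool mayIntersect(const std::vector<int> & ordered_set, int left, int right)
{
	auto left_it = std::lower_bound(ordered_set.begin(), ordered_set.end(), left);
	auto right_it = std::upper_bound(ordered_set.begin(), ordered_set.end(), right);
	return left_it != right_it;
}

int main()
{
	std::vector<int> set{1, 5, 9};
	bool a = mayIntersect(set, 2, 4);   /// false: nothing between 2 and 4
	bool b = mayIntersect(set, 4, 6);   /// true: 5 falls inside
	return (a == false && b == true) ? 0 : 1;
}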

View File

@ -59,10 +59,12 @@ int main(int argc, char ** argv)
}

Context context;
-NamesAndTypesList columns;
-columns.emplace_back("x", new DataTypeInt16);
-columns.emplace_back("s1", new DataTypeString);
-columns.emplace_back("s2", new DataTypeString);
+NamesAndTypesList columns
+{
+	{"x", new DataTypeInt16},
+	{"s1", new DataTypeString},
+	{"s2", new DataTypeString}
+};

context.setColumns(columns);
ExpressionAnalyzer analyzer(ast, context, context.getColumns());

View File

@ -111,6 +111,15 @@ bool ParserInsertQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Expected &
ParserWhiteSpaceOrComments ws_without_nl(false);
ws_without_nl.ignore(pos, end);

+if (pos != end && *pos == ';')
+	throw Exception("You have an extra ';' before the data for INSERT.\n"
+		"Example:\n\n"
+		"INSERT INTO t (x, y) FORMAT TabSeparated\n"
+		"1\tHello\n"
+		"2\tWorld\n"
+		"\n"
+		"Note that there is no ';' in the first line.", ErrorCodes::SYNTAX_ERROR);
+
if (pos != end && *pos == '\n')
	++pos;

View File

@ -609,9 +609,24 @@ int Server::main(const std::vector<std::string> & args)
if (olap_http_server)
	olap_http_server->start();

-LOG_INFO(log, "Ready for connections.");
-
-waitForTerminationRequest();
+/// try to load dictionaries immediately, throw on error and die
+try
+{
+	if (!config().getBool("dictionaries_lazy_load", true))
+	{
+		global_context->tryCreateDictionaries(true);
+		global_context->tryCreateExternalDictionaries(true);
+	}
+
+	LOG_INFO(log, "Ready for connections.");
+
+	waitForTerminationRequest();
+}
+catch (...)
+{
+	LOG_ERROR(log, "Caught exception while loading dictionaries.");
+	tryLogCurrentException(log);
+}

LOG_DEBUG(log, "Received termination signal. Waiting for current connections to close.");

View File

@ -83,7 +83,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read(
PKCondition key_condition(query, context, data.getColumnsList(), data.getSortDescription());
PKCondition date_condition(query, context, data.getColumnsList(), SortDescription(1, SortColumnDescription(data.date_column_name, 1)));

-if (settings.force_index_by_date && date_condition.alwaysTrue())
+if (settings.force_index_by_date && date_condition.alwaysUnknown())
	throw Exception("Index by date is not used and setting 'force_index_by_date' is set.", ErrorCodes::INDEX_NOT_USED);

/// Select the parts that may contain data satisfying date_condition and that match the condition on _part.
@ -556,7 +556,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPkRange(
size_t marks_count = index.size() / key_size;

/// If the index is not used.
-if (key_condition.alwaysTrue())
+if (key_condition.alwaysUnknown())
{
	res.push_back(MarkRange(0, marks_count));
}

View File

@ -42,7 +42,7 @@ PKCondition::PKCondition(ASTPtr query, const Context & context_, const NamesAndT
if (select.prewhere_expression)
{
	traverseAST(select.prewhere_expression, block_with_constants);
-	rpn.push_back(RPNElement(RPNElement::FUNCTION_AND));
+	rpn.emplace_back(RPNElement::FUNCTION_AND);
}
}
else if (select.prewhere_expression)
@ -51,7 +51,7 @@ PKCondition::PKCondition(ASTPtr query, const Context & context_, const NamesAndT
}
else
{
-	rpn.push_back(RPNElement(RPNElement::FUNCTION_UNKNOWN));
+	rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN);
}
}
@ -59,8 +59,8 @@ bool PKCondition::addCondition(const String & column, const Range & range)
{
	if (!pk_columns.count(column))
		return false;
-	rpn.push_back(RPNElement(RPNElement::FUNCTION_IN_RANGE, pk_columns[column], range));
-	rpn.push_back(RPNElement(RPNElement::FUNCTION_AND));
+	rpn.emplace_back(RPNElement::FUNCTION_IN_RANGE, pk_columns[column], range);
+	rpn.emplace_back(RPNElement::FUNCTION_AND);
	return true;
}
@ -224,7 +224,7 @@ bool PKCondition::operatorFromAST(ASTFunction * func, RPNElement & out)
	return true;
}

-String PKCondition::toString()
+String PKCondition::toString() const
{
	String res;
	for (size_t i = 0; i < rpn.size(); ++i)
@ -236,7 +236,7 @@ String PKCondition::toString()
	return res;
}

-bool PKCondition::mayBeTrueInRange(const Field * left_pk, const Field * right_pk, bool right_bounded)
+bool PKCondition::mayBeTrueInRange(const Field * left_pk, const Field * right_pk, bool right_bounded) const
{
	/// Find the ranges of the key elements.
	std::vector<Range> key_ranges(sort_descr.size(), Range());
@ -264,10 +264,10 @@ bool PKCondition::mayBeTrueInRange(const Field * left_pk, const Field * right_pk
std::vector<BoolMask> rpn_stack;
for (size_t i = 0; i < rpn.size(); ++i)
{
-	RPNElement & element = rpn[i];
+	const auto & element = rpn[i];
	if (element.function == RPNElement::FUNCTION_UNKNOWN)
	{
-		rpn_stack.push_back(BoolMask(true, true));
+		rpn_stack.emplace_back(true, true);
	}
	else if (element.function == RPNElement::FUNCTION_NOT_IN_RANGE || element.function == RPNElement::FUNCTION_IN_RANGE)
	{
@ -275,15 +275,15 @@ bool PKCondition::mayBeTrueInRange(const Field * left_pk, const Field * right_pk
	bool intersects = element.range.intersectsRange(key_range);
	bool contains = element.range.containsRange(key_range);

-	rpn_stack.push_back(BoolMask(intersects, !contains));
+	rpn_stack.emplace_back(intersects, !contains);
	if (element.function == RPNElement::FUNCTION_NOT_IN_RANGE)
		rpn_stack.back() = !rpn_stack.back();
}
else if (element.function == RPNElement::FUNCTION_IN_SET || element.function == RPNElement::FUNCTION_NOT_IN_SET)
{
-	ASTFunction * in_func = typeid_cast<ASTFunction *>(element.in_function.get());
-	ASTs & args = typeid_cast<ASTExpressionList &>(*in_func->arguments).children;
-	ASTSet * ast_set = typeid_cast<ASTSet *>(args[1].get());
+	auto in_func = typeid_cast<const ASTFunction *>(element.in_function.get());
+	const ASTs & args = typeid_cast<const ASTExpressionList &>(*in_func->arguments).children;
+	auto ast_set = typeid_cast<const ASTSet *>(args[1].get());
	if (in_func && ast_set)
	{
		const Range & key_range = key_ranges[element.key_column];
@ -294,7 +294,7 @@ bool PKCondition::mayBeTrueInRange(const Field * left_pk, const Field * right_pk
}
else
{
-	throw DB::Exception("Set for IN is not created yet!");
+	throw DB::Exception("Set for IN is not created yet!", ErrorCodes::LOGICAL_ERROR);
}
}
else if (element.function == RPNElement::FUNCTION_NOT)
@ -303,16 +303,16 @@ bool PKCondition::mayBeTrueInRange(const Field * left_pk, const Field * right_pk
}
else if (element.function == RPNElement::FUNCTION_AND)
{
-	BoolMask arg1 = rpn_stack.back();
+	auto arg1 = rpn_stack.back();
	rpn_stack.pop_back();
-	BoolMask arg2 = rpn_stack.back();
+	auto arg2 = rpn_stack.back();
	rpn_stack.back() = arg1 & arg2;
}
else if (element.function == RPNElement::FUNCTION_OR)
{
-	BoolMask arg1 = rpn_stack.back();
+	auto arg1 = rpn_stack.back();
	rpn_stack.pop_back();
-	BoolMask arg2 = rpn_stack.back();
+	auto arg2 = rpn_stack.back();
	rpn_stack.back() = arg1 | arg2;
}
else
@ -325,27 +325,27 @@ bool PKCondition::mayBeTrueInRange(const Field * left_pk, const Field * right_pk
	return rpn_stack[0].can_be_true;
}

-bool PKCondition::mayBeTrueInRange(const Field * left_pk, const Field * right_pk)
+bool PKCondition::mayBeTrueInRange(const Field * left_pk, const Field * right_pk) const
{
	return mayBeTrueInRange(left_pk, right_pk, true);
}

-bool PKCondition::mayBeTrueAfter(const Field * left_pk)
+bool PKCondition::mayBeTrueAfter(const Field * left_pk) const
{
	return mayBeTrueInRange(left_pk, nullptr, false);
}

ASTSet * PKCondition::RPNElement::inFunctionToSet() const
{
-	ASTFunction * in_func = typeid_cast<ASTFunction *>(in_function.get());
+	auto in_func = typeid_cast<const ASTFunction *>(in_function.get());
	if (!in_func)
		return nullptr;
-	ASTs & args = typeid_cast<ASTExpressionList &>(*in_func->arguments).children;
-	ASTSet * ast_set = typeid_cast<ASTSet *>(args[1].get());
+	const ASTs & args = typeid_cast<const ASTExpressionList &>(*in_func->arguments).children;
+	auto ast_set = typeid_cast<const ASTSet *>(args[1].get());
	return ast_set;
}

-String PKCondition::RPNElement::toString()
+String PKCondition::RPNElement::toString() const
{
	std::ostringstream ss;
	switch (function)
@ -374,4 +374,50 @@ String PKCondition::RPNElement::toString()
return "ERROR"; return "ERROR";
} }
} }
bool PKCondition::alwaysUnknown() const
{
std::vector<UInt8> rpn_stack;
for (size_t i = 0; i < rpn.size(); ++i)
{
const auto & element = rpn[i];
if (element.function == RPNElement::FUNCTION_UNKNOWN)
{
rpn_stack.push_back(true);
}
else if (element.function == RPNElement::FUNCTION_NOT_IN_RANGE
|| element.function == RPNElement::FUNCTION_IN_RANGE
|| element.function == RPNElement::FUNCTION_IN_SET
|| element.function == RPNElement::FUNCTION_NOT_IN_SET)
{
rpn_stack.push_back(false);
}
else if (element.function == RPNElement::FUNCTION_NOT)
{
}
else if (element.function == RPNElement::FUNCTION_AND)
{
auto arg1 = rpn_stack.back();
rpn_stack.pop_back();
auto arg2 = rpn_stack.back();
rpn_stack.back() = arg1 & arg2;
}
else if (element.function == RPNElement::FUNCTION_OR)
{
auto arg1 = rpn_stack.back();
rpn_stack.pop_back();
auto arg2 = rpn_stack.back();
rpn_stack.back() = arg1 | arg2;
}
else
throw Exception("Unexpected function type in PKCondition::RPNElement", ErrorCodes::LOGICAL_ERROR);
}
return rpn_stack[0];
}
}
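alwaysUnknown() evaluates the condition's RPN with a small stack, the same shape as mayBeTrueInRange() above: leaves push whether they are unconditionally unknown, NOT leaves the flag unchanged, and AND/OR fold the two topmost flags. A standalone sketch with an illustrative Token enum standing in for RPNElement:

#include <vector>

enum class Token { Unknown, Known, Not, And, Or };

/// True iff the whole RPN expression is insensitive to the key,
/// i.e. the index cannot prune anything.
bool alwaysUnknown(const std::vector<Token> & rpn)
{
	std::vector<bool> stack;
	for (const auto token : rpn)
	{
		if (token == Token::Unknown)
			stack.push_back(true);
		else if (token == Token::Known)
			stack.push_back(false);
		else if (token == Token::Not)
		{
			/// Negating an unknown result leaves it unknown.
		}
		else
		{
			const bool arg1 = stack.back(); stack.pop_back();
			const bool arg2 = stack.back(); stack.pop_back();
			/// AND is unknown only if both sides are; OR if either side is.
			stack.push_back(token == Token::And ? (arg1 && arg2) : (arg1 || arg2));
		}
	}
	return stack.back();
}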

View File

@ -1467,6 +1467,8 @@ void StorageReplicatedMergeTree::alterThread()
/// If the column description has changed, update the table structure locally.
if (changed_version)
{
+	LOG_INFO(log, "Changed version of 'columns' node in ZooKeeper. Waiting for structure write lock.");
+
	auto table_lock = lockStructureForAlter();

	const auto columns_changed = columns != data.getColumnsListNonMaterialized();

View File

@ -9,9 +9,8 @@ namespace DB
StorageSystemDatabases::StorageSystemDatabases(const std::string & name_)
-	: name(name_)
+	: name(name_), columns{{"name", new DataTypeString}}
{
-	columns.emplace_back("name", new DataTypeString);
}

StoragePtr StorageSystemDatabases::create(const std::string & name_)

View File

@ -10,6 +10,7 @@
#include <DB/Dictionaries/IDictionary.h>
#include <DB/Dictionaries/IDictionarySource.h>
#include <DB/Dictionaries/DictionaryStructure.h>
+#include <statdaemons/ext/map.hpp>

#include <mutex>

namespace DB
@ -87,15 +88,12 @@ BlockInputStreams StorageSystemDictionaries::read(
col_origin.column->insert(dict_info.second.second);

const auto & dict_struct = dict_ptr->getStructure();
-Array attribute_names;
-Array attribute_types;
-for (const auto & attribute : dict_struct.attributes)
-{
-	attribute_names.push_back(attribute.name);
-	attribute_types.push_back(attribute.type->getName());
-}
-col_attribute_names.column->insert(attribute_names);
-col_attribute_types.column->insert(attribute_types);
+col_attribute_names.column->insert(ext::map<Array>(dict_struct.attributes, [] (auto & attr) -> decltype(auto) {
+	return attr.name;
+}));
+col_attribute_types.column->insert(ext::map<Array>(dict_struct.attributes, [] (auto & attr) -> decltype(auto) {
+	return attr.type->getName();
+}));
col_has_hierarchy.column->insert(UInt64{dict_ptr->hasHierarchy()});
col_bytes_allocated.column->insert(dict_ptr->getBytesAllocated());
col_hit_rate.column->insert(dict_ptr->getHitRate());

View File

@ -11,10 +11,13 @@ namespace DB
StorageSystemEvents::StorageSystemEvents(const std::string & name_)
-	: name(name_)
+	: name(name_),
+	columns
+	{
+		{"event", new DataTypeString},
+		{"value", new DataTypeUInt64},
+	}
{
-	columns.emplace_back("event", new DataTypeString);
-	columns.emplace_back("value", new DataTypeUInt64);
}

StoragePtr StorageSystemEvents::create(const std::string & name_)

View File

@ -54,9 +54,8 @@ private:
StorageSystemNumbers::StorageSystemNumbers(const std::string & name_, bool multithreaded_)
-	: name(name_), multithreaded(multithreaded_)
+	: name(name_), columns{{"number", new DataTypeUInt64}}, multithreaded(multithreaded_)
{
-	columns.emplace_back("number", new DataTypeUInt64);
}

StoragePtr StorageSystemNumbers::create(const std::string & name_, bool multithreaded_)

View File

@ -12,9 +12,8 @@ namespace DB
StorageSystemOne::StorageSystemOne(const std::string & name_)
-	: name(name_)
+	: name(name_), columns{{"dummy", new DataTypeUInt8}}
{
-	columns.emplace_back("dummy", new DataTypeUInt8);
}

StoragePtr StorageSystemOne::create(const std::string & name_)

View File

@ -14,21 +14,24 @@ namespace DB
StorageSystemParts::StorageSystemParts(const std::string & name_)
-	: name(name_)
+	: name(name_),
+	columns
+	{
+		{"partition", new DataTypeString},
+		{"name", new DataTypeString},
+		{"replicated", new DataTypeUInt8},
+		{"active", new DataTypeUInt8},
+		{"marks", new DataTypeUInt64},
+		{"bytes", new DataTypeUInt64},
+		{"modification_time", new DataTypeDateTime},
+		{"remove_time", new DataTypeDateTime},
+		{"refcount", new DataTypeUInt32},
+		{"database", new DataTypeString},
+		{"table", new DataTypeString},
+		{"engine", new DataTypeString},
+	}
{
-	columns.emplace_back("partition", new DataTypeString);
-	columns.emplace_back("name", new DataTypeString);
-	columns.emplace_back("replicated", new DataTypeUInt8);
-	columns.emplace_back("active", new DataTypeUInt8);
-	columns.emplace_back("marks", new DataTypeUInt64);
-	columns.emplace_back("bytes", new DataTypeUInt64);
-	columns.emplace_back("modification_time", new DataTypeDateTime);
-	columns.emplace_back("remove_time", new DataTypeDateTime);
-	columns.emplace_back("refcount", new DataTypeUInt32);
-	columns.emplace_back("database", new DataTypeString);
-	columns.emplace_back("table", new DataTypeString);
-	columns.emplace_back("engine", new DataTypeString);
}

StoragePtr StorageSystemParts::create(const std::string & name_)

View File

@ -10,11 +10,14 @@ namespace DB
StorageSystemTables::StorageSystemTables(const std::string & name_)
-	: name(name_)
+	: name(name_),
+	columns
+	{
+		{"database", new DataTypeString},
+		{"name", new DataTypeString},
+		{"engine", new DataTypeString},
+	}
{
-	columns.emplace_back("database", new DataTypeString);
-	columns.emplace_back("name", new DataTypeString);
-	columns.emplace_back("engine", new DataTypeString);
}

StoragePtr StorageSystemTables::create(const std::string & name_)
@ -32,10 +35,9 @@ static ColumnWithNameAndType getFilteredDatabases(ASTPtr query, const Context &
Block block;
block.insert(column);

-for (auto database_it = context.getDatabases().begin(); database_it != context.getDatabases().end(); ++database_it)
-{
-	column.column->insert(database_it->first);
-}
+for (const auto db : context.getDatabases())
+	column.column->insert(db.first);

VirtualColumnUtils::filterBlockWithQuery(query, block, context);
return block.getByPosition(0);

View File

@ -31,8 +31,7 @@ int main(int argc, const char ** argv)
	return 1;
}

Context context;
-NamesAndTypesList columns;
-columns.emplace_back("key", new DataTypeUInt64);
+NamesAndTypesList columns{{"key", new DataTypeUInt64}};

SortDescription sort_descr;
sort_descr.push_back(SortColumnDescription("key", 1));

View File

@ -0,0 +1,3 @@
79628
79628
102851

View File

@ -0,0 +1,5 @@
/* Note that the queries are written as if the user did not understand the meaning of the _ symbol in a LIKE expression. */
SELECT count() FROM test.hits WHERE URL LIKE '%/avtomobili_s_probegom/_%__%__%__%';
SELECT count() FROM test.hits WHERE URL LIKE '/avtomobili_s_probegom/_%__%__%__%';
SELECT count() FROM test.hits WHERE URL LIKE '%_/avtomobili_s_probegom/_%__%__%__%';
SELECT count() FROM test.hits WHERE URL LIKE '%avtomobili%';

View File

@ -34,6 +34,12 @@ PIDDIR=/var/run/$PROGRAM
PIDFILE_PREFIX=$PIDDIR/$PROGRAM
PIDFILE_RE="$PIDFILE_PREFIX[0-9]*.pid"
SUPPORTED_COMMANDS="{start|stop|status|restart|forcestop|forcerestart|reload|condstart|condstop|condrestart|condreload}"
is_supported_command()
{
echo $SUPPORTED_COMMANDS | grep -E "(\{|\|)$1(\||})" &> /dev/null
}
generate_program_name()
{
	if [ $NUMBER_OF_PROCESSES -eq 1 ]; then
@ -66,7 +72,7 @@ specific_log_file_for_each_process()
find_pid_files()
{
-	find $PIDDIR -regex "$PIDFILE_RE"
+	[[ -e $PIDDIR ]] && find $PIDDIR -regex "$PIDFILE_RE"
}

is_running()
@ -217,17 +223,6 @@ main()
stop)
	disable_cron && stop
	;;
-status)
-	if [[ $(running_processes) -eq $NUMBER_OF_PROCESSES ]]; then
-		echo "$PROGRAM service is running"
-	else
-		if is_cron_disabled; then
-			echo "$PROGRAM service is stopped";
-		else
-			echo "$PROGRAM: $(($NUMBER_OF_PROCESSES - $(running_processes))) of $NUMBER_OF_PROCESSES processes unexpectedly terminated"
-		fi
-	fi
-	;;
restart)
	restart && enable_cron
	;;
@ -252,14 +247,35 @@ main()
condreload)
	any_runs && restart
	;;
-*)
-	echo "Usage: ${0##*/} {start|stop|status|restart|forcestop|forcerestart|reload|condstart|condstop|condrestart|condreload}"
-	EXIT_STATUS=2
esac

exit $EXIT_STATUS
}
status()
{
if [[ $(running_processes) -eq $NUMBER_OF_PROCESSES ]]; then
echo "$PROGRAM service is running"
else
if is_cron_disabled; then
echo "$PROGRAM service is stopped";
else
echo "$PROGRAM: $(($NUMBER_OF_PROCESSES - $(running_processes))) of $NUMBER_OF_PROCESSES processes unexpectedly terminated"
fi
fi
}
# execute the commands that do not need locking
if ! is_supported_command "$1"; then
echo "Usage: ${0##*/} $SUPPORTED_COMMANDS"
exit 2
fi
if [[ "$1" == "status" ]]; then
status
exit 0
fi
(
	if flock -n 9; then
		main "$@"