ClickHouse/dbms/scripts/merge_algorithm/stats.py
Ravengg b079dacfd1 Added part_log with statistics scripts (#549)
* Added part_log

* first_test

* filter and hits_res

* Add renamer and drawer

* Add columns database and table into PartLog

* Add normal way to get table_name and database_name from part

* improve drawer

* add stats for random size parts

* Merge converter and drawer

* make drawer more informative

* add new data

* add new data

* new data

* add long range stats

* for checking best way

* Add add_parts script

* Good style for global merge

* delete commented code

* Fixed spaces to tabs

* Note that Stopwatch is started automatically.

* Style

* Update StorageMergeTree.cpp

* Update StorageReplicatedMergeTree.cpp

* Switch act_time_ms to duration_ms

* Added ability to disable part_log

* fixed getPartLog

* fix usage getPartLog

* fix
2017-03-07 21:13:54 +04:00

62 lines
1.5 KiB
Python

import time
import ast
from datetime import datetime
FILE='data.tsv'
def get_metrix():
data = []
time_to_merge = 0
count_of_parts = 0
max_count_of_parts = 0
parts_in_time = []
last_date = 0
for line in open(FILE):
fields = line.split('\t')
last_date = datetime.strptime(fields[2], '%Y-%m-%d %H:%M:%S')
break
for line in open(FILE):
fields = line.split('\t')
cur_date = datetime.strptime(fields[2], '%Y-%m-%d %H:%M:%S')
if fields[0] == '2':
time_to_merge += int(fields[4])
list = ast.literal_eval(fields[-1])
count_of_parts -= len(list) - 1
else:
count_of_parts += 1
if max_count_of_parts < count_of_parts:
max_count_of_parts = count_of_parts
parts_in_time.append([(cur_date-last_date).total_seconds(), count_of_parts])
last_date = cur_date
stats_parts_in_time = []
global_time = 0
average_parts = 0
for i in range(max_count_of_parts + 1):
stats_parts_in_time.append(0)
for elem in parts_in_time:
stats_parts_in_time[elem[1]] += elem[0]
global_time += elem[0]
average_parts += elem[0] * elem[1]
for i in range(max_count_of_parts):
stats_parts_in_time[i] /= global_time
average_parts /= global_time
return time_to_merge, max_count_of_parts, average_parts, stats_parts_in_time
def main():
time_to_merge, max_parts, average_parts, stats_parts = get_metrix()
print('time_to_merge=', time_to_merge)
print('max_parts=', max_parts)
print('average_parts=', average_parts)
print('stats_parts=', stats_parts)
if __name__ == '__main__':
main()