ClickHouse/benchmark/greenplum/result_parser.py

#!/usr/bin/env python3


import sys
import json


def parse_block(block=None, options=None):
    """Extract the statement and its timings from one block of benchmark output.

    Args:
        block: List of lines. ``block[0]`` is the statement line; each later
            entry is a timing line whose second whitespace-separated token
            is the timing value (e.g. ``Time: 123.4 ms``).
        options: Object exposing ``show_queries`` and ``show_first_timings``
            attributes (the parsed command-line options). When omitted,
            both flags are treated as off.

    Returns:
        A list of strings: the stripped statement first when
        ``options.show_queries`` is set, followed by only the first timing
        when ``options.show_first_timings`` is set, otherwise by all three
        timings.

    Raises:
        IndexError: If the block holds fewer than three timing lines, or a
            timing line has no second token.
    """
    # None replaces the former mutable list defaults.
    if block is None:
        block = []
    if len(block) < 4:
        raise IndexError(
            "expected a query line followed by at least 3 timing lines, "
            "got %d line(s)" % len(block)
        )

    # Long blocks are sampled at every other timing line (1, 3, 5), so the
    # block must have at least six entries for block[5] to exist; anything
    # shorter is read at consecutive lines (1, 2, 3).
    # NOTE(review): presumably long blocks carry two "Time:" lines per run -
    # confirm against real benchmark output.
    step = 2 if len(block) > 5 else 1
    timings = [
        block[index].strip().split()[1]
        for index in (1, 1 + step, 1 + 2 * step)
    ]

    result = []
    if getattr(options, "show_queries", False):
        result.append(block[0].strip())
    if getattr(options, "show_first_timings", False):
        result.append(timings[0])
    else:
        result += timings
    return result


def read_stats_file(options, fname):
    """Parse a benchmark output file into one result row per query block.

    A block starts at every line containing ``SELECT`` and collects the
    following lines containing ``Time:``. All other lines are ignored.

    Args:
        options: Parsed command-line options, forwarded to ``parse_block``.
        fname: Path of the benchmark output file to read.

    Returns:
        A list with one ``parse_block`` result per block that has at least
        one line after its first.
    """
    result = []
    block = []
    with open(fname) as f:
        for line in f:
            if "SELECT" in line:
                # A new statement closes the block collected so far.
                if len(block) > 1:
                    result.append(parse_block(block, options))
                block = [line]
            elif "Time:" in line:
                block.append(line)

    # The last block has no following SELECT line to close it, so flush it
    # here; otherwise the final query's timings would be lost.
    if len(block) > 1:
        result.append(parse_block(block, options))

    return result


def compare_stats_files(options, arguments):
    """Build one pyplot data set per result file.

    Args:
        options: Parsed command-line options, forwarded to
            ``read_stats_file``.
        arguments: Positional arguments; ``arguments[0]`` is skipped and
            every later entry is treated as a result file path.

    Returns:
        A list with one ``[x_values, y_values, line_style]`` entry per file.
        ``y_values`` holds the first timing of every parsed block as a
        float, ``x_values`` the matching 0-based positions, and
        ``line_style`` a colour letter followed by ``"^"``.
    """
    pyplot_colors = ["y", "b", "g", "r"]
    # With show_queries set, parse_block puts the statement text at index 0
    # and the first timing at index 1; float() must not see the statement.
    timing_index = 1 if options.show_queries else 0

    result = []
    for idx, fname in enumerate(arguments[1:]):
        y_values = [
            float(row[timing_index])
            for row in read_stats_file(options, fname)
        ]
        # Cycle through the palette so more than four files do not overflow.
        color = pyplot_colors[idx % len(pyplot_colors)]
        result.append([list(range(len(y_values))), y_values, color + "^"])

    return result


def parse_args():
    """Parse ``sys.argv`` and return ``(options, arguments)``.

    ``arguments`` keeps the program name at index 0 because the full
    ``sys.argv`` is handed to the parser. When no positional argument
    follows the program name, the usage line is printed and the process
    exits with status 1.
    """
    import optparse

    # (short flag, long flag, keyword settings) for every supported option.
    option_table = (
        (
            "-q",
            "--show-queries",
            {
                "help": "Show statements along with timings",
                "action": "store_true",
                "dest": "show_queries",
            },
        ),
        (
            "-f",
            "--show-first-timings",
            {
                "help": "Show only first tries timings",
                "action": "store_true",
                "dest": "show_first_timings",
            },
        ),
        (
            "-c",
            "--compare-mode",
            {
                "help": "Prepare output for pyplot comparing result files.",
                "action": "store",
                "dest": "compare_mode",
            },
        ),
    )

    parser = optparse.OptionParser(
        usage="usage: %prog [options] [result_file_path].."
    )
    for short_flag, long_flag, settings in option_table:
        parser.add_option(short_flag, long_flag, **settings)

    options, arguments = parser.parse_args(sys.argv)
    if len(arguments) >= 2:
        return (options, arguments)

    parser.print_usage()
    sys.exit(1)


def gen_pyplot_code(options, arguments):
    """Print a matplotlib script that plots every compared data set.

    One ``plt.plot(...)`` call is emitted per data set returned by
    ``compare_stats_files``, labelled with the data set's 0-based position,
    followed by the axis labels, title, legend and ``plt.show()``.
    """
    plot_calls = "".join(
        "\nplt.plot(%s, %s, '%s', label='%s try')"
        % (x_values, y_values, line_style, position)
        for position, (x_values, y_values, line_style) in enumerate(
            compare_stats_files(options, arguments)
        )
    )

    script_lines = (
        "import matplotlib.pyplot as plt",
        plot_calls,
        "plt.xlabel('Try number')",
        "plt.ylabel('Timing')",
        "plt.title('Benchmark query timings')",
        "plt.legend()",
        "plt.show()",
    )
    for script_line in script_lines:
        print(script_line)


def gen_html_json(options, arguments):
    """Print the parsed results of one file as a JSON object.

    Args:
        options: Parsed command-line options, forwarded to
            ``read_stats_file``.
        arguments: Positional arguments; ``arguments[1]`` is the result
            file to read.

    The object carries fixed ``system``, ``version``, ``data_size``,
    ``time`` and ``comments`` fields plus a ``result`` array holding one
    array per parsed block. Row values are emitted as JSON strings, exactly
    as ``read_stats_file`` returns them.
    """
    rows = read_stats_file(options, arguments[1])
    print("{")
    # Key and value must each be quoted for the output to be valid JSON.
    print('"system":       "GreenPlum(x2)",')
    print('"version":      "%s",' % "4.3.9.1")
    print('"data_size":    10000000,')
    print('"time":         "",')
    print('"comments":     "",')
    print('"result":')
    print("[")
    if rows:
        # json.dumps gives double-quoted strings; rows are comma-separated
        # with no trailing comma so the array parses as JSON.
        print(",\n".join(json.dumps(row) for row in rows))
    print("]")
    print("}")


def main():
    """Parse the command line and dispatch to the matching generator.

    More than one positional argument after the program name selects the
    pyplot comparison output; otherwise the single file is printed in the
    JSON-style layout.
    """
    options, arguments = parse_args()
    generate = gen_pyplot_code if len(arguments) > 2 else gen_html_json
    generate(options, arguments)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Convert to python3 (#15007) 2020-10-02 16:54:07 +00:00			`#!/usr/bin/env python3`

Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00
			`import sys`
			`import json`

Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00			`def parse_block(block=[], options=[]):`

Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`# print('block is here', block)`
			`# show_query = False`
			`# show_query = options.show_query`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00			`result = []`
			`query = block[0].strip()`
			`if len(block) > 4:`
			`timing1 = block[1].strip().split()[1]`
			`timing2 = block[3].strip().split()[1]`
			`timing3 = block[5].strip().split()[1]`
			`else:`
			`timing1 = block[1].strip().split()[1]`
			`timing2 = block[2].strip().split()[1]`
			`timing3 = block[3].strip().split()[1]`
Fool's day changed, continued [#CLICKHOUSE-3]. 2017-04-05 11:52:23 +00:00			`if options.show_queries:`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`result.append(query)`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00			`if not options.show_first_timings:`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`result += [timing1, timing2, timing3]`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00			`else:`
Fool's day changed, continued [#CLICKHOUSE-3]. 2017-04-05 11:52:23 +00:00			`result.append(timing1)`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00			`return result`


			`def read_stats_file(options, fname):`
			`result = []`
			`int_result = []`
			`block = []`
			`time_count = 1`
			`with open(fname) as f:`

			`for line in f.readlines():`

Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`if "SELECT" in line:`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00			`if len(block) > 1:`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`result.append(parse_block(block, options))`
			`block = [line]`
			`elif "Time:" in line:`
			`block.append(line)`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00
Fool's day changed, continued [#CLICKHOUSE-3]. 2017-04-05 11:52:23 +00:00			`return result`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00

			`def compare_stats_files(options, arguments):`
			`result = []`
			`file_output = []`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`pyplot_colors = ["y", "b", "g", "r"]`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00			`for fname in arguments[1:]:`
			`file_output.append((read_stats_file(options, fname)))`
			`if len(file_output[0]) > 0:`
			`timings_count = len(file_output[0])`
			`for idx, data_set in enumerate(file_output):`
			`int_result = []`
			`for timing in data_set:`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`int_result.append(float(timing[0])) # y values`
			`result.append(`
			`[`
			`[x for x in range(0, len(int_result))],`
			`int_result,`
			`pyplot_colors[idx] + "^",`
			`]`
			`)`
			`# result.append([x for x in range(1, len(int_result)) ]) #x values`
			`# result.append( pyplot_colors[idx] + '^' )`
Fool's day changed, continued [#CLICKHOUSE-3]. 2017-04-05 11:52:23 +00:00
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00			`return result`

Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00			`def parse_args():`
			`from optparse import OptionParser`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00
			`parser = OptionParser(usage="usage: %prog [options] [result_file_path]..")`
			`parser.add_option(`
			`"-q",`
			`"--show-queries",`
			`help="Show statements along with timings",`
			`action="store_true",`
			`dest="show_queries",`
			`)`
			`parser.add_option(`
			`"-f",`
			`"--show-first-timings",`
			`help="Show only first tries timings",`
			`action="store_true",`
			`dest="show_first_timings",`
			`)`
			`parser.add_option(`
			`"-c",`
			`"--compare-mode",`
			`help="Prepare output for pyplot comparing result files.",`
			`action="store",`
			`dest="compare_mode",`
			`)`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00			`(options, arguments) = parser.parse_args(sys.argv)`
			`if len(arguments) < 2:`
			`parser.print_usage()`
			`sys.exit(1)`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`return (options, arguments)`

Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00
			`def gen_pyplot_code(options, arguments):`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`result = ""`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00			`data_sets = compare_stats_files(options, arguments)`
			`for idx, data_set in enumerate(data_sets, start=0):`
			`x_values, y_values, line_style = data_set`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`result += "\nplt.plot("`
			`result += "%s, %s, '%s'" % (x_values, y_values, line_style)`
			`result += ", label='%s try')" % idx`
			`print("import matplotlib.pyplot as plt")`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00			`print(result)`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`print("plt.xlabel('Try number')")`
			`print("plt.ylabel('Timing')")`
			`print("plt.title('Benchmark query timings')")`
			`print("plt.legend()")`
			`print("plt.show()")`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00

			`def gen_html_json(options, arguments):`
			`tuples = read_stats_file(options, arguments[1])`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`print("{")`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00			`print('"system: GreenPlum(x2),')`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`print(('"version": "%s",' % "4.3.9.1"))`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00			`print('"data_size": 10000000,')`
			`print('"time": "",')`
			`print('"comments": "",')`
			`print('"result":')`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`print("[")`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00			`for s in tuples:`
			`print(s)`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`print("]")`
			`print("}")`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00

			`def main():`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`(options, arguments) = parse_args()`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00			`if len(arguments) > 2:`
			`gen_pyplot_code(options, arguments)`
			`else:`
			`gen_html_json(options, arguments)`

Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00
			`if __name__ == "__main__":`
Greenplum benchmark test environment description and test results. 2016-12-16 09:08:25 +00:00			`main()`