#!/usr/bin/env python3
"""Check multi-range PREWHERE reads against a small MergeTree table.

The script creates table ``tab_02473``, fills it with sequential integers and
runs ``count()`` / ``sum(e)`` queries with range conditions on columns ``b``,
``c`` and ``d`` in PREWHERE, comparing every result with the row count
computed locally from a full copy of the table.
"""
import os
import sys

import requests

CURDIR = os.path.dirname(os.path.realpath(__file__))
# The helper module lives next to this script, so extend the import path
# before importing it.
sys.path.insert(0, os.path.join(CURDIR, "helpers"))

from pure_http_client import ClickHouseClient, requests_session_with_retries


class Tester:
    """
    - Creates test table with multiple integer columns
    - Runs read queries with multiple range conditions on different columns in PREWHERE and check that the result is correct
    """

    def __init__(self, session, url, index_granularity, total_rows):
        """Store the connection parameters and the table dimensions.

        Args:
            session: object whose ``post(url, data=...)`` sends a query
                (presumably a ``requests.Session`` -- confirm in the helper).
            url: ClickHouse HTTP endpoint, including query-string settings.
            index_granularity: value for the table's ``index_granularity``.
            total_rows: number of rows inserted into the test table.
        """
        self.session = session
        self.url = url
        self.index_granularity = index_granularity
        self.total_rows = total_rows
        # Prefixes of error messages that have already been printed.
        self.reported_errors = set()
        # Queries needed to reproduce a failure, in execution order.
        self.repro_queries = []

    def report_error(self):
        """Print the collected repro queries and terminate with status 1."""
        print("Repro steps:", "\n\n\t".join(self.repro_queries))
        # ``sys.exit`` instead of the bare ``exit`` helper: the latter is
        # installed by the ``site`` module and is not guaranteed to exist.
        sys.exit(1)

    def query(self, query_text, include_in_repro_steps=True, expected_data=None):
        """Send one query and optionally verify the returned number.

        Args:
            query_text: SQL text posted to ``self.url``.
            include_in_repro_steps: when false, the query is dropped from the
                repro list again once it has succeeded.
            expected_data: if given, the response is parsed as an integer and
                must equal ``len(expected_data)``.
        """
        # Record the query first so that it is part of the repro output when
        # it turns out to be the failing one.
        self.repro_queries.append(query_text)
        resp = self.session.post(self.url, data=query_text)
        if resp.status_code != 200:
            # Group similar errors
            error = resp.text[0:40]
            if error not in self.reported_errors:
                self.reported_errors.add(error)
                print("Code:", resp.status_code)
                print("Result:", resp.text)
                self.report_error()

        result = resp.text
        # Check that the result is as expected
        if expected_data is not None and int(result) != len(expected_data):
            print("Expected {} rows, got {}".format(len(expected_data), result))
            print("Expected data:" + str(expected_data))
            self.report_error()

        if not include_in_repro_steps:
            self.repro_queries.pop()

    def check_data(
        self, all_data, c_range_start, c_range_end, d_range_start, d_range_end
    ):
        """Run the PREWHERE queries and compare them with local filtering.

        Args:
            all_data: full table contents with columns ``a``..``d``; it is
                filtered with boolean masks, so presumably a pandas
                DataFrame -- confirm against ``query_return_df``.
            c_range_start, c_range_end: bounds of the ``c > .. AND c <= ..``
                condition.
            d_range_start, d_range_end: bounds of the ``d > .. AND d <= ..``
                condition.
        """
        for to_select in [
            "count()",
            "sum(e)",
        ]:  # Test reading with and without column with default value
            self.query("SELECT {} FROM tab_02473;".format(to_select), False, all_data)

            delta = 10
            for b_range_start in [0, delta]:
                for b_range_end in [self.total_rows - delta]:  # , self.total_rows]:
                    # Range condition on b only.
                    expected = all_data[
                        (all_data.a == 0)
                        & (all_data.b > b_range_start)
                        & (all_data.b <= b_range_end)
                    ]
                    self.query(
                        "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;".format(
                            to_select, b_range_start, b_range_end
                        ),
                        False,
                        expected,
                    )

                    # Range conditions on b and c.
                    expected = all_data[
                        (all_data.a == 0)
                        & (all_data.b > b_range_start)
                        & (all_data.b <= b_range_end)
                        & (all_data.c > c_range_start)
                        & (all_data.c <= c_range_end)
                    ]
                    self.query(
                        "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} WHERE a == 0;".format(
                            to_select,
                            b_range_start,
                            b_range_end,
                            c_range_start,
                            c_range_end,
                        ),
                        False,
                        expected,
                    )

                    # Range conditions on b, c and d.
                    expected = all_data[
                        (all_data.a == 0)
                        & (all_data.b > b_range_start)
                        & (all_data.b <= b_range_end)
                        & (all_data.c > c_range_start)
                        & (all_data.c <= c_range_end)
                        & (all_data.d > d_range_start)
                        & (all_data.d <= d_range_end)
                    ]
                    self.query(
                        "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} AND d > {} AND d <= {} WHERE a == 0;".format(
                            to_select,
                            b_range_start,
                            b_range_end,
                            c_range_start,
                            c_range_end,
                            d_range_start,
                            d_range_end,
                        ),
                        False,
                        expected,
                    )

    def run_test(self, c_range_start, c_range_end, d_range_start, d_range_end):
        """Create the table, fill it, verify all reads, then drop the table.

        The four arguments are passed through unchanged to ``check_data``.
        """
        # Every run starts with its own list of repro steps.
        self.repro_queries = []

        self.query(
            """
                CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, d Int32, PRIMARY KEY (a))
                ENGINE = MergeTree() ORDER BY (a, b)
                SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};""".format(
                self.index_granularity
            )
        )

        self.query(
            "INSERT INTO tab_02473 select 0, number+1, number+1, number+1 FROM numbers({});".format(
                self.total_rows
            )
        )

        # Fetch the whole table once; expected results are computed from it.
        client = ClickHouseClient()
        all_data = client.query_return_df(
            "SELECT a, b, c, d, 1 as e FROM tab_02473 FORMAT TabSeparatedWithNames;"
        )

        self.query("OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;")

        # After all data has been written add a column with default value
        self.query("ALTER TABLE tab_02473 ADD COLUMN e Int64 DEFAULT 1;")

        self.check_data(
            all_data, c_range_start, c_range_end, d_range_start, d_range_end
        )

        self.query("DROP TABLE tab_02473;")


def main():
    """Run the test for two index granularities and a grid of c/d ranges."""
    # Enable multiple prewhere read steps
    url = (
        os.environ["CLICKHOUSE_URL"]
        + "&enable_multiple_prewhere_read_steps=1&move_all_conditions_to_prewhere=0&max_threads=1"
    )

    default_index_granularity = 10
    total_rows = 8 * default_index_granularity
    step = default_index_granularity
    session = requests_session_with_retries()

    for index_granularity in [default_index_granularity - 1, default_index_granularity]:
        tester = Tester(session, url, index_granularity, total_rows)
        # Test combinations of ranges of columns c and d
        for c_range_start in range(0, total_rows, int(2.3 * step)):
            for c_range_end in range(
                c_range_start + 3 * step, total_rows, int(2.1 * step)
            ):
                for d_range_start in range(
                    int(0.5 * step), total_rows, int(2.7 * step)
                ):
                    for d_range_end in range(
                        d_range_start + 3 * step, total_rows, int(2.2 * step)
                    ):
                        tester.run_test(
                            c_range_start, c_range_end, d_range_start, d_range_end
                        )


if __name__ == "__main__":
    main()