ClickHouse/tests/queries/0_stateless/01882_total_rows_approx.sh

#!/usr/bin/env bash

# Check that total_rows_approx (via http headers) includes all rows from
# all parts at the query start.
#
# At some point total_rows_approx was accounted only when the query starts
# reading the part, and so total_rows_approx wasn't reliable, even for simple
# SELECT FROM MergeTree()
# It was fixed by take total_rows_approx into account as soon as possible.

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

$CLICKHOUSE_CLIENT -q "drop table if exists data_01882"
$CLICKHOUSE_CLIENT -q "create table data_01882 (key Int) Engine=MergeTree() partition by key order by key as select * from numbers(10)"
# send_progress_in_http_headers will periodically send the progress
# but this is not stable, i.e. it can be dumped on query end,
# thus check few times to be sure that this is not coincidence.
for _ in {1..30}; do
    $CLICKHOUSE_CURL -vsS "${CLICKHOUSE_URL}&max_threads=1&default_format=Null&send_progress_in_http_headers=1&http_headers_progress_interval_ms=1" --data-binary @- <<< "select * from data_01882" |& {
        grep -o -F '"total_rows_to_read":"10"'
    } | {
        # grep out final result
        grep -v -F '"read_rows":"10"'
    }
done | uniq