Merge remote-tracking branch 'origin/master' into HEAD

Alexander Kuzmenkov 2020-08-06 00:15:06 +03:00
commit 3b1bacf390
54 changed files with 2179 additions and 2782 deletions

File diff suppressed because it is too large.


@@ -51,6 +51,7 @@
 #include <Common/getMultipleKeysFromConfig.h>
 #include <Common/ClickHouseRevision.h>
 #include <Common/Config/ConfigProcessor.h>
+#include <Common/MemorySanitizer.h>
 #include <Common/SymbolIndex.h>

 #if !defined(ARCADIA_BUILD)
@@ -76,6 +77,15 @@ static void call_default_signal_handler(int sig)
     raise(sig);
 }

+const char * msan_strsignal(int sig)
+{
+    // Apparently strsignal is not instrumented by MemorySanitizer, so we
+    // have to unpoison it to avoid msan reports inside fmt library when we
+    // print it.
+    const char * signal_name = strsignal(sig);
+    __msan_unpoison_string(signal_name);
+    return signal_name;
+}
+
 static constexpr size_t max_query_id_size = 127;
@@ -280,12 +290,14 @@ private:
         if (query_id.empty())
         {
             LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (no query) Received signal {} ({})",
-                VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, strsignal(sig), sig);
+                VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
+                thread_num, msan_strsignal(sig), sig);
         }
         else
         {
             LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) Received signal {} ({})",
-                VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, query_id, strsignal(sig), sig);
+                VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
+                thread_num, query_id, msan_strsignal(sig), sig);
         }

         String error_message;
@@ -833,13 +845,13 @@ void BaseDaemon::handleSignal(int signal_id)
         onInterruptSignals(signal_id);
     }
     else
-        throw DB::Exception(std::string("Unsupported signal: ") + strsignal(signal_id), 0);
+        throw DB::Exception(std::string("Unsupported signal: ") + msan_strsignal(signal_id), 0);
 }

 void BaseDaemon::onInterruptSignals(int signal_id)
 {
     is_cancelled = true;
-    LOG_INFO(&logger(), "Received termination signal ({})", strsignal(signal_id));
+    LOG_INFO(&logger(), "Received termination signal ({})", msan_strsignal(signal_id));

     if (sigint_signals_counter >= 2)
     {
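
For context, a standalone sketch (not part of this commit; the helper macro name is made up) of the unpoisoning pattern introduced above: when MemorySanitizer is not enabled the helper collapses to a no-op, and when it is enabled the string returned by the uninstrumented strsignal is explicitly marked as initialized before it is formatted.

// Minimal sketch of the msan_strsignal pattern; UNPOISON_STRING is an illustrative helper.
#include <cstdio>
#include <string.h>

#if defined(__has_feature)
#    if __has_feature(memory_sanitizer)
#        include <sanitizer/msan_interface.h>
#        define UNPOISON_STRING(s) __msan_unpoison_string(s)
#    endif
#endif
#if !defined(UNPOISON_STRING)
#    define UNPOISON_STRING(s) ((void)(s))  // no-op when MSan is not enabled
#endif

const char * sketch_strsignal(int sig)
{
    const char * signal_name = strsignal(sig);  // strsignal itself is not instrumented by MSan
    UNPOISON_STRING(signal_name);               // tell MSan these bytes are initialized before printing
    return signal_name;
}

int main()
{
    std::printf("%s\n", sketch_strsignal(15));  // e.g. "Terminated"
}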


@@ -20,6 +20,12 @@ endif ()
 option (WEVERYTHING "Enables -Weverything option with some exceptions. This is intended for exploration of new compiler warnings that may be found to be useful. Only makes sense for clang." ON)

+# Control maximum size of stack frames. It can be important if the code is run in fibers with small stack size.
+# Only in release build because debug has too large stack frames.
+if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE))
+    add_warning(frame-larger-than=16384)
+endif ()
+
 if (COMPILER_CLANG)
     add_warning(pedantic)
     no_warning(vla-extension)
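
For context, a standalone C++ sketch (not part of this commit; function names are made up) of what -Wframe-larger-than=16384 is meant to catch: a function keeping a large array on the stack exceeds the 16 KiB frame budget, while a heap-allocated variant keeps the frame small. This is the same trade-off behind the std::unique_ptr changes to RadixSort, Volnitsky and the ngram functions later in this commit.

#include <cstddef>
#include <memory>

// Likely to trigger -Wframe-larger-than=16384: ~256 KiB of stack in one frame.
int sum_on_stack()
{
    int histogram[65536] = {0};  // 65536 * 4 bytes live in the stack frame
    int sum = 0;
    for (int v : histogram)
        sum += v;
    return sum;
}

// Keeps the frame tiny: the array lives on the heap instead.
int sum_on_heap()
{
    std::unique_ptr<int[]> histogram(new int[65536]{});
    int sum = 0;
    for (size_t i = 0; i < 65536; ++i)
        sum += histogram[i];
    return sum;
}

int main()
{
    return sum_on_stack() == sum_on_heap() ? 0 : 1;
}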


@@ -53,7 +53,7 @@ mkdir -p /etc/clickhouse-server
 mkdir -p /etc/clickhouse-client
 mkdir -p /etc/clickhouse-server/config.d
 mkdir -p /etc/clickhouse-server/users.d
-mkdir -p /var/log/clickhouse-server
+ln -s /test_output /var/log/clickhouse-server
 cp $CLICKHOUSE_DIR/programs/server/config.xml /etc/clickhouse-server/
 cp $CLICKHOUSE_DIR/programs/server/users.xml /etc/clickhouse-server/
@@ -66,7 +66,6 @@ ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config
 ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/
 ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/
 ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/
-ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
 ln -s /usr/share/clickhouse-test/config/custom_settings_prefixes.xml /etc/clickhouse-server/config.d/
 ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/
 ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/
@@ -84,6 +83,10 @@ ln -s /usr/share/clickhouse-test/config/server.crt /etc/clickhouse-server/
 ln -s /usr/share/clickhouse-test/config/dhparam.pem /etc/clickhouse-server/
 ln -sf /usr/share/clickhouse-test/config/client_config.xml /etc/clickhouse-client/config.xml

+# Keep original query_masking_rules.xml
+ln -s --backup=simple --suffix=_original.xml /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
+
 clickhouse-server --config /etc/clickhouse-server/config.xml --daemon

 counter=0
@@ -161,15 +164,15 @@ clickhouse-test -j 4 --no-long --testname --shard --zookeeper --skip ${TESTS_TO_
 kill_clickhouse () {
-    kill `ps ax | grep clickhouse-server | grep -v 'grep' | awk '{print $1}'` 2>/dev/null
+    killall clickhouse-server ||:
     for i in {1..10}
     do
-        if ! kill -0 `ps ax | grep clickhouse-server | grep -v 'grep' | awk '{print $1}'`; then
+        if ! killall -0 clickhouse-server; then
            echo "No clickhouse process"
            break
        else
-           echo "Process" `ps ax | grep clickhouse-server | grep -v 'grep' | awk '{print $1}'` "still alive"
+           echo "Clickhouse server process" $(pgrep -f clickhouse-server) "still alive"
            sleep 10
        fi
     done
@@ -202,5 +205,3 @@ if [[ ! -z "$FAILED_TESTS" ]]; then
 else
     echo "No failed tests"
 fi
-
-mv /var/log/clickhouse-server/* /test_output


@@ -17,6 +17,8 @@ parser.add_argument('--report', default='main', choices=['main', 'all-queries'],
     help='Which report to build')
 args = parser.parse_args()

+tables = []
+errors_explained = []
 report_errors = []
 error_tests = 0
 slow_average_tests = 0
@@ -145,19 +147,40 @@ tr:nth-child(odd) td {{filter: brightness(90%);}}
 table_anchor = 0
 row_anchor = 0

-def nextTableAnchor():
+def currentTableAnchor():
+    global table_anchor
+    return f'{table_anchor}'
+
+def newTableAnchor():
     global table_anchor
     table_anchor += 1
-    return str(table_anchor)
+    return currentTableAnchor()
+
+def currentRowAnchor():
+    global row_anchor
+    global table_anchor
+    return f'{table_anchor}.{row_anchor}'

 def nextRowAnchor():
     global row_anchor
     global table_anchor
+    return f'{table_anchor}.{row_anchor + 1}'
+
+def setRowAnchor(anchor_row_part):
+    global row_anchor
+    global table_anchor
+    row_anchor = anchor_row_part
+    return currentRowAnchor()
+
+def advanceRowAnchor():
+    global row_anchor
+    global table_anchor
     row_anchor += 1
-    return str(table_anchor) + "." + str(row_anchor)
+    return currentRowAnchor()

 def tr(x):
-    a = nextRowAnchor()
+    a = advanceRowAnchor()
     #return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
     return '<tr id={a}>{x}</tr>'.format(a=a, x=str(x))
@@ -180,8 +203,10 @@ def tableHeader(r):
     return tr(''.join([th(f) for f in r]))

 def tableStart(title):
-    anchor = nextTableAnchor();
     cls = '-'.join(title.lower().split(' ')[:3]);
+    global table_anchor
+    table_anchor = cls
+    anchor = currentTableAnchor()
     return f"""
 <h2 id="{anchor}">
 <a class="cancela" href="#{anchor}">{title}</a>
@@ -211,20 +236,23 @@ def htmlRows(n):
         result += tableRow(row)
     return result

-def printSimpleTable(caption, columns, rows):
+def addSimpleTable(caption, columns, rows, pos=None):
+    global tables
+    text = ''
     if not rows:
         return

-    print(tableStart(caption))
-    print(tableHeader(columns))
+    text += tableStart(caption)
+    text += tableHeader(columns)
     for row in rows:
-        print(tableRow(row))
-    print(tableEnd())
+        text += tableRow(row)
+    text += tableEnd()
+    tables.insert(pos if pos else len(tables), text)

-def print_tested_commits():
+def add_tested_commits():
     global report_errors
     try:
-        printSimpleTable('Tested commits', ['Old', 'New'],
+        addSimpleTable('Tested commits', ['Old', 'New'],
             [['<pre>{}</pre>'.format(x) for x in
                 [open('left-commit.txt').read(),
                     open('right-commit.txt').read()]]])
@@ -235,7 +263,8 @@ def print_tested_commits():
             *sys.exc_info()[:2])[-1])
         pass

-def print_report_errors():
+def add_report_errors():
+    global tables
     global report_errors
     # Add the errors reported by various steps of comparison script
     try:
@@ -246,67 +275,89 @@ def print_report_errors():
             *sys.exc_info()[:2])[-1])
         pass

-    if len(report_errors):
-        print(tableStart('Errors while building the report'))
-        print(tableHeader(['Error']))
-        for x in report_errors:
-            print(tableRow([x]))
-        print(tableEnd())
+    if not report_errors:
+        return
+
+    text = tableStart('Errors while building the report')
+    text += tableHeader(['Error'])
+    for x in report_errors:
+        text += tableRow([x])
+    text += tableEnd()
+    # Insert after Tested Commits
+    tables.insert(1, text)
+    errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>']);
+
+def add_errors_explained():
+    global tables
+    addSimpleTable('Error summary', ['Description'], errors_explained, 1)

 if args.report == 'main':
     print(header_template.format())

-    print_tested_commits()
+    add_tested_commits()

     run_error_rows = tsvRows('run-errors.tsv')
     error_tests += len(run_error_rows)
-    printSimpleTable('Run errors', ['Test', 'Error'], run_error_rows)
+    addSimpleTable('Run errors', ['Test', 'Error'], run_error_rows)
+    if run_error_rows:
+        errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>']);

     slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
     error_tests += len(slow_on_client_rows)
-    printSimpleTable('Slow on client',
+    addSimpleTable('Slow on client',
         ['Client time,&nbsp;s', 'Server time,&nbsp;s', 'Ratio', 'Test', 'Query'],
         slow_on_client_rows)
+    if slow_on_client_rows:
+        errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>']);

     unmarked_short_rows = tsvRows('report/unmarked-short-queries.tsv')
     error_tests += len(unmarked_short_rows)
-    printSimpleTable('Short queries not marked as short',
+    addSimpleTable('Short queries not marked as short',
         ['New client time, s', 'Test', '#', 'Query'],
         unmarked_short_rows)
+    if unmarked_short_rows:
+        errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries have short duration but are not explicitly marked as "short"</a>']);

-    def print_partial():
+    def add_partial():
         rows = tsvRows('report/partial-queries-report.tsv')
         if not rows:
             return
-        global unstable_partial_queries, slow_average_tests
-        print(tableStart('Partial queries'))
+
+        global unstable_partial_queries, slow_average_tests, tables
+        text = tableStart('Partial queries')
         columns = ['Median time, s', 'Relative time variance', 'Test', '#', 'Query']
-        print(tableHeader(columns))
+        text += tableHeader(columns)
         attrs = ['' for c in columns]
         for row in rows:
             if float(row[1]) > 0.10:
                 attrs[1] = f'style="background: {color_bad}"'
                 unstable_partial_queries += 1
+                errors_explained.append([f'<a href="#{nextRowAnchor()}">The query no. {row[3]} of test \'{row[2]}\' has excessive variance of run time. Keep it below 10%</a>'])
             else:
                 attrs[1] = ''
             if float(row[0]) > allowed_single_run_time:
                 attrs[0] = f'style="background: {color_bad}"'
+                errors_explained.append([f'<a href="#{nextRowAnchor()}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run} seconds"</a>'])
                 slow_average_tests += 1
             else:
                 attrs[0] = ''
-            print(tableRow(row, attrs))
-        print(tableEnd())
+            text += tableRow(row, attrs)
+        text += tableEnd()
+        tables.append(text)

-    print_partial()
+    add_partial()

-    def print_changes():
+    def add_changes():
         rows = tsvRows('report/changed-perf.tsv')
         if not rows:
             return
-        global faster_queries, slower_queries
-        print(tableStart('Changes in performance'))
+
+        global faster_queries, slower_queries, tables
+        text = tableStart('Changes in performance')
         columns = [
             'Old,&nbsp;s', # 0
             'New,&nbsp;s', # 1
@@ -319,7 +370,7 @@ if args.report == 'main':
             'Query', # 8
         ]

-        print(tableHeader(columns))
+        text += tableHeader(columns)

         attrs = ['' for c in columns]
         attrs[5] = None
@@ -331,18 +382,19 @@ if args.report == 'main':
                 else:
                     slower_queries += 1
                     attrs[2] = attrs[3] = f'style="background: {color_bad}"'
+                    errors_explained.append([f'<a href="#{nextRowAnchor()}">The query no. {row[7]} of test \'{row[6]}\' has slowed down</a>'])
             else:
                 attrs[2] = attrs[3] = ''

-            print(tableRow(row, attrs))
-        print(tableEnd())
+            text += tableRow(row, attrs)
+        text += tableEnd()
+        tables.append(text)

-    print_changes()
+    add_changes()

-    def print_unstable_queries():
-        global unstable_queries
-        global very_unstable_queries
+    def add_unstable_queries():
+        global unstable_queries, very_unstable_queries, tables

         unstable_rows = tsvRows('report/unstable-queries.tsv')
         if not unstable_rows:
@@ -361,8 +413,8 @@ if args.report == 'main':
             'Query' #7
         ]

-        print(tableStart('Unstable queries'))
-        print(tableHeader(columns))
+        text = tableStart('Unstable queries')
+        text += tableHeader(columns)

         attrs = ['' for c in columns]
         attrs[4] = None
@@ -373,21 +425,22 @@ if args.report == 'main':
             else:
                 attrs[3] = ''

-            print(tableRow(r, attrs))
-        print(tableEnd())
+            text += tableRow(r, attrs)
+        text += tableEnd()
+        tables.append(text)

-    print_unstable_queries()
+    add_unstable_queries()

     skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
-    printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)
+    addSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)

-    printSimpleTable('Test performance changes',
+    addSimpleTable('Test performance changes',
         ['Test', 'Queries', 'Unstable', 'Changed perf', 'Total not OK', 'Avg relative time diff'],
         tsvRows('report/test-perf-changes.tsv'))

-    def print_test_times():
-        global slow_average_tests
+    def add_test_times():
+        global slow_average_tests, tables
         rows = tsvRows('report/test-times.tsv')
         if not rows:
             return
@@ -403,8 +456,8 @@ if args.report == 'main':
             'Shortest query<br>(sum for all runs),&nbsp;s', #7
         ]

-        print(tableStart('Test times'))
-        print(tableHeader(columns))
+        text = tableStart('Test times')
+        text += tableHeader(columns)

         nominal_runs = 13 # FIXME pass this as an argument
         total_runs = (nominal_runs + 1) * 2 # one prewarm run, two servers
@@ -414,22 +467,25 @@ if args.report == 'main':
                 # FIXME should be 15s max -- investigate parallel_insert
                 slow_average_tests += 1
                 attrs[6] = f'style="background: {color_bad}"'
+                errors_explained.append([f'<a href="./all-queries.html#all-query-times.0">The test \'{r[0]}\' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up'])
             else:
                 attrs[6] = ''

             if float(r[5]) > allowed_single_run_time * total_runs:
                 slow_average_tests += 1
                 attrs[5] = f'style="background: {color_bad}"'
+                errors_explained.append([f'<a href="./all-queries.html#all-query-times.0">Some query of the test \'{r[0]}\' is too slow to run. See the all queries report'])
             else:
                 attrs[5] = ''

-            print(tableRow(r, attrs))
-        print(tableEnd())
+            text += tableRow(r, attrs)
+        text += tableEnd()
+        tables.append(text)

-    print_test_times()
+    add_test_times()

-    def print_benchmark_results():
+    def add_benchmark_results():
         if not os.path.isfile('benchmark/website-left.json'):
             return
@@ -479,26 +535,33 @@ if args.report == 'main':
             all_rows.append([row, attrs])

-        print(tableStart('Concurrent benchmarks'))
-        print(tableHeader(header))
+        text = tableStart('Concurrent benchmarks')
+        text += tableHeader(header)
         for row, attrs in all_rows:
-            print(tableRow(row, attrs))
-        print(tableEnd())
+            text += tableRow(row, attrs)
+        text += tableEnd()
+
+        global tables
+        tables.append(text)

     try:
-        print_benchmark_results()
+        add_benchmark_results()
     except:
         report_errors.append(
             traceback.format_exception_only(
                 *sys.exc_info()[:2])[-1])
         pass

-    printSimpleTable('Metric changes',
+    addSimpleTable('Metric changes',
         ['Metric', 'Old median value', 'New median value',
             'Relative difference', 'Times difference'],
         tsvRows('metrics/changes.tsv'))

-    print_report_errors()
+    add_report_errors()
+    add_errors_explained()
+
+    for t in tables:
+        print(t)

     print("""
     <p class="links">
@@ -559,9 +622,9 @@ elif args.report == 'all-queries':
     print(header_template.format())

-    print_tested_commits()
+    add_tested_commits()

-    def print_all_queries():
+    def add_all_queries():
         rows = tsvRows('report/all-queries.tsv')
         if not rows:
             return
@@ -579,8 +642,8 @@ elif args.report == 'all-queries':
             'Query', #9
         ]

-        print(tableStart('All query times'))
-        print(tableHeader(columns))
+        text = tableStart('All query times')
+        text += tableHeader(columns)

         attrs = ['' for c in columns]
         attrs[0] = None
@@ -606,13 +669,15 @@ elif args.report == 'all-queries':
             attrs[2] = ''
             attrs[3] = ''

-            print(tableRow(r, attrs))
-        print(tableEnd())
+            text += tableRow(r, attrs)
+        text += tableEnd()
+        tables.append(text)

-    print_all_queries()
+    add_all_queries()
+    add_report_errors()

-    print_report_errors()
+    for t in tables:
+        print(t)

     print("""
     <p class="links">


@@ -28,7 +28,7 @@ ClickHouse-specific aggregate functions:
 - [argMin](../../../sql-reference/aggregate-functions/reference/argmin.md)
 - [argMax](../../../sql-reference/aggregate-functions/reference/argmax.md)
 - [avgWeighted](../../../sql-reference/aggregate-functions/reference/avgweighted.md)
-- [topK](../../../sql-reference/aggregate-functions/reference/topkweighted.md)
+- [topK](../../../sql-reference/aggregate-functions/reference/topk.md)
 - [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md)
 - [groupArray](../../../sql-reference/aggregate-functions/reference/grouparray.md)
 - [groupUniqArray](../../../sql-reference/aggregate-functions/reference/groupuniqarray.md)


@@ -157,7 +157,7 @@ private:
     std::string query_id;
     bool continue_on_errors;
     bool print_stacktrace;
-    Settings settings;
+    const Settings & settings;
     SharedContextHolder shared_context;
     Context global_context;
     QueryProcessingStage::Enum query_processing_stage;


@@ -1323,7 +1323,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
     /// Try start processing, create node about it
     {
         String start_state = TaskStateWithOwner::getData(TaskState::Started, host_id);
-        CleanStateClock new_clean_state_clock (zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path);
+        CleanStateClock new_clean_state_clock(zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path);
         if (clean_state_clock != new_clean_state_clock)
         {
             LOG_INFO(log, "Partition {} piece {} clean state changed, cowardly bailing", task_partition.name, toString(current_piece_number));
@@ -1360,7 +1360,8 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
         LOG_DEBUG(log, "Create destination tables. Query: {}", query);
         UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query, task_cluster->settings_push, PoolMode::GET_MANY);
-        LOG_DEBUG(log, "Destination tables {} have been created on {} shards of {}", getQuotedTable(task_table.table_push), shards, task_table.cluster_push->getShardCount());
+        LOG_DEBUG(log, "Destination tables {} have been created on {} shards of {}",
+            getQuotedTable(task_table.table_push), shards, task_table.cluster_push->getShardCount());
     }

     /// Do the copying
@@ -1391,18 +1392,18 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
         try
         {
+            std::unique_ptr<Context> context_select = std::make_unique<Context>(context);
+            context_select->setSettings(task_cluster->settings_pull);
+
+            std::unique_ptr<Context> context_insert = std::make_unique<Context>(context);
+            context_insert->setSettings(task_cluster->settings_push);
+
             /// Custom INSERT SELECT implementation
-            Context context_select = context;
-            context_select.setSettings(task_cluster->settings_pull);
-
-            Context context_insert = context;
-            context_insert.setSettings(task_cluster->settings_push);
-
             BlockInputStreamPtr input;
             BlockOutputStreamPtr output;
             {
-                BlockIO io_select = InterpreterFactory::get(query_select_ast, context_select)->execute();
-                BlockIO io_insert = InterpreterFactory::get(query_insert_ast, context_insert)->execute();
+                BlockIO io_select = InterpreterFactory::get(query_select_ast, *context_select)->execute();
+                BlockIO io_insert = InterpreterFactory::get(query_insert_ast, *context_insert)->execute();

                 input = io_select.getInputStream();
                 output = io_insert.out;


@@ -154,12 +154,13 @@ public:
         if (unlikely(size > AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE))
             throw Exception("Too large array size", ErrorCodes::TOO_LARGE_ARRAY_SIZE);

-        auto & value = this->data(place).value;
-
-        value.resize(size, arena);
-        buf.read(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
-
-        this->data(place).sum = value.back();
+        if (size > 0)
+        {
+            auto & value = this->data(place).value;
+            value.resize(size, arena);
+            buf.read(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
+            this->data(place).sum = value.back();
+        }
     }

     void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
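
A minimal standalone sketch (a toy struct, not the ClickHouse aggregate function) of the bug the size > 0 guard above avoids: deserializing a zero-length state used to call back() on an empty container, which is undefined behaviour; with the guard, an empty state is simply left empty.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <sstream>
#include <vector>

struct MovingSumState
{
    std::vector<int64_t> value;
    int64_t sum = 0;

    void deserialize(std::istream & buf, size_t size)
    {
        if (size > 0)
        {
            value.resize(size);
            buf.read(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
            sum = value.back();  // safe: the vector is known to be non-empty here
        }
        // With size == 0 nothing is read and back() is never called on an empty vector.
    }
};

int main()
{
    std::stringstream buf;
    MovingSumState state;
    state.deserialize(buf, 0);  // empty serialized state: no read, no UB
    assert(state.value.empty() && state.sum == 0);
}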


@@ -149,7 +149,7 @@ static void getNotEnoughMemoryMessage(std::string & msg)
 #if defined(__linux__)
     try
     {
-        static constexpr size_t buf_size = 4096;
+        static constexpr size_t buf_size = 1024;
         char buf[buf_size];
         UInt64 max_map_count = 0;


@@ -8,11 +8,13 @@
 #define __msan_unpoison(X, Y)
 #define __msan_test_shadow(X, Y) (false)
 #define __msan_print_shadow(X, Y)
+#define __msan_unpoison_string(X)
 #if defined(__has_feature)
 #   if __has_feature(memory_sanitizer)
 #       undef __msan_unpoison
 #       undef __msan_test_shadow
 #       undef __msan_print_shadow
+#       undef __msan_unpoison_string
 #       include <sanitizer/msan_interface.h>
 #   endif
 #endif


@@ -252,7 +252,7 @@ private:
     /// There are loops of NUM_PASSES. It is very important that they are unfolded at compile-time.

     /// For each of the NUM_PASSES bit ranges of the key, consider how many times each value of this bit range met.
-    CountType histograms[HISTOGRAM_SIZE * NUM_PASSES] = {0};
+    std::unique_ptr<CountType[]> histograms{new CountType[HISTOGRAM_SIZE * NUM_PASSES]{}};

     typename Traits::Allocator allocator;
@@ -358,7 +358,7 @@ private:
         /// The beginning of every i-1-th bucket. 0th element will be equal to 1st.
         /// Last element will point to array end.
-        Element * prev_buckets[HISTOGRAM_SIZE + 1];
+        std::unique_ptr<Element *[]> prev_buckets{new Element*[HISTOGRAM_SIZE + 1]};
         /// The beginning of every i-th bucket (the same array shifted by one).
         Element ** buckets = &prev_buckets[1];
@@ -375,7 +375,7 @@ private:
         /// also it corresponds with the results from https://github.com/powturbo/TurboHist
         static constexpr size_t UNROLL_COUNT = 8;
-        CountType count[HISTOGRAM_SIZE * UNROLL_COUNT]{};
+        std::unique_ptr<CountType[]> count{new CountType[HISTOGRAM_SIZE * UNROLL_COUNT]{}};
         size_t unrolled_size = size / UNROLL_COUNT * UNROLL_COUNT;
         for (Element * elem = arr; elem < arr + unrolled_size; elem += UNROLL_COUNT)


@@ -318,7 +318,7 @@ protected:
     /** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
      * storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
-    VolnitskyTraits::Offset hash[VolnitskyTraits::hash_size]; /// Hash table.
+    std::unique_ptr<VolnitskyTraits::Offset[]> hash; /// Hash table.

     const bool fallback; /// Do we need to use the fallback algorithm.
@@ -340,7 +340,7 @@ public:
         if (fallback)
             return;

-        memset(hash, 0, sizeof(hash));
+        hash = std::unique_ptr<VolnitskyTraits::Offset[]>(new VolnitskyTraits::Offset[VolnitskyTraits::hash_size]{});

         auto callback = [this](const VolnitskyTraits::Ngram ngram, const int offset) { return this->putNGramBase(ngram, offset); };
         /// ssize_t is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0
@@ -419,7 +419,7 @@ private:
         VolnitskyTraits::Offset off;
     };

-    OffsetId hash[VolnitskyTraits::hash_size];
+    std::unique_ptr<OffsetId[]> hash;

     /// step for each bunch of strings
     size_t step;
@@ -434,6 +434,7 @@ public:
     MultiVolnitskyBase(const std::vector<StringRef> & needles_) : needles{needles_}, step{0}, last{0}
     {
         fallback_searchers.reserve(needles.size());
+        hash = std::unique_ptr<OffsetId[]>(new OffsetId[VolnitskyTraits::hash_size]); /// No zero initialization, it will be done later.
     }

     /**
@@ -454,7 +455,7 @@ public:
         if (last == needles.size())
             return false;

-        memset(hash, 0, sizeof(hash));
+        memset(hash.get(), 0, VolnitskyTraits::hash_size * sizeof(OffsetId));
         fallback_needles.clear();
         step = std::numeric_limits<size_t>::max();
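
One detail worth spelling out with a standalone sketch (not ClickHouse code; the struct names are made up): once a fixed-size member array becomes a std::unique_ptr<T[]>, sizeof no longer measures the table but only the pointer, which is why the memset calls above had to switch to an explicit element count.

#include <cstddef>
#include <cstdio>
#include <cstring>
#include <memory>

constexpr size_t hash_size = 64 * 1024;

struct OnStackTable
{
    unsigned short hash[hash_size];
    void clear() { memset(hash, 0, sizeof(hash)); }  // sizeof(hash) == 128 KiB, correct
};

struct OnHeapTable
{
    std::unique_ptr<unsigned short[]> hash{new unsigned short[hash_size]};
    void clear()
    {
        // sizeof(hash) here is only the size of the smart pointer (typically 8 bytes),
        // so the element count must be written out explicitly.
        memset(hash.get(), 0, hash_size * sizeof(unsigned short));
    }
};

int main()
{
    OnHeapTable table;
    table.clear();
    std::printf("sizeof(pointer member) = %zu\n", sizeof(table.hash));  // 8, not 131072
}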


@@ -79,8 +79,8 @@ String getFilesystemName([[maybe_unused]] const String & mount_point)
         throw DB::Exception("Cannot open /etc/mtab to get name of filesystem", ErrorCodes::SYSTEM_ERROR);
     mntent fs_info;
     constexpr size_t buf_size = 4096;   /// The same as buffer used for getmntent in glibc. It can happen that it's not enough
-    char buf[buf_size];
-    while (getmntent_r(mounted_filesystems, &fs_info, buf, buf_size) && fs_info.mnt_dir != mount_point)
+    std::vector<char> buf(buf_size);
+    while (getmntent_r(mounted_filesystems, &fs_info, buf.data(), buf_size) && fs_info.mnt_dir != mount_point)
         ;
     endmntent(mounted_filesystems);
     if (fs_info.mnt_dir != mount_point)


@@ -235,8 +235,7 @@ void DatabaseOrdinary::alterTable(const Context & context, const StorageID & tab
     String statement;

     {
-        char in_buf[METADATA_FILE_BUFFER_SIZE];
-        ReadBufferFromFile in(table_metadata_path, METADATA_FILE_BUFFER_SIZE, -1, in_buf);
+        ReadBufferFromFile in(table_metadata_path, METADATA_FILE_BUFFER_SIZE);
         readStringUntilEOF(statement, in);
     }


@@ -48,11 +48,11 @@ struct NgramDistanceImpl
     /// Max codepoints to store at once. 16 is for batching usage and PODArray has this padding.
     static constexpr size_t simultaneously_codepoints_num = default_padding + N - 1;

-    /** This fits mostly in L2 cache all the time.
+    /** map_size of this fits mostly in L2 cache all the time.
      * Actually use UInt16 as addings and subtractions do not UB overflow. But think of it as a signed
      * integer array.
      */
-    using NgramStats = UInt16[map_size];
+    using NgramCount = UInt16;

     static ALWAYS_INLINE UInt16 calculateASCIIHash(const CodePoint * code_points)
     {
@@ -169,8 +169,8 @@ struct NgramDistanceImpl
     static ALWAYS_INLINE inline size_t calculateNeedleStats(
         const char * data,
         const size_t size,
-        NgramStats & ngram_stats,
-        [[maybe_unused]] UInt16 * ngram_storage,
+        NgramCount * ngram_stats,
+        [[maybe_unused]] NgramCount * ngram_storage,
         size_t (*read_code_points)(CodePoint *, const char *&, const char *),
         UInt16 (*hash_functor)(const CodePoint *))
     {
@@ -202,7 +202,7 @@ struct NgramDistanceImpl
     static ALWAYS_INLINE inline UInt64 calculateHaystackStatsAndMetric(
         const char * data,
         const size_t size,
-        NgramStats & ngram_stats,
+        NgramCount * ngram_stats,
         size_t & distance,
         [[maybe_unused]] UInt16 * ngram_storage,
         size_t (*read_code_points)(CodePoint *, const char *&, const char *),
@@ -256,7 +256,7 @@ struct NgramDistanceImpl
     static void constantConstant(std::string data, std::string needle, Float32 & res)
     {
-        NgramStats common_stats = {};
+        std::unique_ptr<NgramCount[]> common_stats{new NgramCount[map_size]{}};

         /// We use unsafe versions of getting ngrams, so I decided to use padded strings.
         const size_t needle_size = needle.size();
@@ -264,11 +264,11 @@ struct NgramDistanceImpl
         needle.resize(needle_size + default_padding);
         data.resize(data_size + default_padding);

-        size_t second_size = dispatchSearcher(calculateNeedleStats<false>, needle.data(), needle_size, common_stats, nullptr);
+        size_t second_size = dispatchSearcher(calculateNeedleStats<false>, needle.data(), needle_size, common_stats.get(), nullptr);
         size_t distance = second_size;
         if (data_size <= max_string_size)
         {
-            size_t first_size = dispatchSearcher(calculateHaystackStatsAndMetric<false>, data.data(), data_size, common_stats, distance, nullptr);
+            size_t first_size = dispatchSearcher(calculateHaystackStatsAndMetric<false>, data.data(), data_size, common_stats.get(), distance, nullptr);
             /// For !symmetric version we should not use first_size.
             if constexpr (symmetric)
                 res = distance * 1.f / std::max(first_size + second_size, size_t(1));
@@ -295,7 +295,7 @@ struct NgramDistanceImpl
         size_t prev_haystack_offset = 0;
         size_t prev_needle_offset = 0;

-        NgramStats common_stats = {};
+        std::unique_ptr<NgramCount[]> common_stats{new NgramCount[map_size]{}};

         /// The main motivation is to not allocate more on stack because we have already allocated a lot (128Kb).
         /// And we can reuse these storages in one thread because we care only about what was written to first places.
@@ -316,7 +316,7 @@ struct NgramDistanceImpl
                 calculateNeedleStats<true>,
                 needle,
                 needle_size,
-                common_stats,
+                common_stats.get(),
                 needle_ngram_storage.get());
             size_t distance = needle_stats_size;
@@ -326,7 +326,7 @@ struct NgramDistanceImpl
                     calculateHaystackStatsAndMetric<true>,
                     haystack,
                     haystack_size,
-                    common_stats,
+                    common_stats.get(),
                     distance,
                     haystack_ngram_storage.get());
@@ -378,7 +378,7 @@ struct NgramDistanceImpl
         const size_t needle_offsets_size = needle_offsets.size();
         size_t prev_offset = 0;

-        NgramStats common_stats = {};
+        std::unique_ptr<NgramCount[]> common_stats{new NgramCount[map_size]{}};

         std::unique_ptr<UInt16[]> needle_ngram_storage(new UInt16[max_string_size]);
         std::unique_ptr<UInt16[]> haystack_ngram_storage(new UInt16[max_string_size]);
@@ -394,7 +394,7 @@ struct NgramDistanceImpl
                 calculateNeedleStats<true>,
                 needle,
                 needle_size,
-                common_stats,
+                common_stats.get(),
                 needle_ngram_storage.get());
             size_t distance = needle_stats_size;
@@ -403,7 +403,7 @@ struct NgramDistanceImpl
                 calculateHaystackStatsAndMetric<true>,
                 haystack.data(),
                 haystack_size,
-                common_stats,
+                common_stats.get(),
                 distance,
                 haystack_ngram_storage.get());
@@ -430,17 +430,16 @@ struct NgramDistanceImpl
         PaddedPODArray<Float32> & res)
     {
         /// zeroing our map
-        NgramStats common_stats = {};
+        std::unique_ptr<NgramCount[]> common_stats{new NgramCount[map_size]{}};

-        /// The main motivation is to not allocate more on stack because we have already allocated a lot (128Kb).
-        /// And we can reuse these storages in one thread because we care only about what was written to first places.
-        std::unique_ptr<UInt16[]> ngram_storage(new UInt16[max_string_size]);
+        /// We can reuse these storages in one thread because we care only about what was written to first places.
+        std::unique_ptr<UInt16[]> ngram_storage(new NgramCount[max_string_size]);

         /// We use unsafe versions of getting ngrams, so I decided to use padded_data even in needle case.
         const size_t needle_size = needle.size();
         needle.resize(needle_size + default_padding);

-        const size_t needle_stats_size = dispatchSearcher(calculateNeedleStats<false>, needle.data(), needle_size, common_stats, nullptr);
+        const size_t needle_stats_size = dispatchSearcher(calculateNeedleStats<false>, needle.data(), needle_size, common_stats.get(), nullptr);

         size_t distance = needle_stats_size;
         size_t prev_offset = 0;
@@ -453,7 +452,7 @@ struct NgramDistanceImpl
             size_t haystack_stats_size = dispatchSearcher(
                 calculateHaystackStatsAndMetric<true>,
                 reinterpret_cast<const char *>(haystack),
-                haystack_size, common_stats,
+                haystack_size, common_stats.get(),
                 distance,
                 ngram_storage.get());
             /// For !symmetric version we should not use haystack_stats_size.


@@ -58,8 +58,8 @@ public:
     void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const override;

 private:
-    /// Initially allocate a piece of memory for 512 elements. NOTE: This is just a guess.
-    static constexpr size_t INITIAL_SIZE_DEGREE = 9;
+    /// Initially allocate a piece of memory for 64 elements. NOTE: This is just a guess.
+    static constexpr size_t INITIAL_SIZE_DEGREE = 6;

     template <typename T>
     struct MethodOneNumber


@@ -118,8 +118,8 @@ public:
     void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const override;

 private:
-    /// Initially allocate a piece of memory for 512 elements. NOTE: This is just a guess.
-    static constexpr size_t INITIAL_SIZE_DEGREE = 9;
+    /// Initially allocate a piece of memory for 64 elements. NOTE: This is just a guess.
+    static constexpr size_t INITIAL_SIZE_DEGREE = 6;

     void executeMethodImpl(
         const std::vector<const ColumnArray::Offsets *> & offsets_by_depth,


@@ -55,8 +55,8 @@ public:
 private:
     const Context & context;

-    /// Initially allocate a piece of memory for 512 elements. NOTE: This is just a guess.
-    static constexpr size_t INITIAL_SIZE_DEGREE = 9;
+    /// Initially allocate a piece of memory for 64 elements. NOTE: This is just a guess.
+    static constexpr size_t INITIAL_SIZE_DEGREE = 6;

     struct UnpackedArrays
     {


@@ -42,12 +42,12 @@ void AIOContextPool::doMonitor()
 void AIOContextPool::waitForCompletion()
 {
     /// array to hold completion events
-    io_event events[max_concurrent_events];
+    std::vector<io_event> events(max_concurrent_events);

     try
     {
-        const auto num_events = getCompletionEvents(events, max_concurrent_events);
-        fulfillPromises(events, num_events);
+        const auto num_events = getCompletionEvents(events.data(), max_concurrent_events);
+        fulfillPromises(events.data(), num_events);
         notifyProducers(num_events);
     }
     catch (...)


@@ -33,7 +33,8 @@ private:
         working_buffer = in.buffer();
         pos = in.position();

-        calculateHash(working_buffer.begin(), working_buffer.size());
+        // `pos` may be different from working_buffer.begin() when using AIO.
+        calculateHash(pos, working_buffer.end() - pos);

         return res;
     }


@@ -22,6 +22,8 @@ PeekableReadBuffer::PeekableReadBuffer(ReadBuffer & sub_buf_, size_t start_size_
 void PeekableReadBuffer::reset()
 {
+    checkStateCorrect();
+
     peeked_size = 0;
     checkpoint = nullptr;
     checkpoint_in_own_memory = false;
@@ -31,6 +33,8 @@ void PeekableReadBuffer::reset()

     Buffer & sub_working = sub_buf.buffer();
     BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf.offset());
+
+    checkStateCorrect();
 }

 bool PeekableReadBuffer::peekNext()
@@ -150,7 +154,7 @@ bool PeekableReadBuffer::nextImpl()
     /// Switch to reading from sub_buf (or just update it if already switched)
     Buffer & sub_working = sub_buf.buffer();
     BufferBase::set(sub_working.begin(), sub_working.size(), sub_buf.offset());
-    working_buffer_offset = sub_buf.offset();
+    nextimpl_working_buffer_offset = sub_buf.offset();

     checkStateCorrect();
     return res;
@@ -159,7 +163,6 @@ bool PeekableReadBuffer::nextImpl()

 void PeekableReadBuffer::checkStateCorrect() const
 {
-#ifndef NDEBUG
     if (checkpoint)
     {
         if (checkpointInOwnMemory())
@@ -190,7 +193,6 @@ void PeekableReadBuffer::checkStateCorrect() const
             throw DB::Exception("Pos in empty own buffer", ErrorCodes::LOGICAL_ERROR);
         if (unread_limit < memory.size())
             throw DB::Exception("Size limit exceed", ErrorCodes::LOGICAL_ERROR);
-#endif
 }

 void PeekableReadBuffer::resizeOwnMemoryIfNecessary(size_t bytes_to_append)
@@ -245,11 +247,10 @@ void PeekableReadBuffer::resizeOwnMemoryIfNecessary(size_t bytes_to_append)

 void PeekableReadBuffer::makeContinuousMemoryFromCheckpointToPos()
 {
-#ifndef NDEBUG
     if (!checkpoint)
         throw DB::Exception("There is no checkpoint", ErrorCodes::LOGICAL_ERROR);
     checkStateCorrect();
-#endif
+
     if (!checkpointInOwnMemory() || currentlyReadFromOwnMemory())
         return; /// is't already continuous


@@ -1,5 +1,6 @@
 #pragma once

+#include <cassert>
 #include <cstring>
 #include <algorithm>
 #include <memory>
@@ -41,6 +42,11 @@ public:
       */
     ReadBuffer(Position ptr, size_t size, size_t offset) : BufferBase(ptr, size, offset) {}

+    // Copying the read buffers can be dangerous because they can hold a lot of
+    // memory or open files, so better to disable the copy constructor to prevent
+    // accidental copying.
+    ReadBuffer(const ReadBuffer &) = delete;
+
     // FIXME: behavior differs greately from `BufferBase::set()` and it's very confusing.
     void set(Position ptr, size_t size) { BufferBase::set(ptr, size, 0); working_buffer.resize(0); }
@@ -54,8 +60,8 @@ public:
         if (!res)
             working_buffer.resize(0);

-        pos = working_buffer.begin() + working_buffer_offset;
-        working_buffer_offset = 0;
+        pos = working_buffer.begin() + nextimpl_working_buffer_offset;
+        nextimpl_working_buffer_offset = 0;
         return res;
     }
@@ -169,8 +175,10 @@ public:
     }

 protected:
-    /// The number of bytes to ignore from the initial position of `working_buffer` buffer.
-    size_t working_buffer_offset = 0;
+    /// The number of bytes to ignore from the initial position of `working_buffer`
+    /// buffer. Apparently this is an additional out-parameter for nextImpl(),
+    /// not a real field.
+    size_t nextimpl_working_buffer_offset = 0;

 private:
     /** Read the next data and fill a buffer with it.
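
To make the "out-parameter for nextImpl()" remark concrete, here is a heavily simplified standalone model (a toy class, not the real ReadBuffer): nextImpl() refills the buffer and may report that the first few bytes should be skipped, and next() applies that offset exactly once before resetting it to zero.

#include <cassert>
#include <cstddef>
#include <string>

class ToyReadBuffer
{
public:
    bool next()
    {
        bool res = nextImpl();
        if (!res)
            working_buffer.clear();
        // The offset produced by nextImpl() is consumed here, exactly once.
        pos = nextimpl_working_buffer_offset;
        nextimpl_working_buffer_offset = 0;
        return res;
    }

    char current() const { return working_buffer[pos]; }

protected:
    std::string working_buffer;
    size_t pos = 0;
    // "Out-parameter" of nextImpl(): how many leading bytes of the freshly
    // filled working_buffer the caller should skip (e.g. alignment padding).
    size_t nextimpl_working_buffer_offset = 0;

private:
    bool nextImpl()
    {
        // Pretend an aligned read returned two bytes of padding before the payload.
        working_buffer = "..payload";
        nextimpl_working_buffer_offset = 2;
        return true;
    }
};

int main()
{
    ToyReadBuffer buf;
    assert(buf.next());
    assert(buf.current() == 'p');  // the two padding bytes were skipped
}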


@@ -298,7 +298,7 @@ void ReadBufferAIO::finalize()
     first_unread_pos_in_file += bytes_read;
     total_bytes_read += bytes_read;
-    working_buffer_offset = region_left_padding;
+    nextimpl_working_buffer_offset = region_left_padding;

     if (total_bytes_read == max_bytes_read)
         is_eof = true;


@@ -32,8 +32,6 @@ public:
     ReadBufferFromFile(int fd, const std::string & original_file_name = {}, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
         char * existing_memory = nullptr, size_t alignment = 0);

-    ReadBufferFromFile(ReadBufferFromFile &&) = default;
-
     ~ReadBufferFromFile() override;

     /// Close file before destruction of object.


@@ -17,7 +17,6 @@ class ReadBufferFromFileBase : public BufferWithOwnMemory<SeekableReadBuffer>
 public:
     ReadBufferFromFileBase();
     ReadBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment);
-    ReadBufferFromFileBase(ReadBufferFromFileBase &&) = default;
     ~ReadBufferFromFileBase() override;
     virtual std::string getFileName() const = 0;


@@ -85,7 +85,7 @@ bool ReadBufferFromFileDescriptor::nextImpl()
         }
     }

-    pos_in_file += bytes_read;
+    file_offset_of_buffer_end += bytes_read;

     if (bytes_read)
     {
@@ -102,22 +102,35 @@ bool ReadBufferFromFileDescriptor::nextImpl()
 /// If 'offset' is small enough to stay in buffer after seek, then true seek in file does not happen.
 off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence)
 {
-    off_t new_pos;
+    size_t new_pos;
     if (whence == SEEK_SET)
+    {
+        assert(offset >= 0);
         new_pos = offset;
+    }
     else if (whence == SEEK_CUR)
-        new_pos = pos_in_file - (working_buffer.end() - pos) + offset;
+    {
+        new_pos = file_offset_of_buffer_end - (working_buffer.end() - pos) + offset;
+    }
     else
+    {
         throw Exception("ReadBufferFromFileDescriptor::seek expects SEEK_SET or SEEK_CUR as whence", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
+    }

     /// Position is unchanged.
-    if (new_pos + (working_buffer.end() - pos) == pos_in_file)
+    if (new_pos + (working_buffer.end() - pos) == file_offset_of_buffer_end)
         return new_pos;

-    if (hasPendingData() && new_pos <= pos_in_file && new_pos >= pos_in_file - static_cast<off_t>(working_buffer.size()))
+    // file_offset_of_buffer_end corresponds to working_buffer.end(); it's a past-the-end pos,
+    // so the second inequality is strict.
+    if (file_offset_of_buffer_end - working_buffer.size() <= static_cast<size_t>(new_pos)
+        && new_pos < file_offset_of_buffer_end)
     {
         /// Position is still inside buffer.
-        pos = working_buffer.begin() + (new_pos - (pos_in_file - working_buffer.size()));
+        pos = working_buffer.end() - file_offset_of_buffer_end + new_pos;
+        assert(pos >= working_buffer.begin());
+        assert(pos < working_buffer.end());
+
         return new_pos;
     }
     else
@@ -130,7 +143,7 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence)
     if (-1 == res)
         throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(),
             ErrorCodes::CANNOT_SEEK_THROUGH_FILE);

-    pos_in_file = new_pos;
+    file_offset_of_buffer_end = new_pos;

     watch.stop();
     ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds());
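
A small standalone arithmetic check of the in-buffer seek condition above (plain integers, not the real class): with file_offset_of_buffer_end being the file offset of working_buffer.end(), the buffer covers the half-open range [file_offset_of_buffer_end - buffer_size, file_offset_of_buffer_end), which is why the second comparison has to stay strict.

#include <cassert>
#include <cstddef>

// The buffer holds the file bytes [end_offset - size, end_offset).
bool seek_stays_in_buffer(size_t file_offset_of_buffer_end, size_t buffer_size, size_t new_pos)
{
    return file_offset_of_buffer_end - buffer_size <= new_pos
        && new_pos < file_offset_of_buffer_end;  // strict: end_offset itself is past-the-end
}

int main()
{
    // A 4096-byte buffer whose last byte is file offset 8191.
    const size_t end_offset = 8192, size = 4096;

    assert(seek_stays_in_buffer(end_offset, size, 4096));   // first byte of the buffer
    assert(seek_stays_in_buffer(end_offset, size, 8191));   // last byte of the buffer
    assert(!seek_stays_in_buffer(end_offset, size, 8192));  // past-the-end: needs a real seek
    assert(!seek_stays_in_buffer(end_offset, size, 4095));  // before the buffer

    // Mapping a hit back into the buffer, as the patch does:
    // pos = working_buffer.end() - file_offset_of_buffer_end + new_pos.
    const size_t new_pos = 8191;
    assert(size - (end_offset - new_pos) == 4095);
    return 0;
}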


@@ -14,7 +14,7 @@ class ReadBufferFromFileDescriptor : public ReadBufferFromFileBase
 {
 protected:
     int fd;
-    off_t pos_in_file; /// What offset in file corresponds to working_buffer.end().
+    size_t file_offset_of_buffer_end; /// What offset in file corresponds to working_buffer.end().

     bool nextImpl() override;
@@ -23,9 +23,7 @@ protected:
 public:
     ReadBufferFromFileDescriptor(int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
-        : ReadBufferFromFileBase(buf_size, existing_memory, alignment), fd(fd_), pos_in_file(0) {}
-
-    ReadBufferFromFileDescriptor(ReadBufferFromFileDescriptor &&) = default;
+        : ReadBufferFromFileBase(buf_size, existing_memory, alignment), fd(fd_), file_offset_of_buffer_end(0) {}

     int getFD() const
     {
@@ -34,7 +32,7 @@ public:
     off_t getPosition() override
     {
-        return pos_in_file - (working_buffer.end() - pos);
+        return file_offset_of_buffer_end - (working_buffer.end() - pos);
     }

     /// If 'offset' is small enough to stay in buffer after seek, then true seek in file does not happen.


@@ -19,7 +19,6 @@ class ReadBufferFromHDFS : public BufferWithOwnMemory<ReadBuffer>
     std::unique_ptr<ReadBufferFromHDFSImpl> impl;
 public:
     ReadBufferFromHDFS(const std::string & hdfs_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
-    ReadBufferFromHDFS(ReadBufferFromHDFS &&) = default;
     ~ReadBufferFromHDFS() override;

     bool nextImpl() override;


@@ -1100,9 +1100,14 @@ bool loadAtPosition(ReadBuffer & in, DB::Memory<> & memory, char * & current)
         return true;

     saveUpToPosition(in, memory, current);
     bool loaded_more = !in.eof();
-    assert(in.position() == in.buffer().begin());
+
+    // A sanity check. Buffer position may be in the beginning of the buffer
+    // (normal case), or have some offset from it (AIO).
+    assert(in.position() >= in.buffer().begin());
+    assert(in.position() <= in.buffer().end());
+
     current = in.position();
     return loaded_more;
 }


@ -11,6 +11,8 @@ namespace ErrorCodes
{ {
extern const int INVALID_JOIN_ON_EXPRESSION; extern const int INVALID_JOIN_ON_EXPRESSION;
extern const int AMBIGUOUS_COLUMN_NAME; extern const int AMBIGUOUS_COLUMN_NAME;
extern const int SYNTAX_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
} }
@ -54,47 +56,58 @@ void CollectJoinOnKeysMatcher::Data::asofToJoinKeys()
addJoinKeys(asof_left_key, asof_right_key, {1, 2}); addJoinKeys(asof_left_key, asof_right_key, {1, 2});
} }
void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & ast, Data & data) void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & ast, Data & data)
{ {
if (func.name == "and") if (func.name == "and")
return; /// go into children return; /// go into children
if (func.name == "equals") if (func.name == "or")
throw Exception("JOIN ON does not support OR. Unexpected '" + queryToString(ast) + "'", ErrorCodes::NOT_IMPLEMENTED);
ASOF::Inequality inequality = ASOF::getInequality(func.name);
if (func.name == "equals" || inequality != ASOF::Inequality::None)
{ {
if (func.arguments->children.size() != 2) if (func.arguments->children.size() != 2)
{ throw Exception("Function " + func.name + " takes two arguments, got '" + func.formatForErrorMessage() + "' instead",
throwSyntaxException("Function 'equals' takes two arguments, got '" ErrorCodes::SYNTAX_ERROR);
+ func.formatForErrorMessage() + "' instead."); }
} else
throw Exception("Expected equality or inequality, got '" + queryToString(ast) + "'", ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
if (func.name == "equals")
{
ASTPtr left = func.arguments->children.at(0); ASTPtr left = func.arguments->children.at(0);
ASTPtr right = func.arguments->children.at(1); ASTPtr right = func.arguments->children.at(1);
auto table_numbers = getTableNumbers(ast, left, right, data); auto table_numbers = getTableNumbers(ast, left, right, data);
data.addJoinKeys(left, right, table_numbers); data.addJoinKeys(left, right, table_numbers);
return;
} }
else if (inequality != ASOF::Inequality::None)
ASOF::Inequality inequality = ASOF::getInequality(func.name);
if (data.is_asof && (inequality != ASOF::Inequality::None))
{ {
if (!data.is_asof)
throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'",
ErrorCodes::NOT_IMPLEMENTED);
if (data.asof_left_key || data.asof_right_key) if (data.asof_left_key || data.asof_right_key)
throwSyntaxException("ASOF JOIN expects exactly one inequality in ON section, unexpected " + queryToString(ast) + "."); throw Exception("ASOF JOIN expects exactly one inequality in ON section. Unexpected '" + queryToString(ast) + "'",
ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
ASTPtr left = func.arguments->children.at(0); ASTPtr left = func.arguments->children.at(0);
ASTPtr right = func.arguments->children.at(1); ASTPtr right = func.arguments->children.at(1);
auto table_numbers = getTableNumbers(ast, left, right, data); auto table_numbers = getTableNumbers(ast, left, right, data);
data.addAsofJoinKeys(left, right, table_numbers, inequality); data.addAsofJoinKeys(left, right, table_numbers, inequality);
return;
} }
throwSyntaxException("Expected equals expression, got " + queryToString(ast) + ".");
} }
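In the reworked visit() above the dispatch is: "and" recurses into children, "or" is rejected as not implemented, a binary "equals" becomes an ordinary join key, a single inequality is accepted only for ASOF JOIN (and only once), and anything else is an invalid JOIN ON expression. A rough Python sketch of that decision tree, with hypothetical helper names used only for illustration:

# Sketch of the JOIN ON dispatch in CollectJoinOnKeysMatcher::visit (illustration only).
ASOF_INEQUALITIES = {"less", "greater", "lessOrEquals", "greaterOrEquals"}

def visit_on_function(name, args, is_asof, asof_key_already_set):
    if name == "and":
        return "recurse into children"
    if name == "or":
        raise NotImplementedError("JOIN ON does not support OR")
    is_inequality = name in ASOF_INEQUALITIES
    if name != "equals" and not is_inequality:
        raise ValueError("Expected equality or inequality")              # INVALID_JOIN_ON_EXPRESSION
    if len(args) != 2:
        raise SyntaxError("Function " + name + " takes two arguments")   # SYNTAX_ERROR
    if name == "equals":
        return "data.addJoinKeys(left, right)"
    if not is_asof:
        raise NotImplementedError("JOIN ON inequalities are not supported")
    if asof_key_already_set:
        raise ValueError("ASOF JOIN expects exactly one inequality in ON section")
    return "data.addAsofJoinKeys(left, right, inequality)"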
void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector<const ASTIdentifier *> & out) void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector<const ASTIdentifier *> & out)
{ {
if (const auto * ident = ast->as<ASTIdentifier>()) if (const auto * func = ast->as<ASTFunction>())
{
if (func->name == "arrayJoin")
throw Exception("Not allowed function in JOIN ON. Unexpected '" + queryToString(ast) + "'",
ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
}
else if (const auto * ident = ast->as<ASTIdentifier>())
{ {
if (IdentifierSemantic::getColumnName(*ident)) if (IdentifierSemantic::getColumnName(*ident))
out.push_back(ident); out.push_back(ident);
@ -122,8 +135,8 @@ std::pair<size_t, size_t> CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr
auto left_name = queryToString(*left_identifiers[0]); auto left_name = queryToString(*left_identifiers[0]);
auto right_name = queryToString(*right_identifiers[0]); auto right_name = queryToString(*right_identifiers[0]);
throwSyntaxException("In expression " + queryToString(expr) + " columns " + left_name + " and " + right_name throw Exception("In expression " + queryToString(expr) + " columns " + left_name + " and " + right_name
+ " are from the same table but from different arguments of equal function."); + " are from the same table but from different arguments of equal function", ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
} }
return std::make_pair(left_idents_table, right_idents_table); return std::make_pair(left_idents_table, right_idents_table);
@ -214,12 +227,4 @@ size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vector<const ASTIde
return table_number; return table_number;
} }
[[noreturn]] void CollectJoinOnKeysMatcher::throwSyntaxException(const String & msg)
{
throw Exception("Invalid expression for JOIN ON. " + msg +
" Supported syntax: JOIN ON Expr([table.]column, ...) = Expr([table.]column, ...) "
"[AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]",
ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
}
} }


@ -49,8 +49,7 @@ public:
static bool needChildVisit(const ASTPtr & node, const ASTPtr &) static bool needChildVisit(const ASTPtr & node, const ASTPtr &)
{ {
if (auto * func = node->as<ASTFunction>()) if (auto * func = node->as<ASTFunction>())
if (func->name == "equals") return func->name == "and";
return false;
return true; return true;
} }
@ -61,8 +60,6 @@ private:
static std::pair<size_t, size_t> getTableNumbers(const ASTPtr & expr, const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data); static std::pair<size_t, size_t> getTableNumbers(const ASTPtr & expr, const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data);
static const ASTIdentifier * unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases); static const ASTIdentifier * unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases);
static size_t getTableForIdentifiers(std::vector<const ASTIdentifier *> & identifiers, const Data & data); static size_t getTableForIdentifiers(std::vector<const ASTIdentifier *> & identifiers, const Data & data);
[[noreturn]] static void throwSyntaxException(const String & msg);
}; };
/// Parse JOIN ON expression and collect ASTs for joined columns. /// Parse JOIN ON expression and collect ASTs for joined columns.


@ -697,7 +697,7 @@ void executeQuery(
const char * end; const char * end;
/// If 'istr' is empty now, fetch next data into buffer. /// If 'istr' is empty now, fetch next data into buffer.
if (istr.buffer().size() == 0) if (!istr.hasPendingData())
istr.next(); istr.next();
size_t max_query_size = context.getSettingsRef().max_query_size; size_t max_query_size = context.getSettingsRef().max_query_size;
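The switch from istr.buffer().size() == 0 to !istr.hasPendingData() changes the question being asked: the old check only detects a working buffer that is still empty (never filled), while hasPendingData() asks whether any unread bytes remain between the current position and the end of the working buffer, which is the condition that actually decides whether next() is needed. Roughly, in Python terms (an approximation of the ReadBuffer contract, not its code):

# Approximate meaning of the two checks (sketch).
def buffer_is_unfilled(buffer_begin, buffer_end):
    return buffer_end - buffer_begin == 0      # old check: istr.buffer().size() == 0

def has_pending_data(position, buffer_end):
    return position < buffer_end               # new check: unread bytes remain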


@ -135,7 +135,13 @@ void writeCommonErrorMessage(
out << ": failed at position " << (last_token.begin - begin + 1); out << ": failed at position " << (last_token.begin - begin + 1);
if (last_token.type == TokenType::EndOfStream || last_token.type == TokenType::Semicolon) if (last_token.type == TokenType::EndOfStream || last_token.type == TokenType::Semicolon)
{
out << " (end of query)"; out << " (end of query)";
}
else
{
out << " ('" << std::string(last_token.begin, last_token.end - last_token.begin) << "')";
}
/// If query is multiline. /// If query is multiline.
const char * nl = find_first_symbols<'\n'>(begin, end); const char * nl = find_first_symbols<'\n'>(begin, end);


@ -727,6 +727,11 @@ bool AvroConfluentRowInputFormat::readRow(MutableColumns & columns, RowReadExten
{ {
return false; return false;
} }
// skip tombstone records (kafka messages with null value)
if (in.available() == 0)
{
return false;
}
SchemaId schema_id = readConfluentSchemaId(in); SchemaId schema_id = readConfluentSchemaId(in);
const auto & deserializer = getOrCreateDeserializer(schema_id); const auto & deserializer = getOrCreateDeserializer(schema_id);
deserializer.deserializeRow(columns, *decoder, ext); deserializer.deserializeRow(columns, *decoder, ext);
@ -734,6 +739,12 @@ bool AvroConfluentRowInputFormat::readRow(MutableColumns & columns, RowReadExten
return true; return true;
} }
void AvroConfluentRowInputFormat::syncAfterError()
{
// skip until the end of current kafka message
in.tryIgnore(in.available());
}
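The two additions above handle Kafka-specific cases: a tombstone record arrives as a message with a null value, so the input buffer is empty and readRow() returns false before trying to read a Confluent schema id; and after a parse error, syncAfterError() drains the rest of the current message so parsing resumes at a message boundary. For reference, a tombstone can be produced from a test with kafka-python roughly like this (topic name and broker address are placeholders):

from kafka import KafkaProducer

# A tombstone is a keyed message with a null value; the AvroConfluent input format
# above now skips it instead of trying to parse a schema id from an empty payload.
producer = KafkaProducer(bootstrap_servers="localhost:9092")
producer.send("avro_confluent_topic", key=b"row-0", value=None)
producer.flush()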
const AvroDeserializer & AvroConfluentRowInputFormat::getOrCreateDeserializer(SchemaId schema_id) const AvroDeserializer & AvroConfluentRowInputFormat::getOrCreateDeserializer(SchemaId schema_id)
{ {
auto it = deserializer_cache.find(schema_id); auto it = deserializer_cache.find(schema_id);


@ -129,6 +129,9 @@ public:
String getName() const override { return "AvroConfluentRowInputFormat"; } String getName() const override { return "AvroConfluentRowInputFormat"; }
class SchemaRegistry; class SchemaRegistry;
protected:
bool allowSyncAfterError() const override { return true; }
void syncAfterError() override;
private: private:
std::shared_ptr<SchemaRegistry> schema_registry; std::shared_ptr<SchemaRegistry> schema_registry;
using SchemaId = uint32_t; using SchemaId = uint32_t;


@ -1030,7 +1030,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
commands_for_part.emplace_back(command); commands_for_part.emplace_back(command);
} }
if (source_part->isStoredOnDisk() && !isStorageTouchedByMutations(storage_from_source_part, metadata_snapshot, commands_for_part, context_for_reading)) if (source_part->isStoredOnDisk() && !isStorageTouchedByMutations(
storage_from_source_part, metadata_snapshot, commands_for_part, context_for_reading))
{ {
LOG_TRACE(log, "Part {} doesn't change up to mutation version {}", source_part->name, future_part.part_info.mutation); LOG_TRACE(log, "Part {} doesn't change up to mutation version {}", source_part->name, future_part.part_info.mutation);
return data.cloneAndLoadDataPartOnSameDisk(source_part, "tmp_clone_", future_part.part_info, metadata_snapshot); return data.cloneAndLoadDataPartOnSameDisk(source_part, "tmp_clone_", future_part.part_info, metadata_snapshot);
@ -1042,7 +1043,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
BlockInputStreamPtr in = nullptr; BlockInputStreamPtr in = nullptr;
Block updated_header; Block updated_header;
std::optional<MutationsInterpreter> interpreter; std::unique_ptr<MutationsInterpreter> interpreter;
const auto data_settings = data.getSettings(); const auto data_settings = data.getSettings();
MutationCommands for_interpreter; MutationCommands for_interpreter;
@ -1057,7 +1058,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
if (!for_interpreter.empty()) if (!for_interpreter.empty())
{ {
interpreter.emplace(storage_from_source_part, metadata_snapshot, for_interpreter, context_for_reading, true); interpreter = std::make_unique<MutationsInterpreter>(
storage_from_source_part, metadata_snapshot, for_interpreter, context_for_reading, true);
in = interpreter->execute(); in = interpreter->execute();
updated_header = interpreter->getUpdatedHeader(); updated_header = interpreter->getUpdatedHeader();
in->setProgressCallback(MergeProgressCallback(merge_entry, watch_prev_elapsed, stage_progress)); in->setProgressCallback(MergeProgressCallback(merge_entry, watch_prev_elapsed, stage_progress));


@ -11,6 +11,7 @@ namespace ErrorCodes
{ {
extern const int CANNOT_READ_ALL_DATA; extern const int CANNOT_READ_ALL_DATA;
extern const int ARGUMENT_OUT_OF_BOUND; extern const int ARGUMENT_OUT_OF_BOUND;
extern const int MEMORY_LIMIT_EXCEEDED;
} }
@ -43,66 +44,74 @@ MergeTreeReaderCompact::MergeTreeReaderCompact(
settings.save_marks_in_cache, settings.save_marks_in_cache,
data_part->getColumns().size()) data_part->getColumns().size())
{ {
size_t columns_num = columns.size(); try
column_positions.resize(columns_num);
read_only_offsets.resize(columns_num);
auto name_and_type = columns.begin();
for (size_t i = 0; i < columns_num; ++i, ++name_and_type)
{ {
const auto & [name, type] = getColumnFromPart(*name_and_type); size_t columns_num = columns.size();
auto position = data_part->getColumnPosition(name);
if (!position && typeid_cast<const DataTypeArray *>(type.get())) column_positions.resize(columns_num);
read_only_offsets.resize(columns_num);
auto name_and_type = columns.begin();
for (size_t i = 0; i < columns_num; ++i, ++name_and_type)
{ {
/// If array of Nested column is missing in part, const auto & [name, type] = getColumnFromPart(*name_and_type);
/// we have to read its offsets if they exist. auto position = data_part->getColumnPosition(name);
position = findColumnForOffsets(name);
read_only_offsets[i] = (position != std::nullopt); if (!position && typeid_cast<const DataTypeArray *>(type.get()))
{
/// If array of Nested column is missing in part,
/// we have to read its offsets if they exist.
position = findColumnForOffsets(name);
read_only_offsets[i] = (position != std::nullopt);
}
column_positions[i] = std::move(position);
} }
column_positions[i] = std::move(position); /// Do not use max_read_buffer_size, but try to lower buffer size with maximal size of granule to avoid reading much data.
auto buffer_size = getReadBufferSize(data_part, marks_loader, column_positions, all_mark_ranges);
if (!buffer_size || settings.max_read_buffer_size < buffer_size)
buffer_size = settings.max_read_buffer_size;
const String full_data_path = data_part->getFullRelativePath() + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION;
if (uncompressed_cache)
{
auto buffer = std::make_unique<CachedCompressedReadBuffer>(
fullPath(data_part->volume->getDisk(), full_data_path),
[this, full_data_path, buffer_size]()
{
return data_part->volume->getDisk()->readFile(
full_data_path,
buffer_size,
0,
settings.min_bytes_to_use_direct_io,
settings.min_bytes_to_use_mmap_io);
},
uncompressed_cache);
if (profile_callback_)
buffer->setProfileCallback(profile_callback_, clock_type_);
cached_buffer = std::move(buffer);
data_buffer = cached_buffer.get();
}
else
{
auto buffer =
std::make_unique<CompressedReadBufferFromFile>(
data_part->volume->getDisk()->readFile(
full_data_path, buffer_size, 0, settings.min_bytes_to_use_direct_io, settings.min_bytes_to_use_mmap_io));
if (profile_callback_)
buffer->setProfileCallback(profile_callback_, clock_type_);
non_cached_buffer = std::move(buffer);
data_buffer = non_cached_buffer.get();
}
} }
catch (...)
/// Do not use max_read_buffer_size, but try to lower buffer size with maximal size of granule to avoid reading much data.
auto buffer_size = getReadBufferSize(data_part, marks_loader, column_positions, all_mark_ranges);
if (!buffer_size || settings.max_read_buffer_size < buffer_size)
buffer_size = settings.max_read_buffer_size;
const String full_data_path = data_part->getFullRelativePath() + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION;
if (uncompressed_cache)
{ {
auto buffer = std::make_unique<CachedCompressedReadBuffer>( storage.reportBrokenPart(data_part->name);
fullPath(data_part->volume->getDisk(), full_data_path), throw;
[this, full_data_path, buffer_size]()
{
return data_part->volume->getDisk()->readFile(
full_data_path,
buffer_size,
0,
settings.min_bytes_to_use_direct_io,
settings.min_bytes_to_use_mmap_io);
},
uncompressed_cache);
if (profile_callback_)
buffer->setProfileCallback(profile_callback_, clock_type_);
cached_buffer = std::move(buffer);
data_buffer = cached_buffer.get();
}
else
{
auto buffer =
std::make_unique<CompressedReadBufferFromFile>(
data_part->volume->getDisk()->readFile(
full_data_path, buffer_size, 0, settings.min_bytes_to_use_direct_io, settings.min_bytes_to_use_mmap_io));
if (profile_callback_)
buffer->setProfileCallback(profile_callback_, clock_type_);
non_cached_buffer = std::move(buffer);
data_buffer = non_cached_buffer.get();
} }
} }
@ -155,10 +164,18 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading,
} }
catch (Exception & e) catch (Exception & e)
{ {
if (e.code() != ErrorCodes::MEMORY_LIMIT_EXCEEDED)
storage.reportBrokenPart(data_part->name);
/// Better diagnostics. /// Better diagnostics.
e.addMessage("(while reading column " + name + ")"); e.addMessage("(while reading column " + name + ")");
throw; throw;
} }
catch (...)
{
storage.reportBrokenPart(data_part->name);
throw;
}
} }
++from_mark; ++from_mark;


@ -3,6 +3,7 @@ import random
import threading import threading
import time import time
import pytest import pytest
import io
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV from helpers.test_tools import TSV
@ -16,6 +17,11 @@ from kafka import KafkaAdminClient, KafkaProducer, KafkaConsumer, BrokerConnecti
from kafka.admin import NewTopic from kafka.admin import NewTopic
from kafka.protocol.admin import DescribeGroupsResponse_v1, DescribeGroupsRequest_v1 from kafka.protocol.admin import DescribeGroupsResponse_v1, DescribeGroupsRequest_v1
from kafka.protocol.group import MemberAssignment from kafka.protocol.group import MemberAssignment
import avro.schema
from confluent.schemaregistry.client import CachedSchemaRegistryClient
from confluent.schemaregistry.serializers.MessageSerializer import MessageSerializer
import socket import socket
from google.protobuf.internal.encoder import _VarintBytes from google.protobuf.internal.encoder import _VarintBytes
@ -102,6 +108,22 @@ def kafka_produce_protobuf_messages(topic, start_index, num_messages):
producer.flush() producer.flush()
print("Produced {} messages for topic {}".format(num_messages, topic)) print("Produced {} messages for topic {}".format(num_messages, topic))
def avro_confluent_message(schema_registry_client, value):
# type: (CachedSchemaRegistryClient, dict) -> str
serializer = MessageSerializer(schema_registry_client)
schema = avro.schema.make_avsc_object({
'name': 'row',
'type': 'record',
'fields': [
{'name': 'id', 'type': 'long'},
{'name': 'blockNo', 'type': 'int'},
{'name': 'val1', 'type': 'string'},
{'name': 'val2', 'type': 'float'},
{'name': 'val3', 'type': 'int'}
]
})
return serializer.encode_record_with_schema('test_subject', schema, value)
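The helper above is what builds the AvroConfluent samples further down; a standalone call would look roughly like this (cluster.schema_registry_client and kafka_produce come from the test's cluster fixtures):

# Illustrative standalone use of avro_confluent_message with the cluster fixtures.
message = avro_confluent_message(cluster.schema_registry_client,
                                 {'id': 0, 'blockNo': 0, 'val1': u'AM', 'val2': 0.5, 'val3': 1})
kafka_produce('format_tests_AvroConfluent', [message])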
@pytest.mark.timeout(180) @pytest.mark.timeout(180)
def test_kafka_json_as_string(kafka_cluster): def test_kafka_json_as_string(kafka_cluster):
@ -139,8 +161,8 @@ def test_kafka_formats(kafka_cluster):
'{"id":"0","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n', '{"id":"0","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n',
'{"id":"1","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"2","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"3","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"4","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"5","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"6","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"7","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"8","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"9","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"10","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"11","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"12","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"13","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"14","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"15","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n', '{"id":"1","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"2","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"3","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"4","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"5","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"6","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"7","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"8","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"9","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"10","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"11","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"12","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"13","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"14","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"15","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n',
'{"id":"0","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n', '{"id":"0","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n',
'' # tolerates
], ],
'supports_empty_value': True,
}, },
# JSONAsString doesn't fit to that test, and tested separately # JSONAsString doesn't fit to that test, and tested separately
'JSONCompactEachRow' : { 'JSONCompactEachRow' : {
@ -148,8 +170,8 @@ def test_kafka_formats(kafka_cluster):
'["0", 0, "AM", 0.5, 1]\n', '["0", 0, "AM", 0.5, 1]\n',
'["1", 0, "AM", 0.5, 1]\n["2", 0, "AM", 0.5, 1]\n["3", 0, "AM", 0.5, 1]\n["4", 0, "AM", 0.5, 1]\n["5", 0, "AM", 0.5, 1]\n["6", 0, "AM", 0.5, 1]\n["7", 0, "AM", 0.5, 1]\n["8", 0, "AM", 0.5, 1]\n["9", 0, "AM", 0.5, 1]\n["10", 0, "AM", 0.5, 1]\n["11", 0, "AM", 0.5, 1]\n["12", 0, "AM", 0.5, 1]\n["13", 0, "AM", 0.5, 1]\n["14", 0, "AM", 0.5, 1]\n["15", 0, "AM", 0.5, 1]\n', '["1", 0, "AM", 0.5, 1]\n["2", 0, "AM", 0.5, 1]\n["3", 0, "AM", 0.5, 1]\n["4", 0, "AM", 0.5, 1]\n["5", 0, "AM", 0.5, 1]\n["6", 0, "AM", 0.5, 1]\n["7", 0, "AM", 0.5, 1]\n["8", 0, "AM", 0.5, 1]\n["9", 0, "AM", 0.5, 1]\n["10", 0, "AM", 0.5, 1]\n["11", 0, "AM", 0.5, 1]\n["12", 0, "AM", 0.5, 1]\n["13", 0, "AM", 0.5, 1]\n["14", 0, "AM", 0.5, 1]\n["15", 0, "AM", 0.5, 1]\n',
'["0", 0, "AM", 0.5, 1]\n', '["0", 0, "AM", 0.5, 1]\n',
'' # tolerates
], ],
'supports_empty_value': True,
}, },
'JSONCompactEachRowWithNamesAndTypes' : { 'JSONCompactEachRowWithNamesAndTypes' : {
'data_sample' : [ 'data_sample' : [
@ -180,16 +202,16 @@ def test_kafka_formats(kafka_cluster):
'0,0,"AM",0.5,1\n', '0,0,"AM",0.5,1\n',
'1,0,"AM",0.5,1\n2,0,"AM",0.5,1\n3,0,"AM",0.5,1\n4,0,"AM",0.5,1\n5,0,"AM",0.5,1\n6,0,"AM",0.5,1\n7,0,"AM",0.5,1\n8,0,"AM",0.5,1\n9,0,"AM",0.5,1\n10,0,"AM",0.5,1\n11,0,"AM",0.5,1\n12,0,"AM",0.5,1\n13,0,"AM",0.5,1\n14,0,"AM",0.5,1\n15,0,"AM",0.5,1\n', '1,0,"AM",0.5,1\n2,0,"AM",0.5,1\n3,0,"AM",0.5,1\n4,0,"AM",0.5,1\n5,0,"AM",0.5,1\n6,0,"AM",0.5,1\n7,0,"AM",0.5,1\n8,0,"AM",0.5,1\n9,0,"AM",0.5,1\n10,0,"AM",0.5,1\n11,0,"AM",0.5,1\n12,0,"AM",0.5,1\n13,0,"AM",0.5,1\n14,0,"AM",0.5,1\n15,0,"AM",0.5,1\n',
'0,0,"AM",0.5,1\n', '0,0,"AM",0.5,1\n',
'' # tolerates
], ],
'supports_empty_value': True,
}, },
'TSV' : { 'TSV' : {
'data_sample' : [ 'data_sample' : [
'0\t0\tAM\t0.5\t1\n', '0\t0\tAM\t0.5\t1\n',
'1\t0\tAM\t0.5\t1\n2\t0\tAM\t0.5\t1\n3\t0\tAM\t0.5\t1\n4\t0\tAM\t0.5\t1\n5\t0\tAM\t0.5\t1\n6\t0\tAM\t0.5\t1\n7\t0\tAM\t0.5\t1\n8\t0\tAM\t0.5\t1\n9\t0\tAM\t0.5\t1\n10\t0\tAM\t0.5\t1\n11\t0\tAM\t0.5\t1\n12\t0\tAM\t0.5\t1\n13\t0\tAM\t0.5\t1\n14\t0\tAM\t0.5\t1\n15\t0\tAM\t0.5\t1\n', '1\t0\tAM\t0.5\t1\n2\t0\tAM\t0.5\t1\n3\t0\tAM\t0.5\t1\n4\t0\tAM\t0.5\t1\n5\t0\tAM\t0.5\t1\n6\t0\tAM\t0.5\t1\n7\t0\tAM\t0.5\t1\n8\t0\tAM\t0.5\t1\n9\t0\tAM\t0.5\t1\n10\t0\tAM\t0.5\t1\n11\t0\tAM\t0.5\t1\n12\t0\tAM\t0.5\t1\n13\t0\tAM\t0.5\t1\n14\t0\tAM\t0.5\t1\n15\t0\tAM\t0.5\t1\n',
'0\t0\tAM\t0.5\t1\n', '0\t0\tAM\t0.5\t1\n',
'' # tolerates
], ],
'supports_empty_value': True,
}, },
'CSVWithNames' : { 'CSVWithNames' : {
'data_sample' : [ 'data_sample' : [
@ -211,16 +233,16 @@ def test_kafka_formats(kafka_cluster):
"(0,0,'AM',0.5,1)", "(0,0,'AM',0.5,1)",
"(1,0,'AM',0.5,1),(2,0,'AM',0.5,1),(3,0,'AM',0.5,1),(4,0,'AM',0.5,1),(5,0,'AM',0.5,1),(6,0,'AM',0.5,1),(7,0,'AM',0.5,1),(8,0,'AM',0.5,1),(9,0,'AM',0.5,1),(10,0,'AM',0.5,1),(11,0,'AM',0.5,1),(12,0,'AM',0.5,1),(13,0,'AM',0.5,1),(14,0,'AM',0.5,1),(15,0,'AM',0.5,1)", "(1,0,'AM',0.5,1),(2,0,'AM',0.5,1),(3,0,'AM',0.5,1),(4,0,'AM',0.5,1),(5,0,'AM',0.5,1),(6,0,'AM',0.5,1),(7,0,'AM',0.5,1),(8,0,'AM',0.5,1),(9,0,'AM',0.5,1),(10,0,'AM',0.5,1),(11,0,'AM',0.5,1),(12,0,'AM',0.5,1),(13,0,'AM',0.5,1),(14,0,'AM',0.5,1),(15,0,'AM',0.5,1)",
"(0,0,'AM',0.5,1)", "(0,0,'AM',0.5,1)",
'' # tolerates
], ],
'supports_empty_value': True,
}, },
'TSVWithNames' : { 'TSVWithNames' : {
'data_sample' : [ 'data_sample' : [
'id\tblockNo\tval1\tval2\tval3\n0\t0\tAM\t0.5\t1\n', 'id\tblockNo\tval1\tval2\tval3\n0\t0\tAM\t0.5\t1\n',
'id\tblockNo\tval1\tval2\tval3\n1\t0\tAM\t0.5\t1\n2\t0\tAM\t0.5\t1\n3\t0\tAM\t0.5\t1\n4\t0\tAM\t0.5\t1\n5\t0\tAM\t0.5\t1\n6\t0\tAM\t0.5\t1\n7\t0\tAM\t0.5\t1\n8\t0\tAM\t0.5\t1\n9\t0\tAM\t0.5\t1\n10\t0\tAM\t0.5\t1\n11\t0\tAM\t0.5\t1\n12\t0\tAM\t0.5\t1\n13\t0\tAM\t0.5\t1\n14\t0\tAM\t0.5\t1\n15\t0\tAM\t0.5\t1\n', 'id\tblockNo\tval1\tval2\tval3\n1\t0\tAM\t0.5\t1\n2\t0\tAM\t0.5\t1\n3\t0\tAM\t0.5\t1\n4\t0\tAM\t0.5\t1\n5\t0\tAM\t0.5\t1\n6\t0\tAM\t0.5\t1\n7\t0\tAM\t0.5\t1\n8\t0\tAM\t0.5\t1\n9\t0\tAM\t0.5\t1\n10\t0\tAM\t0.5\t1\n11\t0\tAM\t0.5\t1\n12\t0\tAM\t0.5\t1\n13\t0\tAM\t0.5\t1\n14\t0\tAM\t0.5\t1\n15\t0\tAM\t0.5\t1\n',
'id\tblockNo\tval1\tval2\tval3\n0\t0\tAM\t0.5\t1\n', 'id\tblockNo\tval1\tval2\tval3\n0\t0\tAM\t0.5\t1\n',
'' # tolerates
], ],
'supports_empty_value': True,
}, },
'TSVWithNamesAndTypes' : { 'TSVWithNamesAndTypes' : {
'data_sample' : [ 'data_sample' : [
@ -389,25 +411,26 @@ def test_kafka_formats(kafka_cluster):
# # ], # # ],
# }, # },
# 'Avro' : { # 'Avro' : {
# # TODO: Not working at all: avro::Exception, e.what() = EOF reached
# #./contrib/libcxx/src/support/runtime/stdexcept_default.ipp:33: std::runtime_error::runtime_error(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) @ 0x22ce2080 in /usr/bin/clickhouse
# #./contrib/avro/lang/c++/api/Exception.hh:36: avro::Exception::Exception(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) @ 0x1de48a6e in /usr/bin/clickhouse
# #./contrib/avro/lang/c++/api/Stream.hh:336: avro::StreamReader::more() @ 0x22717f56 in /usr/bin/clickhouse
# #./contrib/avro/lang/c++/api/Stream.hh:0: avro::StreamReader::readBytes(unsigned char*, unsigned long) @ 0x22717d22 in /usr/bin/clickhouse
# #./contrib/avro/lang/c++/impl/BinaryDecoder.cc:170: avro::BinaryDecoder::decodeFixed(unsigned long, std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&) @ 0x227177cb in /usr/bin/clickhouse
# #./contrib/avro/lang/c++/api/Specific.hh:216: avro::codec_traits<std::__1::array<unsigned char, 4ul> >::decode(avro::Decoder&, std::__1::array<unsigned char, 4ul>&) @ 0x22743624 in /usr/bin/clickhouse
# #./contrib/avro/lang/c++/api/Specific.hh:342: void avro::decode<std::__1::array<unsigned char, 4ul> >(avro::Decoder&, std::__1::array<unsigned char, 4ul>&) @ 0x2272970d in /usr/bin/clickhouse
# #./contrib/avro/lang/c++/impl/DataFile.cc:487: avro::DataFileReaderBase::readHeader() @ 0x2272608d in /usr/bin/clickhouse
# #./contrib/avro/lang/c++/impl/DataFile.cc:280: avro::DataFileReaderBase::DataFileReaderBase(std::__1::unique_ptr<avro::InputStream, std::__1::default_delete<avro::InputStream> >) @ 0x22726923 in /usr/bin/clickhouse
# #./src/Processors/Formats/Impl/AvroRowInputFormat.cpp:571: DB::AvroRowInputFormat::AvroRowInputFormat(DB::Block const&, DB::ReadBuffer&, DB::RowInputFormatParams) @ 0x1de19c9b in /usr/bin/clickhouse
# 'data_sample' : [ # 'data_sample' : [
# #'\x4f\x62\x6a\x01\x04\x14\x61\x76\x72\x6f\x2e\x63\x6f\x64\x65\x63\x0c\x73\x6e\x61\x70\x70\x79\x16\x61\x76\x72\x6f\x2e\x73\x63\x68\x65\x6d\x61\x80\x03\x7b\x22\x74\x79\x70\x65\x22\x3a\x22\x72\x65\x63\x6f\x72\x64\x22\x2c\x22\x6e\x61\x6d\x65\x22\x3a\x22\x72\x6f\x77\x22\x2c\x22\x66\x69\x65\x6c\x64\x73\x22\x3a\x5b\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x69\x64\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x6c\x6f\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x62\x6c\x6f\x63\x6b\x4e\x6f\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x31\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x62\x79\x74\x65\x73\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x32\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x66\x6c\x6f\x61\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x33\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x5d\x7d\x00\x73\x6e\x66\xa3\x62\x9f\x88\xed\x28\x08\x67\xf0\x75\xaf\x23\x83\x02\x20\x0a\x24\x00\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x80\xaa\x4a\xe3\x73\x6e\x66\xa3\x62\x9f\x88\xed\x28\x08\x67\xf0\x75\xaf\x23\x83', # '\x4f\x62\x6a\x01\x04\x16\x61\x76\x72\x6f\x2e\x73\x63\x68\x65\x6d\x61\x82\x03\x7b\x22\x74\x79\x70\x65\x22\x3a\x22\x72\x65\x63\x6f\x72\x64\x22\x2c\x22\x6e\x61\x6d\x65\x22\x3a\x22\x72\x6f\x77\x22\x2c\x22\x66\x69\x65\x6c\x64\x73\x22\x3a\x5b\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x69\x64\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x6c\x6f\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x62\x6c\x6f\x63\x6b\x4e\x6f\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x31\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x73\x74\x72\x69\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x32\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x66\x6c\x6f\x61\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x33\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x5d\x7d\x14\x61\x76\x72\x6f\x2e\x63\x6f\x64\x65\x63\x08\x6e\x75\x6c\x6c\x00\x8d\x1f\xf2\x17\x71\xa4\x2e\xe4\xc9\x0a\x23\x67\x12\xaa\xc6\xc0\x02\x14\x00\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x8d\x1f\xf2\x17\x71\xa4\x2e\xe4\xc9\x0a\x23\x67\x12\xaa\xc6\xc0',
# #'\x4f\x62\x6a\x01\x04\x14\x61\x76\x72\x6f\x2e\x63\x6f\x64\x65\x63\x0c\x73\x6e\x61\x70\x70\x79\x16\x61\x76\x72\x6f\x2e\x73\x63\x68\x65\x6d\x61\x80\x03\x7b\x22\x74\x79\x70\x65\x22\x3a\x22\x72\x65\x63\x6f\x72\x64\x22\x2c\x22\x6e\x61\x6d\x65\x22\x3a\x22\x72\x6f\x77\x22\x2c\x22\x66\x69\x65\x6c\x64\x73\x22\x3a\x5b\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x69\x64\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x6c\x6f\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x62\x6c\x6f\x63\x6b\x4e\x6f\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x31\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x62\x79\x74\x65\x73\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x32\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x66\x6c\x6f\x61\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x33\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x5d\x7d\x00\x73\x6e\x66\xa3\x62\x9f\x88\xed\x28\x08\x67\xf0\x75\xaf\x23\x83\x1e\x9e\x01\x96\x01\x28\x02\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x04\x15\x0a\x00\x06\x15\x0a\x00\x08\x15\x0a\x00\x0a\x15\x0a\x00\x0c\x15\x0a\x00\x0e\x15\x0a\x00\x10\x15\x0a\x00\x12\x15\x0a\x00\x14\x15\x0a\x00\x16\x15\x0a\x00\x18\x15\x0a\x00\x1a\x15\x0a\x00\x1c\x15\x0a\x24\x1e\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x49\x73\x4d\xca\x73\x6e\x66\xa3\x62\x9f\x88\xed\x28\x08\x67\xf0\x75\xaf\x23\x83', # '\x4f\x62\x6a\x01\x04\x16\x61\x76\x72\x6f\x2e\x73\x63\x68\x65\x6d\x61\x82\x03\x7b\x22\x74\x79\x70\x65\x22\x3a\x22\x72\x65\x63\x6f\x72\x64\x22\x2c\x22\x6e\x61\x6d\x65\x22\x3a\x22\x72\x6f\x77\x22\x2c\x22\x66\x69\x65\x6c\x64\x73\x22\x3a\x5b\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x69\x64\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x6c\x6f\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x62\x6c\x6f\x63\x6b\x4e\x6f\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x31\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x73\x74\x72\x69\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x32\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x66\x6c\x6f\x61\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x33\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x5d\x7d\x14\x61\x76\x72\x6f\x2e\x63\x6f\x64\x65\x63\x08\x6e\x75\x6c\x6c\x00\xeb\x9d\x51\x82\xf2\x11\x3d\x0b\xc5\x92\x97\xb2\x07\x6d\x72\x5a\x1e\xac\x02\x02\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x04\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x06\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x08\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x0a\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x0c\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x0e\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x10\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x12\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x14\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x16\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x18\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x1a\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x1c\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x1e\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\xeb\x9d\x51\x82\xf2\x11\x3d\x0b\xc5\x92\x97\xb2\x07\x6d\x72\x5a',
# #'\x4f\x62\x6a\x01\x04\x14\x61\x76\x72\x6f\x2e\x63\x6f\x64\x65\x63\x0c\x73\x6e\x61\x70\x70\x79\x16\x61\x76\x72\x6f\x2e\x73\x63\x68\x65\x6d\x61\x80\x03\x7b\x22\x74\x79\x70\x65\x22\x3a\x22\x72\x65\x63\x6f\x72\x64\x22\x2c\x22\x6e\x61\x6d\x65\x22\x3a\x22\x72\x6f\x77\x22\x2c\x22\x66\x69\x65\x6c\x64\x73\x22\x3a\x5b\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x69\x64\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x6c\x6f\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x62\x6c\x6f\x63\x6b\x4e\x6f\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x31\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x62\x79\x74\x65\x73\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x32\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x66\x6c\x6f\x61\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x33\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x5d\x7d\x00\x73\x6e\x66\xa3\x62\x9f\x88\xed\x28\x08\x67\xf0\x75\xaf\x23\x83\x02\x20\x0a\x24\x00\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x80\xaa\x4a\xe3\x73\x6e\x66\xa3\x62\x9f\x88\xed\x28\x08\x67\xf0\x75\xaf\x23\x83', # '\x4f\x62\x6a\x01\x04\x16\x61\x76\x72\x6f\x2e\x73\x63\x68\x65\x6d\x61\x82\x03\x7b\x22\x74\x79\x70\x65\x22\x3a\x22\x72\x65\x63\x6f\x72\x64\x22\x2c\x22\x6e\x61\x6d\x65\x22\x3a\x22\x72\x6f\x77\x22\x2c\x22\x66\x69\x65\x6c\x64\x73\x22\x3a\x5b\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x69\x64\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x6c\x6f\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x62\x6c\x6f\x63\x6b\x4e\x6f\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x31\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x73\x74\x72\x69\x6e\x67\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x32\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x66\x6c\x6f\x61\x74\x22\x7d\x2c\x7b\x22\x6e\x61\x6d\x65\x22\x3a\x22\x76\x61\x6c\x33\x22\x2c\x22\x74\x79\x70\x65\x22\x3a\x22\x69\x6e\x74\x22\x7d\x5d\x7d\x14\x61\x76\x72\x6f\x2e\x63\x6f\x64\x65\x63\x08\x6e\x75\x6c\x6c\x00\x73\x65\x4f\x7c\xd9\x33\xe1\x18\xdd\x30\xe8\x22\x2a\x58\x20\x6f\x02\x14\x00\x00\x04\x41\x4d\x00\x00\x00\x3f\x02\x73\x65\x4f\x7c\xd9\x33\xe1\x18\xdd\x30\xe8\x22\x2a\x58\x20\x6f',
# # ''
# ], # ],
# }, # },
# TODO: test for AvroConfluence 'AvroConfluent' : {
'data_sample': [
avro_confluent_message(cluster.schema_registry_client, {'id':0L,'blockNo':0,'val1':unicode('AM'),'val2':0.5,"val3":1}),
''.join(map(lambda id: avro_confluent_message(cluster.schema_registry_client, {'id':id,'blockNo':0,'val1':unicode('AM'),'val2':0.5,"val3":1}), range(1,16))),
avro_confluent_message(cluster.schema_registry_client, {'id':0L,'blockNo':0,'val1':unicode('AM'),'val2':0.5,"val3":1}),
],
'extra_settings': ", format_avro_schema_registry_url='http://{}:{}'".format(
cluster.schema_registry_host,
cluster.schema_registry_port
),
'supports_empty_value': True,
}
# 'Arrow' : { # 'Arrow' : {
# # Not working at all: DB::Exception: Error while opening a table: Invalid: File is too small: 0, Stack trace (when copying this message, always include the lines below): # # Not working at all: DB::Exception: Error while opening a table: Invalid: File is too small: 0, Stack trace (when copying this message, always include the lines below):
# # /src/Common/Exception.cpp:37: DB::Exception::Exception(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int) @ 0x15c2d2a3 in /usr/bin/clickhouse # # /src/Common/Exception.cpp:37: DB::Exception::Exception(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int) @ 0x15c2d2a3 in /usr/bin/clickhouse
@ -452,10 +475,15 @@ def test_kafka_formats(kafka_cluster):
# }, # },
} }
for format_name in all_formats: for format_name, format_opts in all_formats.items():
print('Set up {}'.format(format_name)) print('Set up {}'.format(format_name))
topic_name='format_tests_{}'.format(format_name) topic_name='format_tests_{}'.format(format_name)
kafka_produce(topic_name, all_formats[format_name]['data_sample']) data_sample = format_opts['data_sample']
data_prefix = []
# prepend empty value when supported
if format_opts.get('supports_empty_value', False):
data_prefix = data_prefix + ['']
kafka_produce(topic_name, data_prefix + data_sample)
instance.query(''' instance.query('''
DROP TABLE IF EXISTS test.kafka_{format_name}; DROP TABLE IF EXISTS test.kafka_{format_name};
@ -476,34 +504,35 @@ def test_kafka_formats(kafka_cluster):
CREATE MATERIALIZED VIEW test.kafka_{format_name}_mv Engine=Log AS CREATE MATERIALIZED VIEW test.kafka_{format_name}_mv Engine=Log AS
SELECT *, _topic, _partition, _offset FROM test.kafka_{format_name}; SELECT *, _topic, _partition, _offset FROM test.kafka_{format_name};
'''.format(topic_name=topic_name, format_name=format_name, extra_settings=all_formats[format_name].get('extra_settings') or '')) '''.format(topic_name=topic_name, format_name=format_name, extra_settings=format_opts.get('extra_settings') or ''))
time.sleep(12) time.sleep(12)
for format_name in all_formats: for format_name, format_opts in all_formats.items():
print('Checking {}'.format(format_name)) print('Checking {}'.format(format_name))
topic_name='format_tests_{}'.format(format_name) topic_name='format_tests_{}'.format(format_name)
# shift offsets by 1 if format supports empty value
offsets = [1,2,3] if format_opts.get('supports_empty_value', False) else [0,1,2]
result = instance.query('SELECT * FROM test.kafka_{format_name}_mv;'.format(format_name=format_name)) result = instance.query('SELECT * FROM test.kafka_{format_name}_mv;'.format(format_name=format_name))
expected = '''\ expected = '''\
0 0 AM 0.5 1 {topic_name} 0 0 0 0 AM 0.5 1 {topic_name} 0 {offset_0}
1 0 AM 0.5 1 {topic_name} 0 1 1 0 AM 0.5 1 {topic_name} 0 {offset_1}
2 0 AM 0.5 1 {topic_name} 0 1 2 0 AM 0.5 1 {topic_name} 0 {offset_1}
3 0 AM 0.5 1 {topic_name} 0 1 3 0 AM 0.5 1 {topic_name} 0 {offset_1}
4 0 AM 0.5 1 {topic_name} 0 1 4 0 AM 0.5 1 {topic_name} 0 {offset_1}
5 0 AM 0.5 1 {topic_name} 0 1 5 0 AM 0.5 1 {topic_name} 0 {offset_1}
6 0 AM 0.5 1 {topic_name} 0 1 6 0 AM 0.5 1 {topic_name} 0 {offset_1}
7 0 AM 0.5 1 {topic_name} 0 1 7 0 AM 0.5 1 {topic_name} 0 {offset_1}
8 0 AM 0.5 1 {topic_name} 0 1 8 0 AM 0.5 1 {topic_name} 0 {offset_1}
9 0 AM 0.5 1 {topic_name} 0 1 9 0 AM 0.5 1 {topic_name} 0 {offset_1}
10 0 AM 0.5 1 {topic_name} 0 1 10 0 AM 0.5 1 {topic_name} 0 {offset_1}
11 0 AM 0.5 1 {topic_name} 0 1 11 0 AM 0.5 1 {topic_name} 0 {offset_1}
12 0 AM 0.5 1 {topic_name} 0 1 12 0 AM 0.5 1 {topic_name} 0 {offset_1}
13 0 AM 0.5 1 {topic_name} 0 1 13 0 AM 0.5 1 {topic_name} 0 {offset_1}
14 0 AM 0.5 1 {topic_name} 0 1 14 0 AM 0.5 1 {topic_name} 0 {offset_1}
15 0 AM 0.5 1 {topic_name} 0 1 15 0 AM 0.5 1 {topic_name} 0 {offset_1}
0 0 AM 0.5 1 {topic_name} 0 2 0 0 AM 0.5 1 {topic_name} 0 {offset_2}
'''.format(topic_name=topic_name) '''.format(topic_name=topic_name, offset_0 = offsets[0], offset_1 = offsets[1], offset_2 = offsets[2])
assert TSV(result) == TSV(expected), 'Proper result for format: {}'.format(format_name) assert TSV(result) == TSV(expected), 'Proper result for format: {}'.format(format_name)


@ -7,24 +7,24 @@ insert into array_intersect values ('2019-01-01', [1,2]);
insert into array_intersect values ('2019-01-01', [1]); insert into array_intersect values ('2019-01-01', [1]);
insert into array_intersect values ('2019-01-01', []); insert into array_intersect values ('2019-01-01', []);
select arrayIntersect(arr, [1,2]) from array_intersect order by arr; select arraySort(arrayIntersect(arr, [1,2])) from array_intersect order by arr;
select arrayIntersect(arr, []) from array_intersect order by arr; select arraySort(arrayIntersect(arr, [])) from array_intersect order by arr;
select arrayIntersect([], arr) from array_intersect order by arr; select arraySort(arrayIntersect([], arr)) from array_intersect order by arr;
select arrayIntersect([1,2], arr) from array_intersect order by arr; select arraySort(arrayIntersect([1,2], arr)) from array_intersect order by arr;
select arrayIntersect([1,2], [1,2,3,4]) from array_intersect order by arr; select arraySort(arrayIntersect([1,2], [1,2,3,4])) from array_intersect order by arr;
select arrayIntersect([], []) from array_intersect order by arr; select arraySort(arrayIntersect([], [])) from array_intersect order by arr;
optimize table array_intersect; optimize table array_intersect;
select arrayIntersect(arr, [1,2]) from array_intersect order by arr; select arraySort(arrayIntersect(arr, [1,2])) from array_intersect order by arr;
select arrayIntersect(arr, []) from array_intersect order by arr; select arraySort(arrayIntersect(arr, [])) from array_intersect order by arr;
select arrayIntersect([], arr) from array_intersect order by arr; select arraySort(arrayIntersect([], arr)) from array_intersect order by arr;
select arrayIntersect([1,2], arr) from array_intersect order by arr; select arraySort(arrayIntersect([1,2], arr)) from array_intersect order by arr;
select arrayIntersect([1,2], [1,2,3,4]) from array_intersect order by arr; select arraySort(arrayIntersect([1,2], [1,2,3,4])) from array_intersect order by arr;
select arrayIntersect([], []) from array_intersect order by arr; select arraySort(arrayIntersect([], [])) from array_intersect order by arr;
drop table if exists array_intersect; drop table if exists array_intersect;
select '-'; select '-';
select arrayIntersect([-100], [156]); select arraySort(arrayIntersect([-100], [156]));
select arrayIntersect([1], [257]); select arraySort(arrayIntersect([1], [257]));


@ -5,5 +5,5 @@
[2] [2]
[] []
[] []
[3,1,2] [1,2,3]
[] []
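Wrapping arrayIntersect in arraySort makes these tests deterministic: the order of elements returned by arrayIntersect is not guaranteed, which is exactly why the reference line above changes from [3,1,2] to [1,2,3]. In Python terms the comparison the tests now rely on is roughly:

# arraySort(arrayIntersect(a, b)) compared against a sorted reference (sketch).
def sorted_intersection(a, b):
    return sorted(set(a) & set(b))

assert sorted_intersection([3, 1, 2], [0, 1, 2, 3]) == [1, 2, 3]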


@ -1,9 +1,9 @@
SELECT arrayIntersect(['a', 'b', 'c'], ['a', 'a']); SELECT arraySort(arrayIntersect(['a', 'b', 'c'], ['a', 'a']));
SELECT arrayIntersect([1, 1], [2, 2]); SELECT arraySort(arrayIntersect([1, 1], [2, 2]));
SELECT arrayIntersect([1, 1], [1, 2]); SELECT arraySort(arrayIntersect([1, 1], [1, 2]));
SELECT arrayIntersect([1, 1, 1], [3], [2, 2, 2]); SELECT arraySort(arrayIntersect([1, 1, 1], [3], [2, 2, 2]));
SELECT arrayIntersect([1, 2], [1, 2], [2]); SELECT arraySort(arrayIntersect([1, 2], [1, 2], [2]));
SELECT arrayIntersect([1, 1], [2, 1], [2, 2], [1]); SELECT arraySort(arrayIntersect([1, 1], [2, 1], [2, 2], [1]));
SELECT arrayIntersect([]); SELECT arraySort(arrayIntersect([]));
SELECT arrayIntersect([1, 2, 3]); SELECT arraySort(arrayIntersect([1, 2, 3]));
SELECT arrayIntersect([1, 1], [2, 1], [2, 2], [2, 2, 2]); SELECT arraySort(arrayIntersect([1, 1], [2, 1], [2, 2], [2, 2, 2]));


@ -1,2 +1,2 @@
[0,3,2] id2 [0,2,3] id2
[3,1,2] id1 [1,2,3] id1


@ -11,7 +11,7 @@ INSERT INTO tags(id, seqs) VALUES ('id1', [1,2,3]), ('id2', [0,2,3]), ('id1', [1
WITH WITH
(SELECT [0, 1, 2, 3]) AS arr1 (SELECT [0, 1, 2, 3]) AS arr1
SELECT arrayIntersect(argMax(seqs, create_time), arr1) AS common, id SELECT arraySort(arrayIntersect(argMax(seqs, create_time), arr1)) AS common, id
FROM tags FROM tags
WHERE id LIKE 'id%' WHERE id LIKE 'id%'
GROUP BY id; GROUP BY id;


@ -0,0 +1,11 @@
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON (arrayJoin([1]) = B.b); -- { serverError 403 }
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON (A.a = arrayJoin([1])); -- { serverError 403 }
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON equals(a); -- { serverError 62 }
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON less(a); -- { serverError 62 }
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON a = b OR a = b; -- { serverError 48 }
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON a = b AND a > b; -- { serverError 48 }
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON a = b AND a < b; -- { serverError 48 }
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON a = b AND a >= b; -- { serverError 48 }
SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON a = b AND a <= b; -- { serverError 48 }


@ -0,0 +1 @@
[]


@ -0,0 +1 @@
SELECT groupArrayMovingSum(10)(0) FROM remote('127.0.0.{1,2}', numbers(0))

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -1,7 +0,0 @@
# Copyright 2020, Altinity LTD. All Rights Reserved.
#
# All information contained herein is, and remains the property
# of Altinity LTD. Any dissemination of this information or
# reproduction of this material is strictly forbidden unless
# prior written permission is obtained from Altinity LTD.
#


@ -3,4 +3,5 @@ from testflows.core import *
@TestFeature @TestFeature
@Name("privileges") @Name("privileges")
def feature(self): def feature(self):
Feature(run=load("rbac.tests.privileges.insert", "feature"), flags=TE)
Feature(run=load("rbac.tests.privileges.select", "feature"), flags=TE) Feature(run=load("rbac.tests.privileges.select", "feature"), flags=TE)


@ -0,0 +1,538 @@
from contextlib import contextmanager
import json
from testflows.core import *
from testflows.asserts import error
from rbac.requirements import *
import rbac.tests.errors as errors
table_types = {
"MergeTree": "CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = MergeTree(d, (a, b), 111)",
"ReplacingMergeTree": "CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplacingMergeTree(d, (a, b), 111)",
"SummingMergeTree": "CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = SummingMergeTree(d, (a, b), 111)",
"AggregatingMergeTree": "CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = AggregatingMergeTree(d, (a, b), 111)",
"CollapsingMergeTree": "CREATE TABLE {name} (d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = CollapsingMergeTree(d, (a, b), 111, y);",
"VersionedCollapsingMergeTree": "CREATE TABLE {name} (d Date, a String, b UInt8, x String, y Int8, z UInt32, version UInt64, sign Int8, INDEX a (b * y, d) TYPE minmax GRANULARITY 3) ENGINE = VersionedCollapsingMergeTree(sign, version) ORDER BY tuple()",
"GraphiteMergeTree": "CREATE TABLE {name} (key UInt32, Path String, Time DateTime, d Date, a String, b UInt8, x String, y Int8, z UInt32, Value Float64, Version UInt32, col UInt64, INDEX a (key * Value, Time) TYPE minmax GRANULARITY 3) ENGINE = GraphiteMergeTree('graphite_rollup_example') ORDER BY tuple()"
}
table_requirements ={
"MergeTree": RQ_SRS_006_RBAC_Privileges_Insert_MergeTree("1.0"),
"ReplacingMergeTree": RQ_SRS_006_RBAC_Privileges_Insert_ReplacingMergeTree("1.0"),
"SummingMergeTree": RQ_SRS_006_RBAC_Privileges_Insert_SummingMergeTree("1.0"),
"AggregatingMergeTree": RQ_SRS_006_RBAC_Privileges_Insert_AggregatingMergeTree("1.0"),
"CollapsingMergeTree": RQ_SRS_006_RBAC_Privileges_Insert_CollapsingMergeTree("1.0"),
"VersionedCollapsingMergeTree": RQ_SRS_006_RBAC_Privileges_Insert_VersionedCollapsingMergeTree("1.0"),
"GraphiteMergeTree": RQ_SRS_006_RBAC_Privileges_Insert_GraphiteMergeTree("1.0"),
}
@contextmanager
def table(node, name, table_type="MergeTree"):
try:
with Given(f"I have a {table_type} table"):
node.query(table_types[table_type].format(name=name))
yield
finally:
with Finally("I drop the table"):
node.query(f"DROP TABLE IF EXISTS {name}")
@contextmanager
def user(node, name):
try:
names = name.split(",")
for i in names:
with Given("I have a user"):
node.query(f"CREATE USER OR REPLACE {i}")
yield
finally:
for i in names:
with Finally("I drop the user"):
node.query(f"DROP USER IF EXISTS {name}")
@contextmanager
def role(node, role):
try:
roles = role.split(",")
for j in roles:
with Given("I have a role"):
node.query(f"CREATE ROLE OR REPLACE {j}")
yield
finally:
for j in roles:
with Finally("I drop the role"):
node.query(f"DROP ROLE IF EXISTS {role}")
def input_output_equality_check(node, input_columns, input_data):
data_list = [x.strip("'") for x in input_data.split(",")]
input_dict = dict(zip(input_columns.split(","), data_list))
output_dict = json.loads(node.query(f"select {input_columns} from merge_tree format JSONEachRow").output)
output_dict = {k:str(v) for (k,v) in output_dict.items()}
return input_dict == output_dict
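input_output_equality_check zips the inserted column list with the literal values, reads the same columns back with FORMAT JSONEachRow, stringifies the values and compares the two dicts. For one of the column/data pairs used in the examples below ("d,b" with "'2020-01-01',9") the comparison works out to:

# What input_output_equality_check compares for insert_columns_pass="d,b", data_pass="'2020-01-01',9" (sketch).
input_dict = dict(zip("d,b".split(","), ["2020-01-01", "9"]))      # {'d': '2020-01-01', 'b': '9'}
# output_dict is parsed from `SELECT d,b FROM merge_tree FORMAT JSONEachRow`, e.g. {"d":"2020-01-01","b":9},
# and every value is stringified before the comparison:
output_dict = {k: str(v) for (k, v) in {"d": "2020-01-01", "b": 9}.items()}
assert input_dict == output_dict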
@TestScenario
def without_privilege(self, table_type, node=None):
"""Check that user without insert privilege on a table is not able to insert on that table.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"):
with When("I run INSERT without privilege"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings = [("user","user0")],
exitcode=exitcode, message=message)
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_Grant("1.0"),
)
def user_with_privilege(self, table_type, node=None):
"""Check that user can insert into a table on which they have insert privilege and the inserted data is correct.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"):
with When("I grant privilege"):
node.query("GRANT INSERT ON merge_tree TO user0")
with And("I use INSERT"):
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings=[("user","user0")])
with Then("I check the insert functioned"):
output = node.query("SELECT d FROM merge_tree FORMAT JSONEachRow").output
assert output == '{"d":"2020-01-01"}', error()
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_Revoke("1.0"),
)
def user_with_revoked_privilege(self, table_type, node=None):
"""Check that user is unable to insert into a table after insert privilege on that table has been revoked from user.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"):
with When("I grant privilege"):
node.query("GRANT INSERT ON merge_tree TO user0")
with And("I revoke privilege"):
node.query("REVOKE INSERT ON merge_tree FROM user0")
with And("I use INSERT"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')",
settings=[("user","user0")], exitcode=exitcode, message=message)
@TestScenario
def user_with_privilege_on_columns(self, table_type):
Scenario(run=user_column_privileges,
examples=Examples("grant_columns revoke_columns insert_columns_fail insert_columns_pass data_fail data_pass table_type",
[tuple(list(row)+[table_type]) for row in user_column_privileges.examples]))
@TestOutline(Scenario)
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_Column("1.0"),
)
@Examples("grant_columns revoke_columns insert_columns_fail insert_columns_pass data_fail data_pass", [
("d", "d", "x", "d", '\'woo\'', '\'2020-01-01\''),
("d,a", "d", "x", "d", '\'woo\'', '\'2020-01-01\''),
("d,a,b", "d,a,b", "x", "d,b", '\'woo\'', '\'2020-01-01\',9'),
("d,a,b", "b", "y", "d,a,b", '9', '\'2020-01-01\',\'woo\',9')
])
def user_column_privileges(self, grant_columns, insert_columns_pass, data_fail, data_pass, table_type,
revoke_columns=None, insert_columns_fail=None, node=None):
"""Check that user is able to insert on granted columns
and unable to insert on not granted or revoked columns.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"):
with When("I grant insert privilege"):
node.query(f"GRANT INSERT({grant_columns}) ON merge_tree TO user0")
if insert_columns_fail is not None:
with And("I insert into not granted column"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query(f"INSERT INTO merge_tree ({insert_columns_fail}) VALUES ({data_fail})",
settings=[("user","user0")], exitcode=exitcode, message=message)
with And("I insert into granted column"):
node.query(f"INSERT INTO merge_tree ({insert_columns_pass}) VALUES ({data_pass})",
settings=[("user","user0")])
with Then("I check the insert functioned"):
input_equals_output = input_output_equality_check(node, insert_columns_pass, data_pass)
assert input_equals_output, error()
if revoke_columns is not None:
with When("I revoke insert privilege from columns"):
node.query(f"REVOKE INSERT({revoke_columns}) ON merge_tree FROM user0")
with And("I insert into revoked columns"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query(f"INSERT INTO merge_tree ({insert_columns_pass}) VALUES ({data_pass})",
settings=[("user","user0")], exitcode=exitcode, message=message)
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_Grant("1.0"),
)
def role_with_privilege(self, table_type, node=None):
"""Check that user can insert into a table after it is granted a role that
has the insert privilege for that table.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"), role(node, "role0"):
with When("I grant insert privilege to a role"):
node.query("GRANT INSERT ON merge_tree TO role0")
with And("I grant role to the user"):
node.query("GRANT role0 TO user0")
with And("I insert into a table"):
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings=[("user","user0")])
with Then("I check that I can read inserted data"):
output = node.query("SELECT d FROM merge_tree FORMAT JSONEachRow").output
assert output == '{"d":"2020-01-01"}', error()
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_Revoke("1.0"),
)
def role_with_revoked_privilege(self, table_type, node=None):
"""Check that user with a role that has insert privilege on a table
is unable to insert into that table after insert privilege
has been revoked from the role.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"), role(node, "role0"):
with When("I grant privilege to a role"):
node.query("GRANT INSERT ON merge_tree TO role0")
with And("I grant the role to a user"):
node.query("GRANT role0 TO user0")
with And("I revoke privilege from the role"):
node.query("REVOKE INSERT ON merge_tree FROM role0")
with And("I insert into the table"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')",
settings=[("user","user0")], exitcode=exitcode, message=message)
@TestScenario
def user_with_revoked_role(self, table_type, node=None):
"""Check that user with a role that has insert privilege on a table
is unable to insert into that table after the role with insert
privilege has been revoked from the user.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"), role(node, "role0"):
with When("I grant privilege to a role"):
node.query("GRANT INSERT ON merge_tree TO role0")
with And("I grant the role to a user"):
node.query("GRANT role0 TO user0")
with And("I revoke the role from the user"):
node.query("REVOKE role0 FROM user0")
with And("I insert into the table"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')",
settings=[("user","user0")], exitcode=exitcode, message=message)
@TestScenario
def role_with_privilege_on_columns(self, table_type):
Scenario(run=role_column_privileges,
examples=Examples("grant_columns revoke_columns insert_columns_fail insert_columns_pass data_fail data_pass table_type",
[tuple(list(row)+[table_type]) for row in role_column_privileges.examples]))
@TestOutline(Scenario)
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_Column("1.0"),
)
@Examples("grant_columns revoke_columns insert_columns_fail insert_columns_pass data_fail data_pass", [
("d", "d", "x", "d", '\'woo\'', '\'2020-01-01\''),
("d,a", "d", "x", "d", '\'woo\'', '\'2020-01-01\''),
("d,a,b", "d,a,b", "x", "d,b", '\'woo\'', '\'2020-01-01\',9'),
("d,a,b", "b", "y", "d,a,b", '9', '\'2020-01-01\',\'woo\',9')
])
def role_column_privileges(self, grant_columns, insert_columns_pass, data_fail, data_pass,
table_type, revoke_columns=None, insert_columns_fail=None, node=None):
"""Check that user with a role is able to insert on granted columns and unable
to insert on not granted or revoked columns.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"), role(node, "role0"):
with When("I grant insert privilege"):
node.query(f"GRANT INSERT({grant_columns}) ON merge_tree TO role0")
with And("I grant the role to a user"):
node.query("GRANT role0 TO user0")
if insert_columns_fail is not None:
with And("I insert into not granted column"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query(f"INSERT INTO merge_tree ({insert_columns_fail}) VALUES ({data_fail})",
settings=[("user","user0")], exitcode=exitcode, message=message)
with And("I insert into granted column"):
node.query(f"INSERT INTO merge_tree ({insert_columns_pass}) VALUES ({data_pass})",
settings=[("user","user0")])
with Then("I check the insert succeeded"):
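# input_output_equality_check is a shared helper defined elsewhere in this suite; it is expected to read the inserted columns back via SELECT and compare them with data_pass.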
input_equals_output = input_output_equality_check(node, insert_columns_pass, data_pass)
assert input_equals_output, error()
if revoke_columns is not None:
with When("I revoke insert privilege from columns"):
node.query(f"REVOKE INSERT({revoke_columns}) ON merge_tree FROM role0")
with And("I insert into revoked columns"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query(f"INSERT INTO merge_tree ({insert_columns_pass}) VALUES ({data_pass})",
settings=[("user","user0")], exitcode=exitcode, message=message)
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_Cluster("1.0"),
)
def user_with_privilege_on_cluster(self, table_type, node=None):
"""Check that user is able to insert on a table with
privilege granted on a cluster.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
try:
with Given("I have a user on a cluster"):
node.query("CREATE USER OR REPLACE user0 ON CLUSTER sharded_cluster")
with When("I grant insert privilege on a cluster without the node with the table"):
node.query("GRANT ON CLUSTER cluster23 INSERT ON merge_tree TO user0")
with And("I insert into the table expecting a fail"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings=[("user","user0")],
exitcode=exitcode, message=message)
with And("I grant insert privilege on cluster including all nodes"):
node.query("GRANT ON CLUSTER sharded_cluster INSERT ON merge_tree TO user0")
with And("I revoke insert privilege on cluster without the table node"):
node.query("REVOKE ON CLUSTER cluster23 INSERT ON merge_tree FROM user0")
with And("I insert into the table"):
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings=[("user","user0")])
with Then("I check that I can read inserted data"):
output = node.query("SELECT d FROM merge_tree FORMAT JSONEachRow").output
assert output == '{"d":"2020-01-01"}', error()
finally:
with Finally("I drop the user"):
node.query("DROP USER user0 ON CLUSTER sharded_cluster")
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_GrantOption_Grant("1.0"),
)
def user_with_privilege_from_user_with_grant_option(self, table_type, node=None):
"""Check that user is able to insert on a table when granted privilege
from another user with grant option.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0,user1"):
with When("I grant privilege with grant option to user"):
node.query("GRANT INSERT(d) ON merge_tree TO user0 WITH GRANT OPTION")
with And("I grant privilege on a column I dont have permission on"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("GRANT INSERT(b) ON merge_tree TO user1", settings=[("user","user0")],
exitcode=exitcode, message=message)
with And("I grant privilege to another user via grant option"):
node.query("GRANT INSERT(d) ON merge_tree TO user1", settings=[("user","user0")])
with And("I insert into a table"):
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings=[("user","user1")])
with Then("I check that I can read inserted data"):
output = node.query("SELECT d FROM merge_tree FORMAT JSONEachRow").output
assert output == '{"d":"2020-01-01"}', error()
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_GrantOption_Grant("1.0"),
)
def role_with_privilege_from_user_with_grant_option(self, table_type, node=None):
"""Check that user is able to insert on a table when granted a role with
insert privilege that was granted by another user with grant option.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0,user1"), role(node, "role0"):
with When("I grant privilege with grant option to user"):
node.query("GRANT INSERT(d) ON merge_tree TO user0 WITH GRANT OPTION")
with And("I grant privilege on a column I dont have permission on"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("GRANT INSERT(b) ON merge_tree TO role0", settings=[("user","user0")],
exitcode=exitcode, message=message)
with And("I grant privilege to a role via grant option"):
node.query("GRANT INSERT(d) ON merge_tree TO role0", settings=[("user","user0")])
with And("I grant the role to another user"):
node.query("GRANT role0 TO user1")
with And("I insert into a table"):
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings=[("user","user1")])
with Then("I check that I can read inserted data"):
output = node.query("SELECT d FROM merge_tree FORMAT JSONEachRow").output
assert output == '{"d":"2020-01-01"}', error()
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_GrantOption_Grant("1.0"),
)
def user_with_privilege_from_role_with_grant_option(self, table_type, node=None):
"""Check that user is able to insert on a table when granted privilege from a role with grant option
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0,user1"), role(node, "role0"):
with When("I grant privilege with grant option to a role"):
node.query("GRANT INSERT(d) ON merge_tree TO role0 WITH GRANT OPTION")
with When("I grant role to a user"):
node.query("GRANT role0 TO user0")
with And("I grant privilege on a column I dont have permission on"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("GRANT INSERT(b) ON merge_tree TO user1", settings=[("user","user0")],
exitcode=exitcode, message=message)
with And("I grant privilege to a user via grant option"):
node.query("GRANT INSERT(d) ON merge_tree TO user1", settings=[("user","user0")])
with And("I insert into a table"):
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings=[("user","user1")])
with Then("I check that I can read inserted data"):
output = node.query("SELECT d FROM merge_tree FORMAT JSONEachRow").output
assert output == '{"d":"2020-01-01"}', error()
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_GrantOption_Grant("1.0"),
)
def role_with_privilege_from_role_with_grant_option(self, table_type, node=None):
"""Check that a user is able to insert on a table with a role that was granted privilege
by another role with grant option.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0,user1"), role(node, "role0,role1"):
with When("I grant privilege with grant option to role"):
node.query("GRANT INSERT(d) ON merge_tree TO role0 WITH GRANT OPTION")
with And("I grant the role to a user"):
node.query("GRANT role0 TO user0")
with And("I grant privilege on a column I dont have permission on"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("GRANT INSERT(b) ON merge_tree TO role1", settings=[("user","user0")],
exitcode=exitcode, message=message)
with And("I grant privilege to another role via grant option"):
node.query("GRANT INSERT(d) ON merge_tree TO role1", settings=[("user","user0")])
with And("I grant the second role to another user"):
node.query("GRANT role1 TO user1")
with And("I insert into a table"):
node.query("INSERT INTO merge_tree (d) VALUES ('2020-01-01')", settings=[("user","user1")])
with Then("I check that I can read inserted data"):
output = node.query("SELECT d FROM merge_tree FORMAT JSONEachRow").output
assert output == '{"d":"2020-01-01"}', error()
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_GrantOption_Revoke("1.0"),
)
def revoke_privilege_from_user_via_user_with_grant_option(self, table_type, node=None):
"""Check that user is unable to revoke a column they don't have access to from a user.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0,user1"):
with When("I grant privilege with grant option to user"):
node.query("GRANT INSERT(d) ON merge_tree TO user0 WITH GRANT OPTION")
with Then("I revoke privilege on a column the user with grant option does not have access to"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("REVOKE INSERT(b) ON merge_tree FROM user1", settings=[("user","user0")],
exitcode=exitcode, message=message)
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_GrantOption_Revoke("1.0"),
)
def revoke_privilege_from_role_via_user_with_grant_option(self, table_type, node=None):
"""Check that user is unable to revoke a column they dont have acces to from a role.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"), role(node, "role0"):
with When("I grant privilege with grant option to user"):
node.query("GRANT INSERT(d) ON merge_tree TO user0 WITH GRANT OPTION")
with Then("I revoke privilege on a column the user with grant option does not have access to"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("REVOKE INSERT(b) ON merge_tree FROM role0", settings=[("user","user0")],
exitcode=exitcode, message=message)
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_GrantOption_Revoke("1.0"),
)
def revoke_privilege_from_user_via_role_with_grant_option(self, table_type, node=None):
"""Check that user with a role is unable to revoke a column they dont have acces to from a user.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0,user1"), role(node, "role0"):
with When("I grant privilege with grant option to a role"):
node.query("GRANT INSERT(d) ON merge_tree TO role0 WITH GRANT OPTION")
with And("I grant the role to a user"):
node.query("GRANT role0 TO user0")
with Then("I revoke privilege on a column the user with grant option does not have access to"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("REVOKE INSERT(b) ON merge_tree FROM user1", settings=[("user","user0")],
exitcode=exitcode, message=message)
@TestScenario
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert_GrantOption_Revoke("1.0"),
)
def revoke_privilege_from_role_via_role_with_grant_option(self, table_type, node=None):
"""Check that user with a role is unable to revoke a column they dont have acces to from a role.
"""
if node is None:
node = self.context.node
with table(node, "merge_tree", table_type):
with user(node, "user0"), role(node, "role0,role1"):
with When("I grant privilege with grant option to a role"):
node.query("GRANT INSERT(d) ON merge_tree TO user0 WITH GRANT OPTION")
with And("I grant the role to a user"):
node.query("GRANT role0 TO user0")
with Then("I revoke privilege on a column the user with grant option does not have access to"):
exitcode, message = errors.not_enough_privileges(name="user0")
node.query("REVOKE INSERT(b) ON merge_tree FROM role1", settings=[("user","user0")],
exitcode=exitcode, message=message)
@TestOutline(Feature)
@Requirements(
RQ_SRS_006_RBAC_Privileges_Insert("1.0"),
)
@Examples("table_type", [
(table_type, Requirements(requirement)) for table_type, requirement in table_requirements.items()
])
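# One example per table engine; the paired Requirements() entry annotates the scenario generated for that engine.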
@Name("insert")
def feature(self, table_type, node="clickhouse1"):
self.context.node = self.context.cluster.node(node)
self.context.node1 = self.context.cluster.node("clickhouse1")
self.context.node2 = self.context.cluster.node("clickhouse2")
self.context.node3 = self.context.cluster.node("clickhouse3")
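# Run every insert-privilege scenario against the current table engine.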
Scenario(test=without_privilege)(table_type=table_type)
Scenario(test=user_with_privilege)(table_type=table_type)
Scenario(test=user_with_revoked_privilege)(table_type=table_type)
Scenario(test=user_with_privilege_on_columns)(table_type=table_type)
Scenario(test=role_with_privilege)(table_type=table_type)
Scenario(test=role_with_revoked_privilege)(table_type=table_type)
Scenario(test=user_with_revoked_role)(table_type=table_type)
Scenario(test=role_with_privilege_on_columns)(table_type=table_type)
Scenario(test=user_with_privilege_on_cluster)(table_type=table_type)
Scenario(test=user_with_privilege_from_user_with_grant_option)(table_type=table_type)
Scenario(test=role_with_privilege_from_user_with_grant_option)(table_type=table_type)
Scenario(test=user_with_privilege_from_role_with_grant_option)(table_type=table_type)
Scenario(test=role_with_privilege_from_role_with_grant_option)(table_type=table_type)
Scenario(test=revoke_privilege_from_user_via_user_with_grant_option)(table_type=table_type)
Scenario(test=revoke_privilege_from_role_via_user_with_grant_option)(table_type=table_type)
Scenario(test=revoke_privilege_from_user_via_role_with_grant_option)(table_type=table_type)
Scenario(test=revoke_privilege_from_role_via_role_with_grant_option)(table_type=table_type)

View File

@@ -9,7 +9,22 @@ import rbac.tests.errors as errors
 table_types = {
     "MergeTree": "CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = MergeTree(d, (a, b), 111)",
-    "CollapsingMergeTree": "CREATE TABLE {name} (d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = CollapsingMergeTree(d, (a, b), 111, y);"
+    "ReplacingMergeTree": "CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplacingMergeTree(d, (a, b), 111)",
+    "SummingMergeTree": "CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = SummingMergeTree(d, (a, b), 111)",
+    "AggregatingMergeTree": "CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = AggregatingMergeTree(d, (a, b), 111)",
+    "CollapsingMergeTree": "CREATE TABLE {name} (d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = CollapsingMergeTree(d, (a, b), 111, y);",
+    "VersionedCollapsingMergeTree": "CREATE TABLE {name} (d Date, a String, b UInt8, x String, y Int8, z UInt32, version UInt64, sign Int8, INDEX a (b * y, d) TYPE minmax GRANULARITY 3) ENGINE = VersionedCollapsingMergeTree(sign, version) ORDER BY tuple()",
+    "GraphiteMergeTree": "CREATE TABLE {name} (key UInt32, Path String, Time DateTime, d Date, a String, b UInt8, x String, y Int8, z UInt32, Value Float64, Version UInt32, col UInt64, INDEX a (key * Value, Time) TYPE minmax GRANULARITY 3) ENGINE = GraphiteMergeTree('graphite_rollup_example') ORDER BY tuple()"
 }
 
+table_requirements ={
+    "MergeTree": RQ_SRS_006_RBAC_Privileges_Select_MergeTree("1.0"),
+    "ReplacingMergeTree": RQ_SRS_006_RBAC_Privileges_Select_ReplacingMergeTree("1.0"),
+    "SummingMergeTree": RQ_SRS_006_RBAC_Privileges_Select_SummingMergeTree("1.0"),
+    "AggregatingMergeTree": RQ_SRS_006_RBAC_Privileges_Select_AggregatingMergeTree("1.0"),
+    "CollapsingMergeTree": RQ_SRS_006_RBAC_Privileges_Select_CollapsingMergeTree("1.0"),
+    "VersionedCollapsingMergeTree": RQ_SRS_006_RBAC_Privileges_Select_VersionedCollapsingMergeTree("1.0"),
+    "GraphiteMergeTree": RQ_SRS_006_RBAC_Privileges_Select_GraphiteMergeTree("1.0"),
+}
+
 
 @contextmanager
@@ -462,7 +477,7 @@ def revoke_privilege_from_role_via_role_with_grant_option(self, table_type, node
     RQ_SRS_006_RBAC_Privileges_Select("1.0"),
 )
 @Examples("table_type", [
-    (key,) for key in table_types.keys()
+    (table_type, Requirements(requirement)) for table_type, requirement in table_requirements.items()
 ])
 @Name("select")
 def feature(self, table_type, node="clickhouse1"):