Merge pull request #67622 from Algunenano/unit_test_asan

Don't run ASAN unit tests under gdb
Raúl Marín 2024-08-07 10:48:00 +00:00 committed by GitHub
commit c9340cba32
13 changed files with 177 additions and 121 deletions
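For context on the motivation: LeakSanitizer performs its end-of-process leak check by stopping all threads through ptrace, and that check does not work when the process is already being traced by a debugger, so an ASAN binary run under gdb effectively loses its leak reports. The sketch below is illustrative only (not part of this PR) and assumes a Linux host with clang and gdb installed:

    # Illustrative sketch, not from this PR: a deliberate leak reported by ASAN/LSan,
    # and the same binary run under gdb, where LSan's ptrace-based check cannot run.
    printf '#include <stdlib.h>\nint main(void) { malloc(4096); return 0; } /* leak */\n' > leak.c
    clang -fsanitize=address -g leak.c -o leak
    ./leak                      # exits non-zero and prints "LeakSanitizer: detected memory leaks"
    gdb -batch -ex run ./leak   # leak report is lost: LSan does not work under ptrace (gdb, strace)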

View File

@@ -28,12 +28,14 @@ RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_
RUN echo "UBSAN_OPTIONS='print_stacktrace=1 max_allocation_size_mb=32768'" >> /etc/environment
RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1 max_allocation_size_mb=32768'" >> /etc/environment
RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt max_allocation_size_mb=32768'" >> /etc/environment
+RUN echo "ASAN_OPTIONS='halt_on_error=1 abort_on_error=1'" >> /etc/environment
# Sanitizer options for current shell (not current, but the one that will be spawned on "docker run")
# (but w/o verbosity for TSAN, otherwise test.reference will not match)
ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1 max_allocation_size_mb=32768'
ENV UBSAN_OPTIONS='print_stacktrace=1 max_allocation_size_mb=32768'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1 max_allocation_size_mb=32768'
ENV LSAN_OPTIONS='max_allocation_size_mb=32768'
+ENV ASAN_OPTIONS='halt_on_error=1 abort_on_error=1'
# for external_symbolizer_path, and also ensure that llvm-symbolizer really
# exists (since you don't want to fallback to addr2line, it is very slow)
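As a rough note on the options being added here (flag semantics as documented for the sanitizers; the example is not part of the diff): halt_on_error=1 stops the run at the first reported error, and abort_on_error=1 makes ASAN terminate via abort() rather than a plain exit, so a sanitizer failure surfaces as a hard non-zero status that CI cannot silently swallow:

    # Illustrative local run with the same options the image now exports.
    ASAN_OPTIONS='halt_on_error=1 abort_on_error=1' ./unit_tests_dbms
    echo "exit status: $?"   # abort() => terminated by SIGABRT, non-zero status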

View File

@@ -193,6 +193,11 @@ function fuzz
kill -0 $server_pid
+IS_ASAN=$(clickhouse-client --query "SELECT count() FROM system.build_options WHERE name = 'CXX_FLAGS' AND position('sanitize=address' IN value)")
+if [[ "$IS_ASAN" = "1" ]];
+then
+echo "ASAN build detected. Not using gdb since it disables LeakSanitizer detections"
+else
# Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
# and clickhouse-server can do fork-exec, for example, to run some bridge.
# Do not set nostop noprint for all signals, because some it may cause gdb to hang,
@@ -240,6 +245,8 @@ quit
sleep 1
done
kill -0 $server_pid # This checks that it is our server that is started and not some other one
+fi
echo 'Server started and responded.'
setup_logs_replication
@@ -264,8 +271,13 @@ quit
# The fuzzer_pid belongs to the timeout process.
actual_fuzzer_pid=$(ps -o pid= --ppid "$fuzzer_pid")
+if [[ "$IS_ASAN" = "1" ]];
+then
+echo "ASAN build detected. Not using gdb since it disables LeakSanitizer detections"
+else
echo "Attaching gdb to the fuzzer itself"
gdb -batch -command script.gdb -p $actual_fuzzer_pid &
+fi
# Wait for the fuzzer to complete.
# Note that the 'wait || ...' thing is required so that the script doesn't

View File

@@ -5,6 +5,11 @@ source /utils.lib
function attach_gdb_to_clickhouse()
{
+IS_ASAN=$(clickhouse-client --query "SELECT count() FROM system.build_options WHERE name = 'CXX_FLAGS' AND position('sanitize=address' IN value)")
+if [[ "$IS_ASAN" = "1" ]];
+then
+echo "ASAN build detected. Not using gdb since it disables LeakSanitizer detections"
+else
# Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
# and clickhouse-server can do fork-exec, for example, to run some bridge.
# Do not set nostop noprint for all signals, because some it may cause gdb to hang,
@@ -46,6 +51,7 @@ quit
sleep 5
# gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s)
run_with_retry 60 clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'"
+fi
}
# vi: ft=bash
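For reference, the detection relies on ASAN builds carrying -fsanitize=address in the CXX_FLAGS row of system.build_options; the outputs shown below are illustrative, not captured from a real run:

    clickhouse-client --query "SELECT count() FROM system.build_options WHERE name = 'CXX_FLAGS' AND position('sanitize=address' IN value)"
    # 1  -> ASAN build: skip gdb so LeakSanitizer keeps working
    # 0  -> any other build: attach gdb as before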

View File

@@ -174,7 +174,7 @@ do
done
setup_logs_replication
-attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 2023-09-01
+attach_gdb_to_clickhouse
function fn_exists() {
declare -F "$1" > /dev/null;

View File

@@ -308,7 +308,8 @@ function collect_query_and_trace_logs()
{
for table in query_log trace_log metric_log
do
-clickhouse-local --config-file=/etc/clickhouse-server/config.xml --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
+# Don't ignore errors here, it leads to ignore sanitizer reports when running clickhouse-local
+clickhouse-local --config-file=/etc/clickhouse-server/config.xml --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst
done
}

View File

@@ -4,4 +4,5 @@ ARG FROM_TAG=latest
FROM clickhouse/test-base:$FROM_TAG
COPY run.sh /
-CMD ["/bin/bash", "/run.sh"]
+RUN chmod +x run.sh
+ENTRYPOINT ["/run.sh"]

View File

@@ -1,5 +1,27 @@
#!/bin/bash
set -x
+# Need to keep error from tests after `tee`. Otherwise we don't alert on asan errors
+set -o pipefail
+set -e
-timeout 40m gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms --gtest_output='json:test_output/test_result.json' | tee test_output/test_result.txt
+if [ "$#" -ne 1 ]; then
+echo "Expected exactly one argument"
+exit 1
+fi
+if [ "$1" = "GDB" ];
+then
+timeout 40m \
+gdb -q -ex "set print inferior-events off" -ex "set confirm off" -ex "set print thread-events off" -ex run -ex bt -ex quit --args \
+./unit_tests_dbms --gtest_output='json:test_output/test_result.json' \
+| tee test_output/test_result.txt
+elif [ "$1" = "NO_GDB" ];
+then
+timeout 40m \
+./unit_tests_dbms --gtest_output='json:test_output/test_result.json' \
+| tee test_output/test_result.txt
+else
+echo "Unknown argument: $1"
+exit 1
+fi
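With the image switched to an ENTRYPOINT, the mode is passed as the single container argument. The invocations below are hypothetical (image name, tag and host paths are placeholders) and mirror the command the CI script assembles later in this PR:

    # Hypothetical invocations; image tag and host paths are placeholders.
    docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
        --volume=$PWD/unit_tests_dbms:/unit_tests_dbms \
        --volume=$PWD/test_output:/test_output \
        clickhouse/unit-test:latest GDB      # regular builds: run the tests under gdb
    docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
        --volume=$PWD/unit_tests_dbms:/unit_tests_dbms \
        --volume=$PWD/test_output:/test_output \
        clickhouse/unit-test:latest NO_GDB   # ASAN builds: plain run, LeakSanitizer stays active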

View File

@@ -8,7 +8,9 @@ using namespace DB;
using ResourceTest = ResourceTestClass;
-TEST(SchedulerFairPolicy, Factory)
+/// Tests disabled because of leaks in the test themselves: https://github.com/ClickHouse/ClickHouse/issues/67678
+TEST(DISABLED_SchedulerFairPolicy, Factory)
{
ResourceTest t;
@@ -17,7 +19,7 @@ TEST(SchedulerFairPolicy, Factory)
EXPECT_TRUE(dynamic_cast<FairPolicy *>(fair.get()) != nullptr);
}
-TEST(SchedulerFairPolicy, FairnessWeights)
+TEST(DISABLED_SchedulerFairPolicy, FairnessWeights)
{
ResourceTest t;
@@ -41,7 +43,7 @@ TEST(SchedulerFairPolicy, FairnessWeights)
t.consumed("B", 20);
}
-TEST(SchedulerFairPolicy, Activation)
+TEST(DISABLED_SchedulerFairPolicy, Activation)
{
ResourceTest t;
@@ -77,7 +79,7 @@ TEST(SchedulerFairPolicy, Activation)
t.consumed("B", 10);
}
-TEST(SchedulerFairPolicy, FairnessMaxMin)
+TEST(DISABLED_SchedulerFairPolicy, FairnessMaxMin)
{
ResourceTest t;
@@ -101,7 +103,7 @@ TEST(SchedulerFairPolicy, FairnessMaxMin)
t.consumed("A", 20);
}
-TEST(SchedulerFairPolicy, HierarchicalFairness)
+TEST(DISABLED_SchedulerFairPolicy, HierarchicalFairness)
{
ResourceTest t;
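A note on the DISABLED_ prefix used in this and the following two files: it is googletest's standard way to keep a test compiled but skipped by default, so while the leak tracked in issue #67678 is open the cases can still be exercised locally on demand, for example:

    # Not part of this PR; standard googletest flags to opt back in locally.
    ./unit_tests_dbms --gtest_also_run_disabled_tests --gtest_filter='*SchedulerFairPolicy*'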

View File

@@ -8,7 +8,9 @@ using namespace DB;
using ResourceTest = ResourceTestClass;
-TEST(SchedulerPriorityPolicy, Factory)
+/// Tests disabled because of leaks in the test themselves: https://github.com/ClickHouse/ClickHouse/issues/67678
+TEST(DISABLED_SchedulerPriorityPolicy, Factory)
{
ResourceTest t;
@@ -17,7 +19,7 @@ TEST(SchedulerPriorityPolicy, Factory)
EXPECT_TRUE(dynamic_cast<PriorityPolicy *>(prio.get()) != nullptr);
}
-TEST(SchedulerPriorityPolicy, Priorities)
+TEST(DISABLED_SchedulerPriorityPolicy, Priorities)
{
ResourceTest t;
@@ -51,7 +53,7 @@ TEST(SchedulerPriorityPolicy, Priorities)
t.consumed("C", 0);
}
-TEST(SchedulerPriorityPolicy, Activation)
+TEST(DISABLED_SchedulerPriorityPolicy, Activation)
{
ResourceTest t;
@@ -92,7 +94,7 @@ TEST(SchedulerPriorityPolicy, Activation)
t.consumed("C", 0);
}
-TEST(SchedulerPriorityPolicy, SinglePriority)
+TEST(DISABLED_SchedulerPriorityPolicy, SinglePriority)
{
ResourceTest t;

View File

@@ -10,7 +10,9 @@ using namespace DB;
using ResourceTest = ResourceTestClass;
-TEST(SchedulerThrottlerConstraint, LeakyBucketConstraint)
+/// Tests disabled because of leaks in the test themselves: https://github.com/ClickHouse/ClickHouse/issues/67678
+TEST(DISABLED_SchedulerThrottlerConstraint, LeakyBucketConstraint)
{
ResourceTest t;
EventQueue::TimePoint start = std::chrono::system_clock::now();
@@ -40,7 +42,7 @@ TEST(SchedulerThrottlerConstraint, LeakyBucketConstraint)
t.consumed("A", 10);
}
-TEST(SchedulerThrottlerConstraint, Unlimited)
+TEST(DISABLED_SchedulerThrottlerConstraint, Unlimited)
{
ResourceTest t;
EventQueue::TimePoint start = std::chrono::system_clock::now();
@@ -57,7 +59,7 @@ TEST(SchedulerThrottlerConstraint, Unlimited)
}
}
-TEST(SchedulerThrottlerConstraint, Pacing)
+TEST(DISABLED_SchedulerThrottlerConstraint, Pacing)
{
ResourceTest t;
EventQueue::TimePoint start = std::chrono::system_clock::now();
@@ -77,7 +79,7 @@ TEST(SchedulerThrottlerConstraint, Pacing)
}
}
-TEST(SchedulerThrottlerConstraint, BucketFilling)
+TEST(DISABLED_SchedulerThrottlerConstraint, BucketFilling)
{
ResourceTest t;
EventQueue::TimePoint start = std::chrono::system_clock::now();
@@ -111,7 +113,7 @@ TEST(SchedulerThrottlerConstraint, BucketFilling)
t.consumed("A", 3);
}
-TEST(SchedulerThrottlerConstraint, PeekAndAvgLimits)
+TEST(DISABLED_SchedulerThrottlerConstraint, PeekAndAvgLimits)
{
ResourceTest t;
EventQueue::TimePoint start = std::chrono::system_clock::now();
@@ -139,7 +141,7 @@ TEST(SchedulerThrottlerConstraint, PeekAndAvgLimits)
}
}
-TEST(SchedulerThrottlerConstraint, ThrottlerAndFairness)
+TEST(DISABLED_SchedulerThrottlerConstraint, ThrottlerAndFairness)
{
ResourceTest t;
EventQueue::TimePoint start = std::chrono::system_clock::now();

View File

@@ -14,12 +14,12 @@
/// because of broken getauxval() [1].
///
/// [1]: https://github.com/ClickHouse/ClickHouse/pull/33957
-TEST(Common, LSan)
+TEST(SanitizerDeathTest, LSan)
{
-int sanitizers_exit_code = 1;
+EXPECT_DEATH(
+{
-ASSERT_EXIT({
+std::thread leak_in_thread(
-std::thread leak_in_thread([]()
+[]()
{
void * leak = malloc(4096);
ASSERT_NE(leak, nullptr);
@@ -27,7 +27,8 @@ TEST(Common, LSan)
leak_in_thread.join();
__lsan_do_leak_check();
-}, ::testing::ExitedWithCode(sanitizers_exit_code), ".*LeakSanitizer: detected memory leaks.*");
+},
+".*LeakSanitizer: detected memory leaks.*");
}
#endif
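Background on the rewrite above: EXPECT_DEATH runs the guarded statement in a forked child and checks that the child terminates unsuccessfully with stderr matching the given pattern, and the *DeathTest suite-name convention lets googletest schedule such tests early; this replaces the previous ASSERT_EXIT check against a hard-coded exit code. An illustrative way to run just this check:

    # Illustrative; runs only the renamed death test.
    ./unit_tests_dbms --gtest_filter='SanitizerDeathTest.*'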

View File

@@ -174,10 +174,13 @@ def main():
test_output = temp_path / "test_output"
test_output.mkdir(parents=True, exist_ok=True)
+# Don't run ASAN under gdb since that breaks leak detection
+gdb_enabled = "NO_GDB" if "asan" in check_name else "GDB"
run_command = (
f"docker run --cap-add=SYS_PTRACE --volume={tests_binary}:/unit_tests_dbms "
"--security-opt seccomp=unconfined "  # required to issue io_uring sys-calls
-f"--volume={test_output}:/test_output {docker_image}"
+f"--volume={test_output}:/test_output {docker_image} {gdb_enabled}"
)
run_log_path = test_output / "run.log"
@@ -194,6 +197,11 @@ def main():
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {TEMP_PATH}", shell=True)
state, description, test_results = process_results(test_output)
+if retcode != 0 and state == SUCCESS:
+# The process might have failed without reporting it in the test_output (e.g. LeakSanitizer)
+state = FAILURE
+description = "Invalid return code. Check run.log"
additional_files = [run_log_path] + [
p for p in test_output.iterdir() if not p.is_dir()
]

View File

@@ -1,9 +1,6 @@
#!/usr/bin/env bash
# shellcheck disable=SC2120
-# Don't check for ODR violation, since we may test shared build with ASAN
-export ASAN_OPTIONS=detect_odr_violation=0
# If ClickHouse was built with coverage - dump the coverage information at exit
# (in other cases this environment variable has no effect)
export CLICKHOUSE_WRITE_COVERAGE="coverage"