Merge branch 'master' into maybe-less-flaky-01019_alter_materialized_view_consistent

Antonio Andelic 2024-06-14 09:09:21 +00:00
commit 0ee22ca5f9
86 changed files with 1515 additions and 411 deletions


@ -273,5 +273,5 @@ jobs:
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
python3 merge_pr.py


@ -173,4 +173,4 @@ jobs:
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}


@ -99,7 +99,7 @@ jobs:
################################# Stage Final #################################
#
FinishCheck:
if: ${{ !failure() && !cancelled() }}
if: ${{ !cancelled() }}
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Tests_1]
runs-on: [self-hosted, style-checker-aarch64]
steps:
@ -112,4 +112,4 @@ jobs:
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py ${{ (contains(needs.*.result, 'failure') && github.event_name == 'merge_group') && '--pipeline-failure' || '' }}
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}


@ -191,7 +191,7 @@ jobs:
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
#############################################################################################
###################################### JEPSEN TESTS #########################################


@ -496,4 +496,4 @@ jobs:
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}


@ -46,9 +46,10 @@ jobs:
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
./utils/list-versions/update-docker-version.sh
GID=$(id -g "${UID}")
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 \
# --network=host and CI=1 are required for S3 access from a container
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
--volume="${GITHUB_WORKSPACE}:/ClickHouse" clickhouse/style-test \
/ClickHouse/utils/changelog/changelog.py -v --debug-helpers \
/ClickHouse/tests/ci/changelog.py -v --debug-helpers \
--gh-user-or-token="$GITHUB_TOKEN" --jobs=5 \
--output="/ClickHouse/docs/changelogs/${GITHUB_TAG}.md" "${GITHUB_TAG}"
git add "./docs/changelogs/${GITHUB_TAG}.md"


@ -399,7 +399,7 @@ option (ENABLE_GWP_ASAN "Enable Gwp-Asan" ON)
# but GWP-ASan also wants to use mmap frequently,
# and due to a large number of memory mappings,
# it does not work together well.
if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG"))
if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") OR SANITIZE)
set(ENABLE_GWP_ASAN OFF)
endif ()


@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.5.1.1763"
ARG VERSION="24.5.3.5"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""


@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.5.1.1763"
ARG VERSION="24.5.3.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""


@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="24.5.1.1763"
ARG VERSION="24.5.3.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
#docker-official-library:off


@ -208,6 +208,7 @@ handle SIGPIPE nostop noprint pass
handle SIGTERM nostop noprint pass
handle SIGUSR1 nostop noprint pass
handle SIGUSR2 nostop noprint pass
handle SIGSEGV nostop pass
handle SIG$RTMIN nostop noprint pass
info signals
continue


@ -20,6 +20,7 @@ handle SIGPIPE nostop noprint pass
handle SIGTERM nostop noprint pass
handle SIGUSR1 nostop noprint pass
handle SIGUSR2 nostop noprint pass
handle SIGSEGV nostop pass
handle SIG$RTMIN nostop noprint pass
info signals
continue


@ -10,14 +10,15 @@ RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
aspell \
curl \
git \
gh \
file \
gh \
git \
libxml2-utils \
locales \
moreutils \
python3-pip \
yamllint \
locales \
zstd \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*


@ -0,0 +1,100 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.3.4.147-lts (31a7bdc346d) FIXME as compared to v24.3.3.102-lts (7e7f3bdd9be)
#### Improvement
* Backported in [#63465](https://github.com/ClickHouse/ClickHouse/issues/63465): Make rabbitmq nack broken messages. Closes [#45350](https://github.com/ClickHouse/ClickHouse/issues/45350). [#60312](https://github.com/ClickHouse/ClickHouse/pull/60312) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#64290](https://github.com/ClickHouse/ClickHouse/issues/64290): Fix logical-error when undoing quorum insert transaction. [#61953](https://github.com/ClickHouse/ClickHouse/pull/61953) ([Han Fei](https://github.com/hanfei1991)).
#### Build/Testing/Packaging Improvement
* Backported in [#63610](https://github.com/ClickHouse/ClickHouse/issues/63610): The Dockerfile is reviewed by the docker official library in https://github.com/docker-library/official-images/pull/15846. [#63400](https://github.com/ClickHouse/ClickHouse/pull/63400) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#65128](https://github.com/ClickHouse/ClickHouse/issues/65128): Decrease the `unit-test` image a few times. [#65102](https://github.com/ClickHouse/ClickHouse/pull/65102) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#64277](https://github.com/ClickHouse/ClickHouse/issues/64277): Fix queries with FINAL giving wrong results when the table does not use adaptive granularity. [#62432](https://github.com/ClickHouse/ClickHouse/pull/62432) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#63716](https://github.com/ClickHouse/ClickHouse/issues/63716): Fix excessive memory usage for queries with nested lambdas. Fixes [#62036](https://github.com/ClickHouse/ClickHouse/issues/62036). [#62462](https://github.com/ClickHouse/ClickHouse/pull/62462) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#63247](https://github.com/ClickHouse/ClickHouse/issues/63247): Fix size checks when updating materialized nested columns (fixes [#62731](https://github.com/ClickHouse/ClickHouse/issues/62731)). [#62773](https://github.com/ClickHouse/ClickHouse/pull/62773) ([Eliot Hautefeuille](https://github.com/hileef)).
* Backported in [#62984](https://github.com/ClickHouse/ClickHouse/issues/62984): Fix the `Unexpected return type` error for queries that read from `StorageBuffer` with `PREWHERE` when the source table has different types. Fixes [#62545](https://github.com/ClickHouse/ClickHouse/issues/62545). [#62916](https://github.com/ClickHouse/ClickHouse/pull/62916) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#63185](https://github.com/ClickHouse/ClickHouse/issues/63185): Sanity check: Clamp values instead of throwing. [#63119](https://github.com/ClickHouse/ClickHouse/pull/63119) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#63293](https://github.com/ClickHouse/ClickHouse/issues/63293): Fix crash with untuple and unresolved lambda. [#63131](https://github.com/ClickHouse/ClickHouse/pull/63131) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#63411](https://github.com/ClickHouse/ClickHouse/issues/63411): Fix a misbehavior when SQL security defaults don't load for old tables during server startup. [#63209](https://github.com/ClickHouse/ClickHouse/pull/63209) ([pufit](https://github.com/pufit)).
* Backported in [#63616](https://github.com/ClickHouse/ClickHouse/issues/63616): Fix bug which could potentially lead to rare LOGICAL_ERROR during SELECT query with message: `Unexpected return type from materialize. Expected type_XXX. Got type_YYY.` Introduced in [#59379](https://github.com/ClickHouse/ClickHouse/issues/59379). [#63353](https://github.com/ClickHouse/ClickHouse/pull/63353) ([alesapin](https://github.com/alesapin)).
* Backported in [#63455](https://github.com/ClickHouse/ClickHouse/issues/63455): Fix `X-ClickHouse-Timezone` header returning wrong timezone when using `session_timezone` as query level setting. [#63377](https://github.com/ClickHouse/ClickHouse/pull/63377) ([Andrey Zvonov](https://github.com/zvonand)).
* Backported in [#63603](https://github.com/ClickHouse/ClickHouse/issues/63603): Fix backup of projection part in case projection was removed from table metadata, but part still has projection. [#63426](https://github.com/ClickHouse/ClickHouse/pull/63426) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#63508](https://github.com/ClickHouse/ClickHouse/issues/63508): Fix 'Every derived table must have its own alias' error for MYSQL dictionary source, close [#63341](https://github.com/ClickHouse/ClickHouse/issues/63341). [#63481](https://github.com/ClickHouse/ClickHouse/pull/63481) ([vdimir](https://github.com/vdimir)).
* Backported in [#63595](https://github.com/ClickHouse/ClickHouse/issues/63595): Avoid segfault in `MergeTreePrefetchedReadPool` while fetching projection parts. [#63513](https://github.com/ClickHouse/ClickHouse/pull/63513) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#63748](https://github.com/ClickHouse/ClickHouse/issues/63748): Read only the necessary columns from VIEW (new analyzer). Closes [#62594](https://github.com/ClickHouse/ClickHouse/issues/62594). [#63688](https://github.com/ClickHouse/ClickHouse/pull/63688) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#63770](https://github.com/ClickHouse/ClickHouse/issues/63770): Fix [#63539](https://github.com/ClickHouse/ClickHouse/issues/63539). Forbid WINDOW redefinition in new analyzer. [#63694](https://github.com/ClickHouse/ClickHouse/pull/63694) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#64189](https://github.com/ClickHouse/ClickHouse/issues/64189): Fix `Not found column` and `CAST AS Map from array requires nested tuple of 2 elements` exceptions for distributed queries which use `Map(Nothing, Nothing)` type. Fixes [#63637](https://github.com/ClickHouse/ClickHouse/issues/63637). [#63753](https://github.com/ClickHouse/ClickHouse/pull/63753) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#63845](https://github.com/ClickHouse/ClickHouse/issues/63845): Fix possible `ILLEGAL_COLUMN` error in `partial_merge` join, close [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#63755](https://github.com/ClickHouse/ClickHouse/pull/63755) ([vdimir](https://github.com/vdimir)).
* Backported in [#63906](https://github.com/ClickHouse/ClickHouse/issues/63906): `query_plan_remove_redundant_distinct` can break queries with window functions (when `allow_experimental_analyzer` is on). Fixes [#62820](https://github.com/ClickHouse/ClickHouse/issues/62820). [#63776](https://github.com/ClickHouse/ClickHouse/pull/63776) ([Igor Nikonov](https://github.com/devcrafter)).
* Backported in [#63989](https://github.com/ClickHouse/ClickHouse/issues/63989): Fix incorrect select query result when parallel replicas were used to read from a Materialized View. [#63861](https://github.com/ClickHouse/ClickHouse/pull/63861) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#64031](https://github.com/ClickHouse/ClickHouse/issues/64031): Fix an error `Database name is empty` for remote queries with lambdas over a cluster with a modified default database. Fixes [#63471](https://github.com/ClickHouse/ClickHouse/issues/63471). [#63864](https://github.com/ClickHouse/ClickHouse/pull/63864) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#64559](https://github.com/ClickHouse/ClickHouse/issues/64559): Fix SIGSEGV due to CPU/Real (`query_profiler_real_time_period_ns`/`query_profiler_cpu_time_period_ns`) profiler (has been an issue since 2022, that leads to periodic server crashes, especially if you were using distributed engine). [#63865](https://github.com/ClickHouse/ClickHouse/pull/63865) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#64009](https://github.com/ClickHouse/ClickHouse/issues/64009): Fix analyzer: make the IN function with arbitrarily deep sub-selects in a materialized view use the insertion block. [#63930](https://github.com/ClickHouse/ClickHouse/pull/63930) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#64236](https://github.com/ClickHouse/ClickHouse/issues/64236): Fix resolve of unqualified COLUMNS matcher. Preserve the input columns order and forbid usage of unknown identifiers. [#63962](https://github.com/ClickHouse/ClickHouse/pull/63962) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#64106](https://github.com/ClickHouse/ClickHouse/issues/64106): Deserialize untrusted binary inputs in a safer way. [#64024](https://github.com/ClickHouse/ClickHouse/pull/64024) ([Robert Schulze](https://github.com/rschu1ze)).
* Backported in [#64168](https://github.com/ClickHouse/ClickHouse/issues/64168): Add missing settings to recoverLostReplica. [#64040](https://github.com/ClickHouse/ClickHouse/pull/64040) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#64320](https://github.com/ClickHouse/ClickHouse/issues/64320): Use a properly redefined context with the correct definer for each individual view in the query pipeline. Closes [#63777](https://github.com/ClickHouse/ClickHouse/issues/63777). [#64079](https://github.com/ClickHouse/ClickHouse/pull/64079) ([pufit](https://github.com/pufit)).
* Backported in [#64380](https://github.com/ClickHouse/ClickHouse/issues/64380): Fix the "Not found column" error in the analyzer when using INTERPOLATE. [#64096](https://github.com/ClickHouse/ClickHouse/pull/64096) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#64567](https://github.com/ClickHouse/ClickHouse/issues/64567): Fix creating backups to S3 buckets with different credentials from the disk containing the file. [#64153](https://github.com/ClickHouse/ClickHouse/pull/64153) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#64270](https://github.com/ClickHouse/ClickHouse/issues/64270): Prevent LOGICAL_ERROR on CREATE TABLE as MaterializedView. [#64174](https://github.com/ClickHouse/ClickHouse/pull/64174) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#64339](https://github.com/ClickHouse/ClickHouse/issues/64339): The query cache now considers two identical queries against different databases as different. The previous behavior could be used to bypass missing privileges to read from a table. [#64199](https://github.com/ClickHouse/ClickHouse/pull/64199) ([Robert Schulze](https://github.com/rschu1ze)).
* Backported in [#64259](https://github.com/ClickHouse/ClickHouse/issues/64259): Ignore `text_log` config when using Keeper. [#64218](https://github.com/ClickHouse/ClickHouse/pull/64218) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#64688](https://github.com/ClickHouse/ClickHouse/issues/64688): Fix Query Tree size validation. Closes [#63701](https://github.com/ClickHouse/ClickHouse/issues/63701). [#64377](https://github.com/ClickHouse/ClickHouse/pull/64377) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#64725](https://github.com/ClickHouse/ClickHouse/issues/64725): Fixed `CREATE TABLE AS` queries for tables with default expressions. [#64455](https://github.com/ClickHouse/ClickHouse/pull/64455) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#64621](https://github.com/ClickHouse/ClickHouse/issues/64621): Fix an error `Cannot find column` in distributed queries with constant CTE in the `GROUP BY` key. [#64519](https://github.com/ClickHouse/ClickHouse/pull/64519) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#64678](https://github.com/ClickHouse/ClickHouse/issues/64678): Fix [#64612](https://github.com/ClickHouse/ClickHouse/issues/64612). Do not rewrite aggregation if `-If` combinator is already used. [#64638](https://github.com/ClickHouse/ClickHouse/pull/64638) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#64831](https://github.com/ClickHouse/ClickHouse/issues/64831): Fix bug which could lead to non-working TTLs with expressions. Fixes [#63700](https://github.com/ClickHouse/ClickHouse/issues/63700). [#64694](https://github.com/ClickHouse/ClickHouse/pull/64694) ([alesapin](https://github.com/alesapin)).
* Backported in [#64940](https://github.com/ClickHouse/ClickHouse/issues/64940): Fix OrderByLimitByDuplicateEliminationVisitor across subqueries. [#64766](https://github.com/ClickHouse/ClickHouse/pull/64766) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#64869](https://github.com/ClickHouse/ClickHouse/issues/64869): Fixed possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries over the HTTP protocol, and asynchronous inserts. [#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#64980](https://github.com/ClickHouse/ClickHouse/issues/64980): Fix the `Block structure mismatch` error for queries reading with `PREWHERE` from the materialized view when the materialized view has columns of different types than the source table. Fixes [#64611](https://github.com/ClickHouse/ClickHouse/issues/64611). [#64855](https://github.com/ClickHouse/ClickHouse/pull/64855) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#64972](https://github.com/ClickHouse/ClickHouse/issues/64972): Fix rare crash when table has TTL with subquery + database replicated + parallel replicas + analyzer. It's really rare, but please don't use TTLs with subqueries. [#64858](https://github.com/ClickHouse/ClickHouse/pull/64858) ([alesapin](https://github.com/alesapin)).
* Backported in [#65070](https://github.com/ClickHouse/ClickHouse/issues/65070): Fix `ALTER MODIFY COMMENT` query that was broken for parameterized VIEWs in https://github.com/ClickHouse/ClickHouse/pull/54211. [#65031](https://github.com/ClickHouse/ClickHouse/pull/65031) ([Nikolay Degterinsky](https://github.com/evillique)).
* Backported in [#65175](https://github.com/ClickHouse/ClickHouse/issues/65175): Fix the `Unknown expression identifier` error for remote queries with `INTERPOLATE (alias)` (new analyzer). Fixes [#64636](https://github.com/ClickHouse/ClickHouse/issues/64636). [#65090](https://github.com/ClickHouse/ClickHouse/pull/65090) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
* Backported in [#64587](https://github.com/ClickHouse/ClickHouse/issues/64587): Disabled `enable_vertical_final` setting by default. This feature should not be used because it has a bug: [#64543](https://github.com/ClickHouse/ClickHouse/issues/64543). [#64544](https://github.com/ClickHouse/ClickHouse/pull/64544) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#64878](https://github.com/ClickHouse/ClickHouse/issues/64878): Fix an error where a user in a specific situation could escalate their privileges on the default database without the necessary grants. [#64769](https://github.com/ClickHouse/ClickHouse/pull/64769) ([pufit](https://github.com/pufit)).
#### NO CL CATEGORY
* Backported in [#63304](https://github.com/ClickHouse/ClickHouse/issues/63304):. [#63297](https://github.com/ClickHouse/ClickHouse/pull/63297) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#63708](https://github.com/ClickHouse/ClickHouse/issues/63708):. [#63415](https://github.com/ClickHouse/ClickHouse/pull/63415) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Backport [#64363](https://github.com/ClickHouse/ClickHouse/issues/64363) to 24.3: Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts"'. [#64907](https://github.com/ClickHouse/ClickHouse/pull/64907) ([Raúl Marín](https://github.com/Algunenano)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#63751](https://github.com/ClickHouse/ClickHouse/issues/63751): group_by_use_nulls strikes back. [#62922](https://github.com/ClickHouse/ClickHouse/pull/62922) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#63558](https://github.com/ClickHouse/ClickHouse/issues/63558): Try fix segfault in `MergeTreeReadPoolBase::createTask`. [#63323](https://github.com/ClickHouse/ClickHouse/pull/63323) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#63336](https://github.com/ClickHouse/ClickHouse/issues/63336): The commit URL has a different pattern. [#63331](https://github.com/ClickHouse/ClickHouse/pull/63331) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#63374](https://github.com/ClickHouse/ClickHouse/issues/63374): Add tags for the test 03000_traverse_shadow_system_data_paths.sql to make it stable. [#63366](https://github.com/ClickHouse/ClickHouse/pull/63366) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Backported in [#63625](https://github.com/ClickHouse/ClickHouse/issues/63625): Workaround for the `oklch()` inside canvas bug in Firefox. [#63404](https://github.com/ClickHouse/ClickHouse/pull/63404) ([Sergei Trifonov](https://github.com/serxa)).
* Backported in [#63569](https://github.com/ClickHouse/ClickHouse/issues/63569): Add `jwcrypto` to integration tests runner. [#63551](https://github.com/ClickHouse/ClickHouse/pull/63551) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Backported in [#63649](https://github.com/ClickHouse/ClickHouse/issues/63649): Fix `02362_part_log_merge_algorithm` flaky test. [#63635](https://github.com/ClickHouse/ClickHouse/pull/63635) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Backported in [#63762](https://github.com/ClickHouse/ClickHouse/issues/63762): Cancel S3 reads properly when parallel reads are used. [#63687](https://github.com/ClickHouse/ClickHouse/pull/63687) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#63741](https://github.com/ClickHouse/ClickHouse/issues/63741): Userspace page cache: don't collect stats if cache is unused. [#63730](https://github.com/ClickHouse/ClickHouse/pull/63730) ([Michael Kolupaev](https://github.com/al13n321)).
* Backported in [#63826](https://github.com/ClickHouse/ClickHouse/issues/63826): Fix `test_odbc_interaction` for arm64 on linux. [#63787](https://github.com/ClickHouse/ClickHouse/pull/63787) ([alesapin](https://github.com/alesapin)).
* Backported in [#63895](https://github.com/ClickHouse/ClickHouse/issues/63895): Fix `test_catboost_evaluate` for aarch64. [#63789](https://github.com/ClickHouse/ClickHouse/pull/63789) ([alesapin](https://github.com/alesapin)).
* Backported in [#63887](https://github.com/ClickHouse/ClickHouse/issues/63887): Fix `test_disk_types` for aarch64. [#63832](https://github.com/ClickHouse/ClickHouse/pull/63832) ([alesapin](https://github.com/alesapin)).
* Backported in [#63879](https://github.com/ClickHouse/ClickHouse/issues/63879): Fix `test_short_strings_aggregation` for arm. [#63836](https://github.com/ClickHouse/ClickHouse/pull/63836) ([alesapin](https://github.com/alesapin)).
* Backported in [#63916](https://github.com/ClickHouse/ClickHouse/issues/63916): Disable `test_non_default_compression/test.py::test_preconfigured_deflateqpl_codec` on arm. [#63839](https://github.com/ClickHouse/ClickHouse/pull/63839) ([alesapin](https://github.com/alesapin)).
* Backported in [#63969](https://github.com/ClickHouse/ClickHouse/issues/63969): Fix 02124_insert_deduplication_token_multiple_blocks. [#63950](https://github.com/ClickHouse/ClickHouse/pull/63950) ([Han Fei](https://github.com/hanfei1991)).
* Backported in [#64047](https://github.com/ClickHouse/ClickHouse/issues/64047): Do not create new release in release branch automatically. [#64039](https://github.com/ClickHouse/ClickHouse/pull/64039) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#64076](https://github.com/ClickHouse/ClickHouse/issues/64076): Files without shebang have mime 'text/plain' or 'inode/x-empty'. [#64062](https://github.com/ClickHouse/ClickHouse/pull/64062) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#64142](https://github.com/ClickHouse/ClickHouse/issues/64142): Fix sanitizers. [#64090](https://github.com/ClickHouse/ClickHouse/pull/64090) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#64159](https://github.com/ClickHouse/ClickHouse/issues/64159): Add retries in `git submodule update`. [#64125](https://github.com/ClickHouse/ClickHouse/pull/64125) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#64473](https://github.com/ClickHouse/ClickHouse/issues/64473): Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts. [#64363](https://github.com/ClickHouse/ClickHouse/pull/64363) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#65113](https://github.com/ClickHouse/ClickHouse/issues/65113): Adjust the `version_helper` and script to a new release scheme. [#64759](https://github.com/ClickHouse/ClickHouse/pull/64759) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#64999](https://github.com/ClickHouse/ClickHouse/issues/64999): Fix crash with DISTINCT and window functions. [#64767](https://github.com/ClickHouse/ClickHouse/pull/64767) ([Igor Nikonov](https://github.com/devcrafter)).


@ -0,0 +1,38 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.5.2.34-stable (45589aeee49) FIXME as compared to v24.5.1.1763-stable (647c154a94d)
#### Improvement
* Backported in [#65096](https://github.com/ClickHouse/ClickHouse/issues/65096): The setting `allow_experimental_join_condition` was accidentally marked as important, which may prevent distributed queries in a mixed-version cluster from being executed successfully. [#65008](https://github.com/ClickHouse/ClickHouse/pull/65008) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
#### Build/Testing/Packaging Improvement
* Backported in [#65132](https://github.com/ClickHouse/ClickHouse/issues/65132): Decrease the `unit-test` image a few times. [#65102](https://github.com/ClickHouse/ClickHouse/pull/65102) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#64729](https://github.com/ClickHouse/ClickHouse/issues/64729): Fixed `CREATE TABLE AS` queries for tables with default expressions. [#64455](https://github.com/ClickHouse/ClickHouse/pull/64455) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#65061](https://github.com/ClickHouse/ClickHouse/issues/65061): Fix the `Expression nodes list expected 1 projection names` and `Unknown expression or identifier` errors for queries with aliases to `GLOBAL IN`. Fixes [#64445](https://github.com/ClickHouse/ClickHouse/issues/64445). [#64517](https://github.com/ClickHouse/ClickHouse/pull/64517) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088): Fix removing the `WHERE` and `PREWHERE` expressions, which are always true (for the new analyzer). Fixes [#64575](https://github.com/ClickHouse/ClickHouse/issues/64575). [#64695](https://github.com/ClickHouse/ClickHouse/pull/64695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#64944](https://github.com/ClickHouse/ClickHouse/issues/64944): Fix OrderByLimitByDuplicateEliminationVisitor across subqueries. [#64766](https://github.com/ClickHouse/ClickHouse/pull/64766) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#64873](https://github.com/ClickHouse/ClickHouse/issues/64873): Fixed possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries over the HTTP protocol, and asynchronous inserts. [#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#64984](https://github.com/ClickHouse/ClickHouse/issues/64984): Fix the `Block structure mismatch` error for queries reading with `PREWHERE` from the materialized view when the materialized view has columns of different types than the source table. Fixes [#64611](https://github.com/ClickHouse/ClickHouse/issues/64611). [#64855](https://github.com/ClickHouse/ClickHouse/pull/64855) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#64976](https://github.com/ClickHouse/ClickHouse/issues/64976): Fix rare crash when table has TTL with subquery + database replicated + parallel replicas + analyzer. It's really rare, but please don't use TTLs with subqueries. [#64858](https://github.com/ClickHouse/ClickHouse/pull/64858) ([alesapin](https://github.com/alesapin)).
* Backported in [#65074](https://github.com/ClickHouse/ClickHouse/issues/65074): Fix `ALTER MODIFY COMMENT` query that was broken for parameterized VIEWs in https://github.com/ClickHouse/ClickHouse/pull/54211. [#65031](https://github.com/ClickHouse/ClickHouse/pull/65031) ([Nikolay Degterinsky](https://github.com/evillique)).
* Backported in [#65179](https://github.com/ClickHouse/ClickHouse/issues/65179): Fix the `Unknown expression identifier` error for remote queries with `INTERPOLATE (alias)` (new analyzer). Fixes [#64636](https://github.com/ClickHouse/ClickHouse/issues/64636). [#65090](https://github.com/ClickHouse/ClickHouse/pull/65090) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#65163](https://github.com/ClickHouse/ClickHouse/issues/65163): Fix pushing arithmetic operations out of aggregation. In the new analyzer, optimization was applied only once. Part of [#62245](https://github.com/ClickHouse/ClickHouse/issues/62245). [#65104](https://github.com/ClickHouse/ClickHouse/pull/65104) ([Dmitry Novik](https://github.com/novikd)).
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
* Backported in [#64882](https://github.com/ClickHouse/ClickHouse/issues/64882): Fix an error where a user in a specific situation could escalate their privileges on the default database without the necessary grants. [#64769](https://github.com/ClickHouse/ClickHouse/pull/64769) ([pufit](https://github.com/pufit)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#65002](https://github.com/ClickHouse/ClickHouse/issues/65002): Be more graceful with existing tables with `inverted` indexes. [#64656](https://github.com/ClickHouse/ClickHouse/pull/64656) ([Robert Schulze](https://github.com/rschu1ze)).
* Backported in [#65115](https://github.com/ClickHouse/ClickHouse/issues/65115): Adjust the `version_helper` and script to a new release scheme. [#64759](https://github.com/ClickHouse/ClickHouse/pull/64759) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#64796](https://github.com/ClickHouse/ClickHouse/issues/64796): Fix crash with DISTINCT and window functions. [#64767](https://github.com/ClickHouse/ClickHouse/pull/64767) ([Igor Nikonov](https://github.com/devcrafter)).


@ -0,0 +1,14 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.5.3.5-stable (e0eb66f8e17) FIXME as compared to v24.5.2.34-stable (45589aeee49)
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#65227](https://github.com/ClickHouse/ClickHouse/issues/65227): Capture weak_ptr of ContextAccess for safety. [#65051](https://github.com/ClickHouse/ClickHouse/pull/65051) ([Alexander Gololobov](https://github.com/davenger)).
* Backported in [#65219](https://github.com/ClickHouse/ClickHouse/issues/65219): Fix false positives leaky memory warnings in OpenSSL. [#65125](https://github.com/ClickHouse/ClickHouse/pull/65125) ([Robert Schulze](https://github.com/rschu1ze)).


@ -229,6 +229,10 @@ For production builds, clang is used, but we also test make gcc builds. For deve
## Sanitizers {#sanitizers}
:::note
If the process (ClickHouse server or client) crashes at startup when running it locally, you might need to disable address space layout randomization: `sudo sysctl kernel.randomize_va_space=0`
:::
### Address sanitizer
We run functional, integration, stress and unit tests under ASan on a per-commit basis.


@ -75,7 +75,7 @@ Possible values:
- unordered — With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKeeper.
- ordered — With ordered mode, only the maximum name of the successfully consumed file and the names of files that will be retried after an unsuccessful loading attempt are stored in ZooKeeper.
Default value: `unordered`.
Default value: `ordered` in versions before 24.6. Starting with 24.6 there is no default value; the setting must be specified manually. For tables created on earlier versions the default value remains `Ordered` for compatibility.
### after_processing {#after_processing}
@ -181,6 +181,10 @@ For 'Ordered' mode. Defines a maximum boundary for reschedule interval for a bac
Default value: `30000`.
### s3queue_buckets {#buckets}
For 'Ordered' mode. Available since `24.6`. If there are several replicas of an S3Queue table, each working with the same metadata directory in Keeper, the value of `s3queue_buckets` needs to be at least the number of replicas. If the `s3queue_processing_threads` setting is used as well, it makes sense to increase the value of `s3queue_buckets` even further, as it defines the actual parallelism of `S3Queue` processing.
## S3-related Settings {#s3-settings}
Engine supports all s3 related settings. For more information about S3 settings see [here](../../../engines/table-engines/integrations/s3.md).

View File

@ -2924,6 +2924,8 @@ Define proxy servers for HTTP and HTTPS requests, currently supported by S3 stor
There are three ways to define proxy servers: environment variables, proxy lists, and remote proxy resolvers.
Bypassing proxy servers for specific hosts is also supported with the use of `no_proxy`.
### Environment variables
The `http_proxy` and `https_proxy` environment variables allow you to specify a
This also allows a mix of resolver types to be used.
By default, tunneling (i.e., `HTTP CONNECT`) is used to make `HTTPS` requests over an `HTTP` proxy. This setting can be used to disable it.
### no_proxy
By default, all requests will go through the proxy. In order to disable it for specific hosts, the `no_proxy` variable must be set.
It can be set inside the `<proxy>` clause for list and remote resolvers and as an environment variable for environment resolver.
It supports IP addresses, domains, subdomains and the `'*'` wildcard for a full bypass. Leading dots are stripped, just as curl does.
Example:
The configuration below bypasses the proxy for requests to `clickhouse.cloud` and all of its subdomains (e.g., `auth.clickhouse.cloud`).
The same applies to GitLab, even though its entry has a leading dot: both `gitlab.com` and `about.gitlab.com` bypass the proxy.
``` xml
<proxy>
<no_proxy>clickhouse.cloud,.gitlab.com</no_proxy>
<http>
<uri>http://proxy1</uri>
<uri>http://proxy2:3128</uri>
</http>
<https>
<uri>http://proxy1:3128</uri>
</https>
</proxy>
```
## max_materialized_views_count_for_table {#max_materialized_views_count_for_table}
A limit on the number of materialized views attached to a table.


@ -123,7 +123,7 @@ A Bloom filter is a data structure that allows efficient testing of set membership
There are three Bloom-filter-based data skipping index types:
* The basic **bloom_filter** takes an optional parameter representing the allowed "false positive" rate between 0 and 1 (if unspecified, .025 is used).
* The basic **bloom_filter** takes an optional parameter representing the allowed "false positive" rate between 0 and 1 (if unspecified, 0.025 is used).
* The more specialized **tokenbf_v1** takes three parameters used to optimize the Bloom filter: (1) the size of the filter in bytes (larger filters have fewer false positives at some cost in storage), (2) the number of hash functions (more hash functions reduce false positives), and (3) the seed for the Bloom filter hash functions. For more details on how these parameters affect Bloom filter functionality, see [here](https://hur.st/bloomfilter/). This index works only with data of types String, FixedString, and Map. The input expression is split into character sequences separated by non-alphanumeric characters. For example, the column value `This is a candidate for a "full text" search` is split into `This` `is` `a` `candidate` `for` `full` `text` `search`. It is intended for searching words and other values within long strings with LIKE, EQUALS, IN, hasToken() and similar searches. For example, one possible use is searching for a small number of class names or line numbers in a column of unstructured application log lines.


@ -1671,6 +1671,10 @@ try
if (global_context->isServerCompletelyStarted())
CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability);
#if USE_GWP_ASAN
GWPAsan::setForceSampleProbability(new_server_settings.gwp_asan_force_sample_probability);
#endif
ProfileEvents::increment(ProfileEvents::MainConfigLoads);
/// Must be the last.
@ -2120,6 +2124,10 @@ try
CannotAllocateThreadFaultInjector::setFaultProbability(server_settings.cannot_allocate_thread_fault_injection_probability);
#if USE_GWP_ASAN
GWPAsan::setForceSampleProbability(server_settings.gwp_asan_force_sample_probability);
#endif
try
{
global_context->startClusterDiscovery();


@ -360,10 +360,13 @@ void ContextAccess::setUser(const UserPtr & user_) const
subscription_for_roles_changes.reset();
enabled_roles = access_control->getEnabledRoles(current_roles, current_roles_with_admin_option);
subscription_for_roles_changes = enabled_roles->subscribeForChanges([this](const std::shared_ptr<const EnabledRolesInfo> & roles_info_)
subscription_for_roles_changes = enabled_roles->subscribeForChanges([weak_ptr = weak_from_this()](const std::shared_ptr<const EnabledRolesInfo> & roles_info_)
{
std::lock_guard lock{mutex};
setRolesInfo(roles_info_);
auto ptr = weak_ptr.lock();
if (!ptr)
return;
std::lock_guard lock{ptr->mutex};
ptr->setRolesInfo(roles_info_);
});
setRolesInfo(enabled_roles->getRolesInfo());
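
The fix above hinges on capturing `weak_from_this()` instead of `this` in a subscription callback that can outlive the `ContextAccess`. A minimal sketch of the idiom, with invented names (`Subscriber`, `makeCallback`) rather than the actual ClickHouse classes:

```cpp
#include <functional>
#include <memory>
#include <mutex>

// Hypothetical subscriber illustrating the weak_ptr-capture pattern above.
struct Subscriber : std::enable_shared_from_this<Subscriber>
{
    std::mutex mutex;
    int value = 0;

    std::function<void(int)> makeCallback()
    {
        // Capture a weak_ptr, not `this`: if the Subscriber is destroyed
        // before the callback fires, lock() yields nullptr and we bail out
        // instead of dereferencing a dangling pointer.
        return [weak = weak_from_this()](int new_value)
        {
            auto self = weak.lock();
            if (!self)
                return;
            std::lock_guard lock{self->mutex};
            self->value = new_value;
        };
    }
};

int main()
{
    std::function<void(int)> callback;
    {
        auto subscriber = std::make_shared<Subscriber>();
        callback = subscriber->makeCallback();
        callback(1); // delivered: the subscriber is alive
    }
    callback(2); // safely ignored: the subscriber is already destroyed
}
```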


@ -285,7 +285,7 @@ if (TARGET ch_contrib::llvm)
endif ()
if (TARGET ch_contrib::gwp_asan)
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::gwp_asan)
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::gwp_asan)
target_link_libraries (clickhouse_new_delete PRIVATE ch_contrib::gwp_asan)
endif()


@ -1188,7 +1188,10 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b
std::rethrow_exception(local_format_error);
if (cancelled && is_interactive)
{
std::cout << "Query was cancelled." << std::endl;
cancelled_printed = true;
}
}
@ -1302,8 +1305,13 @@ void ClientBase::onEndOfStream()
resetOutput();
if (is_interactive && !written_first_block)
std::cout << "Ok." << std::endl;
if (is_interactive)
{
if (cancelled && !cancelled_printed)
std::cout << "Query was cancelled." << std::endl;
else if (!written_first_block)
std::cout << "Ok." << std::endl;
}
}
@ -1866,6 +1874,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
resetOutput();
have_error = false;
cancelled = false;
cancelled_printed = false;
client_exception.reset();
server_exception.reset();


@ -329,6 +329,7 @@ protected:
bool allow_merge_tree_settings = false;
bool cancelled = false;
bool cancelled_printed = false;
/// Has log_comment been specified by the user?
bool has_log_comment = false;


@ -1,8 +1,9 @@
#include <Common/Allocator.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>
#include <Common/formatReadable.h>
#include <Common/CurrentMemoryTracker.h>
#include <Common/Exception.h>
#include <Common/GWPAsan.h>
#include <Common/formatReadable.h>
#include <Common/logger_useful.h>
#include <base/errnoToString.h>
#include <base/getPageSize.h>
@ -10,6 +11,12 @@
#include <Poco/Logger.h>
#include <sys/mman.h> /// MADV_POPULATE_WRITE
namespace ProfileEvents
{
extern const Event GWPAsanAllocateSuccess;
extern const Event GWPAsanAllocateFailed;
extern const Event GWPAsanFree;
}
namespace DB
{
@ -60,6 +67,27 @@ template <bool clear_memory, bool populate>
void * allocNoTrack(size_t size, size_t alignment)
{
void * buf;
#if USE_GWP_ASAN
if (unlikely(GWPAsan::GuardedAlloc.shouldSample()))
{
if (void * ptr = GWPAsan::GuardedAlloc.allocate(size, alignment))
{
if constexpr (clear_memory)
memset(ptr, 0, size);
if constexpr (populate)
prefaultPages(ptr, size);
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess);
return ptr;
}
else
{
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed);
}
}
#endif
if (alignment <= MALLOC_MIN_ALIGNMENT)
{
if constexpr (clear_memory)
@ -91,6 +119,15 @@ void * allocNoTrack(size_t size, size_t alignment)
void freeNoTrack(void * buf)
{
#if USE_GWP_ASAN
if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(buf)))
{
ProfileEvents::increment(ProfileEvents::GWPAsanFree);
GWPAsan::GuardedAlloc.deallocate(buf);
return;
}
#endif
::free(buf);
}
@ -144,8 +181,54 @@ void * Allocator<clear_memory_, populate>::realloc(void * buf, size_t old_size,
{
/// nothing to do.
/// BTW, it's not possible to change alignment while doing realloc.
return buf;
}
else if (alignment <= MALLOC_MIN_ALIGNMENT)
#if USE_GWP_ASAN
if (unlikely(GWPAsan::GuardedAlloc.shouldSample()))
{
if (void * ptr = GWPAsan::GuardedAlloc.allocate(new_size, alignment))
{
auto trace_free = CurrentMemoryTracker::free(old_size);
auto trace_alloc = CurrentMemoryTracker::alloc(new_size);
trace_free.onFree(buf, old_size);
memcpy(ptr, buf, std::min(old_size, new_size));
free(buf, old_size);
trace_alloc.onAlloc(ptr, new_size);
if constexpr (clear_memory)
if (new_size > old_size)
memset(reinterpret_cast<char *>(ptr) + old_size, 0, new_size - old_size);
if constexpr (populate)
prefaultPages(ptr, new_size);
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess);
return ptr;
}
else
{
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed);
}
}
if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(buf)))
{
/// Big allocs that require a copy. MemoryTracker is called inside 'alloc', 'free' methods.
void * new_buf = alloc(new_size, alignment);
memcpy(new_buf, buf, std::min(old_size, new_size));
free(buf, old_size);
buf = new_buf;
if constexpr (populate)
prefaultPages(buf, new_size);
return buf;
}
#endif
if (alignment <= MALLOC_MIN_ALIGNMENT)
{
/// Resize malloc'd memory region with no special alignment requirement.
auto trace_free = CurrentMemoryTracker::free(old_size);


@ -1,6 +1,7 @@
#include "EnvironmentProxyConfigurationResolver.h"
#include <Common/logger_useful.h>
#include <Common/proxyConfigurationToPocoProxyConfig.h>
#include <Poco/URI.h>
namespace DB
@ -12,6 +13,7 @@ namespace DB
* */
static constexpr auto PROXY_HTTP_ENVIRONMENT_VARIABLE = "http_proxy";
static constexpr auto PROXY_HTTPS_ENVIRONMENT_VARIABLE = "https_proxy";
static constexpr auto NO_PROXY_ENVIRONMENT_VARIABLE = "no_proxy";
EnvironmentProxyConfigurationResolver::EnvironmentProxyConfigurationResolver(
Protocol request_protocol_, bool disable_tunneling_for_https_requests_over_http_proxy_)
@ -34,31 +36,60 @@ namespace
return std::getenv(PROXY_HTTPS_ENVIRONMENT_VARIABLE); // NOLINT(concurrency-mt-unsafe)
}
}
const char * getNoProxyHosts()
{
return std::getenv(NO_PROXY_ENVIRONMENT_VARIABLE); // NOLINT(concurrency-mt-unsafe)
}
ProxyConfiguration buildProxyConfiguration(
ProxyConfiguration::Protocol request_protocol,
const Poco::URI & uri,
const std::string & no_proxy_hosts_string,
bool disable_tunneling_for_https_requests_over_http_proxy)
{
if (uri.empty())
{
return {};
}
const auto & host = uri.getHost();
const auto & scheme = uri.getScheme();
const auto port = uri.getPort();
const bool use_tunneling_for_https_requests_over_http_proxy = ProxyConfiguration::useTunneling(
request_protocol,
ProxyConfiguration::protocolFromString(scheme),
disable_tunneling_for_https_requests_over_http_proxy);
LOG_TRACE(getLogger("EnvironmentProxyConfigurationResolver"), "Use proxy from environment: {}://{}:{}", scheme, host, port);
return ProxyConfiguration {
host,
ProxyConfiguration::protocolFromString(scheme),
port,
use_tunneling_for_https_requests_over_http_proxy,
request_protocol,
no_proxy_hosts_string
};
}
}
ProxyConfiguration EnvironmentProxyConfigurationResolver::resolve()
{
const auto * proxy_host = getProxyHost(request_protocol);
static const auto * http_proxy_host = getProxyHost(Protocol::HTTP);
static const auto * https_proxy_host = getProxyHost(Protocol::HTTPS);
static const auto * no_proxy = getNoProxyHosts();
static const auto poco_no_proxy_hosts = no_proxy ? buildPocoNonProxyHosts(no_proxy) : "";
if (!proxy_host)
{
return {};
}
static const Poco::URI http_proxy_uri(http_proxy_host ? http_proxy_host : "");
static const Poco::URI https_proxy_uri(https_proxy_host ? https_proxy_host : "");
auto uri = Poco::URI(proxy_host);
auto host = uri.getHost();
auto scheme = uri.getScheme();
auto port = uri.getPort();
LOG_TRACE(getLogger("EnvironmentProxyConfigurationResolver"), "Use proxy from environment: {}://{}:{}", scheme, host, port);
return ProxyConfiguration {
host,
ProxyConfiguration::protocolFromString(scheme),
port,
useTunneling(request_protocol, ProxyConfiguration::protocolFromString(scheme), disable_tunneling_for_https_requests_over_http_proxy),
request_protocol
};
return buildProxyConfiguration(
request_protocol,
request_protocol == Protocol::HTTP ? http_proxy_uri : https_proxy_uri,
poco_no_proxy_hosts,
disable_tunneling_for_https_requests_over_http_proxy);
}
}
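
One design choice worth noting in the rewrite: the environment is read once into function-local statics, so proxy settings stay constant for the lifetime of the process even if the environment later changes. A rough sketch of that caching behavior (plain `getenv` for one variable, not the ClickHouse class itself):

```cpp
#include <cstdlib>
#include <iostream>
#include <string>

// Sketch of the function-local-static caching used in resolve(): the
// environment variable is read once, on first call, and reused afterwards.
static const std::string & cachedHttpsProxy()
{
    static const std::string value = []
    {
        const char * raw = std::getenv("https_proxy"); // NOLINT(concurrency-mt-unsafe)
        return raw ? std::string{raw} : std::string{};
    }();
    return value;
}

int main()
{
    std::cout << "https_proxy: '" << cachedHttpsProxy() << "'\n";
    setenv("https_proxy", "http://changed:3128", /*overwrite=*/ 1); // POSIX
    std::cout << "still cached: '" << cachedHttpsProxy() << "'\n"; // unchanged
}
```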

src/Common/GWPAsan.cpp (new file)

@ -0,0 +1,226 @@
#include <Common/GWPAsan.h>
#if USE_GWP_ASAN
# include <IO/ReadHelpers.h>
# include <gwp_asan/common.h>
# include <gwp_asan/crash_handler.h>
# include <gwp_asan/guarded_pool_allocator.h>
# include <gwp_asan/optional/options_parser.h>
# include <Common/ErrorCodes.h>
# include <Common/Exception.h>
# include <Common/Logger.h>
# include <Common/StackTrace.h>
# include <Common/logger_useful.h>
# include <atomic>
# include <iostream>
namespace GWPAsan
{
namespace
{
size_t getBackTrace(uintptr_t * trace_buffer, size_t buffer_size)
{
StackTrace stacktrace;
auto trace_size = std::min(buffer_size, stacktrace.getSize());
const auto & frame_pointers = stacktrace.getFramePointers();
memcpy(trace_buffer, frame_pointers.data(), trace_size * sizeof(uintptr_t));
return trace_size;
}
__attribute__((__format__ (__printf__, 1, 0)))
void printString(const char * format, ...) // NOLINT(cert-dcl50-cpp)
{
std::array<char, 1024> formatted;
va_list args;
va_start(args, format);
if (vsnprintf(formatted.data(), formatted.size(), format, args) > 0)
std::cerr << formatted.data() << std::endl;
va_end(args);
}
}
gwp_asan::GuardedPoolAllocator GuardedAlloc;
static bool guarded_alloc_initialized = []
{
const char * env_options_raw = std::getenv("GWP_ASAN_OPTIONS"); // NOLINT(concurrency-mt-unsafe)
if (env_options_raw)
gwp_asan::options::initOptions(env_options_raw, printString);
auto & opts = gwp_asan::options::getOptions();
if (!env_options_raw || !std::string_view{env_options_raw}.contains("MaxSimultaneousAllocations"))
opts.MaxSimultaneousAllocations = 1024;
if (!env_options_raw || !std::string_view{env_options_raw}.contains("SampleRate"))
opts.SampleRate = 50000;
opts.Backtrace = getBackTrace;
GuardedAlloc.init(opts);
return true;
}();
bool isGWPAsanError(uintptr_t fault_address)
{
const auto * state = GuardedAlloc.getAllocatorState();
if (state->FailureType != gwp_asan::Error::UNKNOWN && state->FailureAddress != 0)
return true;
return fault_address < state->GuardedPagePoolEnd && state->GuardedPagePool <= fault_address;
}
namespace
{
struct ScopedEndOfReportDecorator
{
explicit ScopedEndOfReportDecorator(Poco::LoggerPtr log_) : log(std::move(log_)) { }
~ScopedEndOfReportDecorator() { LOG_FATAL(log, "*** End GWP-ASan report ***"); }
Poco::LoggerPtr log;
};
// Prints the provided error and metadata information.
void printHeader(gwp_asan::Error error, uintptr_t fault_address, const gwp_asan::AllocationMetadata * allocation_meta, Poco::LoggerPtr log)
{
bool access_was_in_bounds = false;
std::string description;
if (error != gwp_asan::Error::UNKNOWN && allocation_meta != nullptr)
{
uintptr_t address = __gwp_asan_get_allocation_address(allocation_meta);
size_t size = __gwp_asan_get_allocation_size(allocation_meta);
if (fault_address < address)
{
description = fmt::format(
"({} byte{} to the left of a {}-byte allocation at 0x{}) ",
address - fault_address,
(address - fault_address == 1) ? "" : "s",
size,
address);
}
else if (fault_address > address)
{
description = fmt::format(
"({} byte{} to the right of a {}-byte allocation at 0x{}) ",
fault_address - address,
(fault_address - address == 1) ? "" : "s",
size,
address);
}
else if (error == gwp_asan::Error::DOUBLE_FREE)
{
description = fmt::format("(a {}-byte allocation) ", size);
}
else
{
access_was_in_bounds = true;
description = fmt::format(
"({} byte{} into a {}-byte allocation at 0x{}) ",
fault_address - address,
(fault_address - address == 1) ? "" : "s",
size,
address);
}
}
uint64_t thread_id = gwp_asan::getThreadID();
std::string thread_id_string = thread_id == gwp_asan::kInvalidThreadID ? "<unknown>" : fmt::format("{}", thread_id);
std::string_view out_of_bounds_and_use_after_free_warning;
if (error == gwp_asan::Error::USE_AFTER_FREE && !access_was_in_bounds)
{
out_of_bounds_and_use_after_free_warning = " (warning: buffer overflow/underflow detected on a free()'d "
"allocation. This either means you have a buffer-overflow and a "
"use-after-free at the same time, or you have a long-lived "
"use-after-free bug where the allocation/deallocation metadata below "
"has already been overwritten and is likely bogus)";
}
LOG_FATAL(
log,
"{}{} at 0x{} {}by thread {} here:",
gwp_asan::ErrorToString(error),
out_of_bounds_and_use_after_free_warning,
fault_address,
description,
thread_id_string);
}
}
void printReport([[maybe_unused]] uintptr_t fault_address)
{
const auto logger = getLogger("GWPAsan");
const auto * state = GuardedAlloc.getAllocatorState();
if (uintptr_t internal_error_ptr = __gwp_asan_get_internal_crash_address(state); internal_error_ptr)
fault_address = internal_error_ptr;
const gwp_asan::AllocationMetadata * allocation_meta = __gwp_asan_get_metadata(state, GuardedAlloc.getMetadataRegion(), fault_address);
static constexpr std::string_view unknown_crash_text =
"GWP-ASan cannot provide any more information about this error. This may "
"occur due to a wild memory access into the GWP-ASan pool, or an "
"overflow/underflow that is > 512B in length.\n";
if (allocation_meta == nullptr)
{
LOG_FATAL(logger, "*** GWP-ASan detected a memory error ***");
ScopedEndOfReportDecorator decorator(logger);
LOG_FATAL(logger, fmt::runtime(unknown_crash_text));
return;
}
LOG_FATAL(logger, "*** GWP-ASan detected a memory error ***");
ScopedEndOfReportDecorator decorator(logger);
gwp_asan::Error error = __gwp_asan_diagnose_error(state, allocation_meta, fault_address);
if (error == gwp_asan::Error::UNKNOWN)
{
LOG_FATAL(logger, fmt::runtime(unknown_crash_text));
return;
}
// Print the error header.
printHeader(error, fault_address, allocation_meta, logger);
static constexpr size_t maximum_stack_frames = 512;
std::array<uintptr_t, maximum_stack_frames> trace;
// Maybe print the deallocation trace.
if (__gwp_asan_is_deallocated(allocation_meta))
{
uint64_t thread_id = __gwp_asan_get_deallocation_thread_id(allocation_meta);
if (thread_id == gwp_asan::kInvalidThreadID)
LOG_FATAL(logger, "0x{} was deallocated by thread <unknown> here:", fault_address);
else
LOG_FATAL(logger, "0x{} was deallocated by thread {} here:", fault_address, thread_id);
const auto trace_length = __gwp_asan_get_deallocation_trace(allocation_meta, trace.data(), maximum_stack_frames);
StackTrace::toStringEveryLine(
reinterpret_cast<void **>(trace.data()), 0, trace_length, [&](const auto line) { LOG_FATAL(logger, fmt::runtime(line)); });
}
// Print the allocation trace.
uint64_t thread_id = __gwp_asan_get_allocation_thread_id(allocation_meta);
if (thread_id == gwp_asan::kInvalidThreadID)
LOG_FATAL(logger, "0x{} was allocated by thread <unknown> here:", fault_address);
else
LOG_FATAL(logger, "0x{} was allocated by thread {} here:", fault_address, thread_id);
const auto trace_length = __gwp_asan_get_allocation_trace(allocation_meta, trace.data(), maximum_stack_frames);
StackTrace::toStringEveryLine(
reinterpret_cast<void **>(trace.data()), 0, trace_length, [&](const auto line) { LOG_FATAL(logger, fmt::runtime(line)); });
}
std::atomic<double> force_sample_probability = 0.0;
void setForceSampleProbability(double value)
{
force_sample_probability.store(value, std::memory_order_relaxed);
}
}
#endif
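
`isGWPAsanError` and `printReport` are the two entry points a crash handler needs: the first decides whether a faulting address belongs to the guarded pool, the second prints the allocation/deallocation report. A hedged sketch of how a fault handler might consult them (the `onFault` hook is illustrative, not the actual ClickHouse signal pipeline):

```cpp
#if USE_GWP_ASAN
#include <Common/GWPAsan.h>

// Illustrative-only fault hook: if the faulting address lies in the GWP-ASan
// guarded pool, emit the detailed report before the generic crash handling.
void onFault(uintptr_t fault_address)
{
    if (GWPAsan::isGWPAsanError(fault_address))
        GWPAsan::printReport(fault_address);
    // ... fall through to the regular stack-trace / termination logic ...
}
#endif
```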

src/Common/GWPAsan.h (new file)

@ -0,0 +1,34 @@
#pragma once
#include "config.h"
#if USE_GWP_ASAN
#include <gwp_asan/guarded_pool_allocator.h>
#include <Common/thread_local_rng.h>
#include <atomic>
#include <random>
namespace GWPAsan
{
extern gwp_asan::GuardedPoolAllocator GuardedAlloc;
bool isGWPAsanError(uintptr_t fault_address);
void printReport(uintptr_t fault_address);
extern std::atomic<double> force_sample_probability;
void setForceSampleProbability(double value);
inline bool shouldForceSample()
{
std::bernoulli_distribution dist(force_sample_probability.load(std::memory_order_relaxed));
return dist(thread_local_rng);
}
}
#endif
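
`force_sample_probability` is the knob fed by the `gwp_asan_force_sample_probability` server setting in the `Server.cpp` hunks above; `shouldForceSample()` then performs a Bernoulli trial per call. A small sketch of the intended flow at an invented call site, mirroring the `PODArray` hooks shown later in this diff:

```cpp
#if USE_GWP_ASAN
#include <Common/GWPAsan.h>

// Invented call site for illustration: with the configured probability,
// force the very next allocation on this thread to be guarded by GWP-ASan.
void maybeGuardNextAllocation()
{
    if (GWPAsan::shouldForceSample())
        gwp_asan::getThreadLocals()->NextSampleCounter = 1;
}
#endif
```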


@ -9,6 +9,7 @@
#include <Common/ProxyConfiguration.h>
#include <Common/MemoryTrackerSwitcher.h>
#include <Common/SipHash.h>
#include <Common/proxyConfigurationToPocoProxyConfig.h>
#include <Poco/Net/HTTPChunkedStream.h>
#include <Poco/Net/HTTPClientSession.h>
@ -70,20 +71,6 @@ namespace CurrentMetrics
namespace
{
Poco::Net::HTTPClientSession::ProxyConfig proxyConfigurationToPocoProxyConfig(const DB::ProxyConfiguration & proxy_configuration)
{
Poco::Net::HTTPClientSession::ProxyConfig poco_proxy_config;
poco_proxy_config.host = proxy_configuration.host;
poco_proxy_config.port = proxy_configuration.port;
poco_proxy_config.protocol = DB::ProxyConfiguration::protocolToString(proxy_configuration.protocol);
poco_proxy_config.tunnel = proxy_configuration.tunneling;
poco_proxy_config.originalRequestProtocol = DB::ProxyConfiguration::protocolToString(proxy_configuration.original_request_protocol);
return poco_proxy_config;
}
constexpr size_t roundUp(size_t x, size_t rounding)
{
chassert(rounding > 0);
@ -696,7 +683,8 @@ struct EndpointPoolKey
proxy_config.port,
proxy_config.protocol,
proxy_config.tunneling,
proxy_config.original_request_protocol)
proxy_config.original_request_protocol,
proxy_config.no_proxy_hosts)
== std::tie(
rhs.connection_group,
rhs.target_host,
@ -706,7 +694,8 @@ struct EndpointPoolKey
rhs.proxy_config.port,
rhs.proxy_config.protocol,
rhs.proxy_config.tunneling,
rhs.proxy_config.original_request_protocol);
rhs.proxy_config.original_request_protocol,
rhs.proxy_config.no_proxy_hosts);
}
};
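
Extending the `std::tie` comparison with `no_proxy_hosts` matters because pooled connections are reused per endpoint-plus-proxy configuration: two configurations that differ only in their bypass list must not share a pool. A reduced sketch of the idiom (a stand-in struct, not the real `EndpointPoolKey`):

```cpp
#include <cassert>
#include <string>
#include <tuple>

// Reduced stand-in for EndpointPoolKey: equality compares every field that
// affects how a connection is established, including the bypass list.
struct PoolKey
{
    std::string host;
    std::string no_proxy_hosts;

    bool operator==(const PoolKey & rhs) const
    {
        return std::tie(host, no_proxy_hosts) == std::tie(rhs.host, rhs.no_proxy_hosts);
    }
};

int main()
{
    PoolKey a{"clickhouse.com", ""};
    PoolKey b{"clickhouse.com", "clickhouse.cloud,.gitlab.com"};
    assert(!(a == b)); // same host, different bypass list -> different pool
}
```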


@ -1,17 +1,20 @@
#pragma once
#include <Common/Allocator.h>
#include <Common/BitHelpers.h>
#include <Common/memcpySmall.h>
#include <Common/PODArray_fwd.h>
#include "config.h"
#include <base/getPageSize.h>
#include <boost/noncopyable.hpp>
#include <Common/Allocator.h>
#include <Common/BitHelpers.h>
#include <Common/GWPAsan.h>
#include <Common/PODArray_fwd.h>
#include <Common/memcpySmall.h>
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <cstddef>
#include <cassert>
#include <algorithm>
#include <memory>
#ifndef NDEBUG
#include <sys/mman.h>
@ -112,6 +115,11 @@ protected:
template <typename ... TAllocatorParams>
void alloc(size_t bytes, TAllocatorParams &&... allocator_params)
{
#if USE_GWP_ASAN
if (unlikely(GWPAsan::shouldForceSample()))
gwp_asan::getThreadLocals()->NextSampleCounter = 1;
#endif
char * allocated = reinterpret_cast<char *>(TAllocator::alloc(bytes, std::forward<TAllocatorParams>(allocator_params)...));
c_start = allocated + pad_left;
@ -141,6 +149,11 @@ protected:
return;
}
#if USE_GWP_ASAN
if (unlikely(GWPAsan::shouldForceSample()))
gwp_asan::getThreadLocals()->NextSampleCounter = 1;
#endif
unprotect();
ptrdiff_t end_diff = c_end - c_start;


@ -754,6 +754,10 @@ The server successfully detected this situation and will download merged part fr
\
M(ReadWriteBufferFromHTTPRequestsSent, "Number of HTTP requests sent by ReadWriteBufferFromHTTP") \
M(ReadWriteBufferFromHTTPBytes, "Total size of payload bytes received and sent by ReadWriteBufferFromHTTP. Doesn't include HTTP headers.") \
\
M(GWPAsanAllocateSuccess, "Number of successful allocations done by GWPAsan") \
M(GWPAsanAllocateFailed, "Number of failed allocations done by GWPAsan (i.e. filled pool)") \
M(GWPAsanFree, "Number of free operations done by GWPAsan") \
#ifdef APPLY_FOR_EXTERNAL_EVENTS

View File

@ -40,6 +40,7 @@ namespace ProfileEvents
Timer(Counters & counters_, Event timer_event_, Event counter_event, Resolution resolution_);
~Timer() { end(); }
void cancel() { watch.reset(); }
void restart() { watch.restart(); }
void end();
UInt64 get();

View File

@ -44,11 +44,18 @@ struct ProxyConfiguration
}
}
static bool useTunneling(Protocol request_protocol, Protocol proxy_protocol, bool disable_tunneling_for_https_requests_over_http_proxy)
{
bool is_https_request_over_http_proxy = request_protocol == Protocol::HTTPS && proxy_protocol == Protocol::HTTP;
return is_https_request_over_http_proxy && !disable_tunneling_for_https_requests_over_http_proxy;
}
std::string host = std::string{};
Protocol protocol = Protocol::HTTP;
uint16_t port = 0;
bool tunneling = false;
Protocol original_request_protocol = Protocol::HTTP;
std::string no_proxy_hosts = std::string{};
bool isEmpty() const { return host.empty(); }
};
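
The useTunneling predicate, now moved onto the struct, encodes a single rule: CONNECT tunneling applies only to HTTPS requests sent through an HTTP proxy, and even then only if it has not been explicitly disabled. A standalone restatement of that rule (a sketch, not the ClickHouse header):

#include <cassert>

enum class Protocol { HTTP, HTTPS };

// Mirrors ProxyConfiguration::useTunneling above.
static bool useTunneling(Protocol request, Protocol proxy, bool disable_for_https_over_http)
{
    bool is_https_request_over_http_proxy = request == Protocol::HTTPS && proxy == Protocol::HTTP;
    return is_https_request_over_http_proxy && !disable_for_https_over_http;
}

int main()
{
    assert(useTunneling(Protocol::HTTPS, Protocol::HTTP, false));   // CONNECT tunnel
    assert(!useTunneling(Protocol::HTTPS, Protocol::HTTP, true));   // explicitly disabled
    assert(!useTunneling(Protocol::HTTP, Protocol::HTTP, false));   // plain forwarding
    assert(!useTunneling(Protocol::HTTPS, Protocol::HTTPS, false)); // HTTPS proxy, no tunnel
    return 0;
}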

View File

@ -19,13 +19,6 @@ struct ProxyConfigurationResolver
virtual void errorReport(const ProxyConfiguration & config) = 0;
protected:
static bool useTunneling(Protocol request_protocol, Protocol proxy_protocol, bool disable_tunneling_for_https_requests_over_http_proxy)
{
bool is_https_request_over_http_proxy = request_protocol == Protocol::HTTPS && proxy_protocol == Protocol::HTTP;
return is_https_request_over_http_proxy && !disable_tunneling_for_https_requests_over_http_proxy;
}
Protocol request_protocol;
bool disable_tunneling_for_https_requests_over_http_proxy = false;
};

View File

@ -1,6 +1,7 @@
#include <Common/ProxyConfigurationResolverProvider.h>
#include <Common/EnvironmentProxyConfigurationResolver.h>
#include <Common/proxyConfigurationToPocoProxyConfig.h>
#include <Common/Exception.h>
#include <Common/ProxyListConfigurationResolver.h>
#include <Common/RemoteProxyConfigurationResolver.h>
@ -17,6 +18,11 @@ namespace ErrorCodes
namespace
{
std::string getNoProxyHosts(const Poco::Util::AbstractConfiguration & configuration)
{
return configuration.getString("proxy.no_proxy", "");
}
bool isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(
const Poco::Util::AbstractConfiguration & configuration)
{
@ -49,6 +55,7 @@ namespace
return std::make_shared<RemoteProxyConfigurationResolver>(
server_configuration,
request_protocol,
buildPocoNonProxyHosts(getNoProxyHosts(configuration)),
std::make_shared<RemoteProxyHostFetcherImpl>(),
isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration));
}
@ -88,7 +95,11 @@ namespace
return uris.empty()
? nullptr
: std::make_shared<ProxyListConfigurationResolver>(uris, request_protocol, isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration));
: std::make_shared<ProxyListConfigurationResolver>(
uris,
request_protocol,
buildPocoNonProxyHosts(getNoProxyHosts(configuration)),
isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration));
}
bool hasRemoteResolver(const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration)

View File

@ -1,7 +1,6 @@
#include <Common/ProxyListConfigurationResolver.h>
#include <Common/StringUtils.h>
#include <Common/logger_useful.h>
#include <Poco/URI.h>
namespace DB
@ -9,8 +8,11 @@ namespace DB
ProxyListConfigurationResolver::ProxyListConfigurationResolver(
std::vector<Poco::URI> proxies_,
Protocol request_protocol_, bool disable_tunneling_for_https_requests_over_http_proxy_)
: ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_), proxies(std::move(proxies_))
Protocol request_protocol_,
const std::string & no_proxy_hosts_,
bool disable_tunneling_for_https_requests_over_http_proxy_)
: ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_),
proxies(std::move(proxies_)), no_proxy_hosts(no_proxy_hosts_)
{
}
@ -26,12 +28,18 @@ ProxyConfiguration ProxyListConfigurationResolver::resolve()
auto & proxy = proxies[index];
bool use_tunneling_for_https_requests_over_http_proxy = ProxyConfiguration::useTunneling(
request_protocol,
ProxyConfiguration::protocolFromString(proxy.getScheme()),
disable_tunneling_for_https_requests_over_http_proxy);
return ProxyConfiguration {
proxy.getHost(),
ProxyConfiguration::protocolFromString(proxy.getScheme()),
proxy.getPort(),
useTunneling(request_protocol, ProxyConfiguration::protocolFromString(proxy.getScheme()), disable_tunneling_for_https_requests_over_http_proxy),
request_protocol
use_tunneling_for_https_requests_over_http_proxy,
request_protocol,
no_proxy_hosts
};
}

View File

@ -15,7 +15,11 @@ namespace DB
class ProxyListConfigurationResolver : public ProxyConfigurationResolver
{
public:
ProxyListConfigurationResolver(std::vector<Poco::URI> proxies_, Protocol request_protocol_, bool disable_tunneling_for_https_requests_over_http_proxy_ = false);
ProxyListConfigurationResolver(
std::vector<Poco::URI> proxies_,
Protocol request_protocol_,
const std::string & no_proxy_hosts_,
bool disable_tunneling_for_https_requests_over_http_proxy_ = false);
ProxyConfiguration resolve() override;
@ -23,6 +27,7 @@ public:
private:
std::vector<Poco::URI> proxies;
std::string no_proxy_hosts;
/// Access counter used to pick the next proxy in a round-robin fashion.
std::atomic<size_t> access_counter;
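
The atomic access counter presumably advances once per resolve() and indexes the proxy list modulo its size, giving lock-free round-robin selection (the index computation itself is outside this excerpt, so this is an assumption). A minimal sketch of the pattern with hypothetical proxy names:

#include <atomic>
#include <cassert>
#include <string>
#include <vector>

// Sketch (assumption): each call takes the next index modulo the list size.
struct RoundRobin
{
    std::vector<std::string> proxies;
    std::atomic<size_t> access_counter{0};

    const std::string & next()
    {
        return proxies[access_counter.fetch_add(1) % proxies.size()];
    }
};

int main()
{
    RoundRobin rr{{"http://proxy1", "http://proxy2"}};
    assert(rr.next() == "http://proxy1");
    assert(rr.next() == "http://proxy2");
    assert(rr.next() == "http://proxy1"); // wraps around
    return 0;
}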

View File

@ -42,11 +42,12 @@ std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const
RemoteProxyConfigurationResolver::RemoteProxyConfigurationResolver(
const RemoteServerConfiguration & remote_server_configuration_,
Protocol request_protocol_,
const std::string & no_proxy_hosts_,
std::shared_ptr<RemoteProxyHostFetcher> fetcher_,
bool disable_tunneling_for_https_requests_over_http_proxy_
)
: ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_),
remote_server_configuration(remote_server_configuration_), fetcher(fetcher_)
remote_server_configuration(remote_server_configuration_), no_proxy_hosts(no_proxy_hosts_), fetcher(fetcher_)
{
}
@ -84,7 +85,7 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve()
auto proxy_protocol = ProxyConfiguration::protocolFromString(proxy_protocol_string);
bool use_tunneling_for_https_requests_over_http_proxy = useTunneling(
bool use_tunneling_for_https_requests_over_http_proxy = ProxyConfiguration::useTunneling(
request_protocol,
proxy_protocol,
disable_tunneling_for_https_requests_over_http_proxy);
@ -94,6 +95,7 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve()
cached_config.port = proxy_port;
cached_config.tunneling = use_tunneling_for_https_requests_over_http_proxy;
cached_config.original_request_protocol = request_protocol;
cached_config.no_proxy_hosts = no_proxy_hosts;
cache_timestamp = std::chrono::system_clock::now();
cache_valid = true;

View File

@ -41,6 +41,7 @@ public:
RemoteProxyConfigurationResolver(
const RemoteServerConfiguration & remote_server_configuration_,
Protocol request_protocol_,
const std::string & no_proxy_hosts_,
std::shared_ptr<RemoteProxyHostFetcher> fetcher_,
bool disable_tunneling_for_https_requests_over_http_proxy_ = false);
@ -50,6 +51,7 @@ public:
private:
RemoteServerConfiguration remote_server_configuration;
std::string no_proxy_hosts;
std::shared_ptr<RemoteProxyHostFetcher> fetcher;
std::mutex cache_mutex;

View File

@ -5,6 +5,8 @@
#include <Common/Concepts.h>
#include <Common/CurrentMemoryTracker.h>
#include <Common/ProfileEvents.h>
#include <Common/GWPAsan.h>
#include "config.h"
#if USE_JEMALLOC
@ -15,11 +17,12 @@
# include <cstdlib>
#endif
#if USE_GWP_ASAN
# include <gwp_asan/guarded_pool_allocator.h>
static gwp_asan::GuardedPoolAllocator GuardedAlloc;
#endif
namespace ProfileEvents
{
extern const Event GWPAsanAllocateSuccess;
extern const Event GWPAsanAllocateFailed;
extern const Event GWPAsanFree;
}
namespace Memory
{
@ -34,17 +37,31 @@ requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... align)
{
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.shouldSample()))
if (unlikely(GWPAsan::GuardedAlloc.shouldSample()))
{
if constexpr (sizeof...(TAlign) == 1)
{
if (void * ptr = GuardedAlloc.allocate(size, alignToSizeT(align...)))
if (void * ptr = GWPAsan::GuardedAlloc.allocate(size, alignToSizeT(align...)))
{
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess);
return ptr;
}
else
{
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed);
}
}
else
{
if (void * ptr = GuardedAlloc.allocate(size))
if (void * ptr = GWPAsan::GuardedAlloc.allocate(size))
{
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess);
return ptr;
}
else
{
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed);
}
}
}
@ -66,10 +83,17 @@ inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... align)
inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept
{
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.shouldSample()))
if (unlikely(GWPAsan::GuardedAlloc.shouldSample()))
{
if (void * ptr = GuardedAlloc.allocate(size))
if (void * ptr = GWPAsan::GuardedAlloc.allocate(size))
{
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess);
return ptr;
}
else
{
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed);
}
}
#endif
return malloc(size);
@ -78,10 +102,17 @@ inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept
inline ALWAYS_INLINE void * newNoExept(std::size_t size, std::align_val_t align) noexcept
{
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.shouldSample()))
if (unlikely(GWPAsan::GuardedAlloc.shouldSample()))
{
if (void * ptr = GuardedAlloc.allocate(size, alignToSizeT(align)))
if (void * ptr = GWPAsan::GuardedAlloc.allocate(size, alignToSizeT(align)))
{
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess);
return ptr;
}
else
{
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed);
}
}
#endif
return aligned_alloc(static_cast<size_t>(align), size);
@ -90,9 +121,10 @@ inline ALWAYS_INLINE void * newNoExept(std::size_t size, std::align_val_t align)
inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept
{
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.pointerIsMine(ptr)))
if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(ptr)))
{
GuardedAlloc.deallocate(ptr);
ProfileEvents::increment(ProfileEvents::GWPAsanFree);
GWPAsan::GuardedAlloc.deallocate(ptr);
return;
}
#endif
@ -109,9 +141,10 @@ inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size, TAlign... al
return;
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.pointerIsMine(ptr)))
if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(ptr)))
{
GuardedAlloc.deallocate(ptr);
ProfileEvents::increment(ProfileEvents::GWPAsanFree);
GWPAsan::GuardedAlloc.deallocate(ptr);
return;
}
#endif
@ -129,9 +162,10 @@ requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]], TAlign... /* align */) noexcept
{
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.pointerIsMine(ptr)))
if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(ptr)))
{
GuardedAlloc.deallocate(ptr);
ProfileEvents::increment(ProfileEvents::GWPAsanFree);
GWPAsan::GuardedAlloc.deallocate(ptr);
return;
}
#endif
@ -183,10 +217,10 @@ inline ALWAYS_INLINE size_t untrackMemory(void * ptr [[maybe_unused]], Allocatio
std::size_t actual_size = 0;
#if USE_GWP_ASAN
if (unlikely(GuardedAlloc.pointerIsMine(ptr)))
if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(ptr)))
{
if (!size)
size = GuardedAlloc.getSize(ptr);
size = GWPAsan::GuardedAlloc.getSize(ptr);
trace = CurrentMemoryTracker::free(size);
return size;
}

View File

@ -1,5 +1,4 @@
#include <cassert>
#include <iostream>
#include <new>
#include "config.h"
#include <Common/memory.h>
@ -42,27 +41,6 @@ static struct InitializeJemallocZoneAllocatorForOSX
} initializeJemallocZoneAllocatorForOSX;
#endif
#if USE_GWP_ASAN
#include <gwp_asan/optional/options_parser.h>
/// Both clickhouse_new_delete and clickhouse_common_io link gwp_asan, but it should only be initialized once, otherwise it
/// will cause an unexpected deadlock.
static struct InitGwpAsan
{
InitGwpAsan()
{
gwp_asan::options::initOptions();
gwp_asan::options::Options &opts = gwp_asan::options::getOptions();
GuardedAlloc.init(opts);
///std::cerr << "GwpAsan is initialized, the options are { Enabled: " << opts.Enabled
/// << ", MaxSimultaneousAllocations: " << opts.MaxSimultaneousAllocations
/// << ", SampleRate: " << opts.SampleRate << " }\n";
}
} init_gwp_asan;
#endif
/// Replace default new/delete with memory tracking versions.
/// @sa https://en.cppreference.com/w/cpp/memory/new/operator_new
/// https://en.cppreference.com/w/cpp/memory/new/operator_delete

View File

@ -0,0 +1,117 @@
#include <Common/proxyConfigurationToPocoProxyConfig.h>
#include <Common/StringUtils.h>
#include <base/find_symbols.h>
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#pragma clang diagnostic ignored "-Wgnu-anonymous-struct"
#pragma clang diagnostic ignored "-Wnested-anon-types"
#pragma clang diagnostic ignored "-Wunused-parameter"
#pragma clang diagnostic ignored "-Wshadow-field-in-constructor"
#pragma clang diagnostic ignored "-Wdtor-name"
#include <re2/re2.h>
#pragma clang diagnostic pop
namespace DB
{
namespace
{
/*
* Copy `curl`'s behavior instead of `wget`'s, as it seems to be more flexible.
* `curl` strips the leading dot and accepts the URL gitlab.com as a match for no_proxy .gitlab.com,
* while `wget` does an exact match.
* */
std::string buildPocoRegexpEntryWithoutLeadingDot(const std::string & host)
{
std::string_view view_without_leading_dot = host;
if (host[0] == '.')
{
view_without_leading_dot = std::string_view {host.begin() + 1u, host.end()};
}
return RE2::QuoteMeta(view_without_leading_dot);
}
}
/*
* Even though there is no RFC that defines NO_PROXY, it is usually a comma-separated list of domains.
* Different tools implement their own versions of `NO_PROXY` support. Some support CIDR blocks, some support wildcards, etc.
* Opting for a simple implementation that covers most use cases:
* * Support only a single wildcard * (match anything)
* * Match subdomains
* * Strip leading dots
* * No regex
* * No CIDR blocks
* * No fancy handling of loopback IPs
* https://about.gitlab.com/blog/2021/01/27/we-need-to-talk-no-proxy/
* Open for discussion
* */
std::string buildPocoNonProxyHosts(const std::string & no_proxy_hosts_string)
{
if (no_proxy_hosts_string.empty())
{
return "";
}
static constexpr auto OR_SEPARATOR = "|";
static constexpr auto MATCH_ANYTHING = R"(.*)";
static constexpr auto MATCH_SUBDOMAINS_REGEX = R"((?:.*\.)?)";
bool match_any_host = no_proxy_hosts_string.size() == 1 && no_proxy_hosts_string[0] == '*';
if (match_any_host)
{
return MATCH_ANYTHING;
}
std::vector<std::string> no_proxy_hosts;
splitInto<','>(no_proxy_hosts, no_proxy_hosts_string);
bool first = true;
std::string result;
for (auto & host : no_proxy_hosts)
{
trim(host);
if (host.empty())
{
continue;
}
if (!first)
{
result.append(OR_SEPARATOR);
}
auto escaped_host_without_leading_dot = buildPocoRegexpEntryWithoutLeadingDot(host);
result.append(MATCH_SUBDOMAINS_REGEX);
result.append(escaped_host_without_leading_dot);
first = false;
}
return result;
}
Poco::Net::HTTPClientSession::ProxyConfig proxyConfigurationToPocoProxyConfig(const DB::ProxyConfiguration & proxy_configuration)
{
Poco::Net::HTTPClientSession::ProxyConfig poco_proxy_config;
poco_proxy_config.host = proxy_configuration.host;
poco_proxy_config.port = proxy_configuration.port;
poco_proxy_config.protocol = DB::ProxyConfiguration::protocolToString(proxy_configuration.protocol);
poco_proxy_config.tunnel = proxy_configuration.tunneling;
poco_proxy_config.originalRequestProtocol = DB::ProxyConfiguration::protocolToString(proxy_configuration.original_request_protocol);
poco_proxy_config.nonProxyHosts = proxy_configuration.no_proxy_hosts;
return poco_proxy_config;
}
}
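
The shape of the emitted pattern is easy to check end to end. The sketch below (requires re2) hand-writes the regex that buildPocoNonProxyHosts would produce for the hypothetical list "gitlab.com,.example.org": the domains and their subdomains match, while suffix tricks do not.

#include <re2/re2.h>
#include <cassert>

int main()
{
    // What buildPocoNonProxyHosts("gitlab.com,.example.org") is expected to emit.
    RE2 re(R"((?:.*\.)?gitlab\.com|(?:.*\.)?example\.org)");
    assert(RE2::FullMatch("gitlab.com", re));           // exact host
    assert(RE2::FullMatch("sub.gitlab.com", re));       // subdomain
    assert(RE2::FullMatch("deep.sub.example.org", re)); // nested subdomain, leading dot stripped
    assert(!RE2::FullMatch("gitlab.com.evil.net", re)); // suffix trick is rejected
    return 0;
}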

View File

@ -0,0 +1,13 @@
#pragma once
#include <Poco/Net/HTTPClientSession.h>
#include <Common/ProxyConfiguration.h>
namespace DB
{
Poco::Net::HTTPClientSession::ProxyConfig proxyConfigurationToPocoProxyConfig(const DB::ProxyConfiguration & proxy_configuration);
std::string buildPocoNonProxyHosts(const std::string & no_proxy_hosts_string);
}

View File

@ -76,22 +76,28 @@ inline std::string xmlNodeAsString(Poco::XML::Node *pNode)
struct EnvironmentProxySetter
{
EnvironmentProxySetter(const Poco::URI & http_proxy, const Poco::URI & https_proxy)
{
if (!http_proxy.empty())
{
setenv("http_proxy", http_proxy.toString().c_str(), 1); // NOLINT(concurrency-mt-unsafe)
}
static constexpr auto * NO_PROXY = "*";
static constexpr auto * HTTP_PROXY = "http://proxy_server:3128";
static constexpr auto * HTTPS_PROXY = "https://proxy_server:3128";
if (!https_proxy.empty())
{
setenv("https_proxy", https_proxy.toString().c_str(), 1); // NOLINT(concurrency-mt-unsafe)
}
EnvironmentProxySetter()
{
setenv("http_proxy", HTTP_PROXY, 1); // NOLINT(concurrency-mt-unsafe)
setenv("https_proxy", HTTPS_PROXY, 1); // NOLINT(concurrency-mt-unsafe)
// Some other tests rely on HTTP clients (e.g., gtest_aws_s3_client) that depend on proxy configuration.
// Since https://github.com/ClickHouse/ClickHouse/pull/63314, the environment proxy resolver reads from the
// environment only once, so the proxy configuration will always be there.
// The problem is that the proxy server does not exist, causing those tests to fail.
// To work around this issue, `no_proxy` is set to bypass all domains.
setenv("no_proxy", NO_PROXY, 1); // NOLINT(concurrency-mt-unsafe)
}
~EnvironmentProxySetter()
{
unsetenv("http_proxy"); // NOLINT(concurrency-mt-unsafe)
unsetenv("https_proxy"); // NOLINT(concurrency-mt-unsafe)
unsetenv("no_proxy"); // NOLINT(concurrency-mt-unsafe)
}
};
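
The helper above is purely scope-based RAII: the proxy variables exist only while the setter is alive and are unset (not restored to prior values) on destruction, which is sufficient for test isolation. A reduced single-variable sketch of the same idea:

#include <cstdlib>
#include <cstdio>

// Hypothetical reduced version of EnvironmentProxySetter (POSIX setenv/unsetenv).
struct ScopedProxyEnv
{
    ScopedProxyEnv() { setenv("http_proxy", "http://proxy_server:3128", 1); }
    ~ScopedProxyEnv() { unsetenv("http_proxy"); }
};

int main()
{
    {
        ScopedProxyEnv setter;
        std::printf("inside: %s\n", std::getenv("http_proxy")); // http://proxy_server:3128
    }
    const char * after = std::getenv("http_proxy");
    std::printf("outside: %s\n", after ? after : "(unset)");    // (unset)
    return 0;
}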

View File

@ -0,0 +1,24 @@
#include <gtest/gtest.h>
#include <Common/proxyConfigurationToPocoProxyConfig.h>
TEST(ProxyConfigurationToPocoProxyConfiguration, TestNoProxyHostRegexBuild)
{
ASSERT_EQ(
DB::buildPocoNonProxyHosts("localhost,127.0.0.1,some_other_domain:8080,sub-domain.domain.com"),
R"((?:.*\.)?localhost|(?:.*\.)?127\.0\.0\.1|(?:.*\.)?some_other_domain\:8080|(?:.*\.)?sub\-domain\.domain\.com)");
}
TEST(ProxyConfigurationToPocoProxyConfiguration, TestNoProxyHostRegexBuildMatchAnything)
{
ASSERT_EQ(
DB::buildPocoNonProxyHosts("*"),
".*");
}
TEST(ProxyConfigurationToPocoProxyConfiguration, TestNoProxyHostRegexBuildEmpty)
{
ASSERT_EQ(
DB::buildPocoNonProxyHosts(""),
"");
}

View File

@ -1,6 +1,9 @@
#include <gtest/gtest.h>
#include <Common/ProxyConfigurationResolverProvider.h>
#include <Common/RemoteProxyConfigurationResolver.h>
#include <Common/ProxyListConfigurationResolver.h>
#include <Common/EnvironmentProxyConfigurationResolver.h>
#include <Common/tests/gtest_global_context.h>
#include <Common/tests/gtest_helper_functions.h>
@ -25,27 +28,19 @@ protected:
DB::ContextMutablePtr ProxyConfigurationResolverProviderTests::context;
Poco::URI http_env_proxy_server = Poco::URI("http://http_environment_proxy:3128");
Poco::URI https_env_proxy_server = Poco::URI("http://https_environment_proxy:3128");
Poco::URI http_list_proxy_server = Poco::URI("http://http_list_proxy:3128");
Poco::URI https_list_proxy_server = Poco::URI("http://https_list_proxy:3128");
TEST_F(ProxyConfigurationResolverProviderTests, EnvironmentResolverShouldBeUsedIfNoSettings)
{
EnvironmentProxySetter setter(http_env_proxy_server, https_env_proxy_server);
EnvironmentProxySetter setter;
const auto & config = getContext().context->getConfigRef();
auto http_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, config)->resolve();
auto https_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, config)->resolve();
auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, config);
auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, config);
ASSERT_EQ(http_configuration.host, http_env_proxy_server.getHost());
ASSERT_EQ(http_configuration.port, http_env_proxy_server.getPort());
ASSERT_EQ(http_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_env_proxy_server.getScheme()));
ASSERT_EQ(https_configuration.host, https_env_proxy_server.getHost());
ASSERT_EQ(https_configuration.port, https_env_proxy_server.getPort());
ASSERT_EQ(https_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_env_proxy_server.getScheme()));
ASSERT_TRUE(std::dynamic_pointer_cast<DB::EnvironmentProxyConfigurationResolver>(http_resolver));
ASSERT_TRUE(std::dynamic_pointer_cast<DB::EnvironmentProxyConfigurationResolver>(https_resolver));
}
TEST_F(ProxyConfigurationResolverProviderTests, ListHTTPOnly)
@ -57,17 +52,11 @@ TEST_F(ProxyConfigurationResolverProviderTests, ListHTTPOnly)
config->setString("proxy.http.uri", http_list_proxy_server.toString());
context->setConfig(config);
auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config)->resolve();
auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config);
auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config);
ASSERT_EQ(http_proxy_configuration.host, http_list_proxy_server.getHost());
ASSERT_EQ(http_proxy_configuration.port, http_list_proxy_server.getPort());
ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_list_proxy_server.getScheme()));
auto https_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config)->resolve();
// No https configuration since it's not set
ASSERT_EQ(https_proxy_configuration.host, "");
ASSERT_EQ(https_proxy_configuration.port, 0);
ASSERT_TRUE(std::dynamic_pointer_cast<DB::ProxyListConfigurationResolver>(http_resolver));
ASSERT_TRUE(std::dynamic_pointer_cast<DB::EnvironmentProxyConfigurationResolver>(https_resolver));
}
TEST_F(ProxyConfigurationResolverProviderTests, ListHTTPSOnly)
@ -79,18 +68,11 @@ TEST_F(ProxyConfigurationResolverProviderTests, ListHTTPSOnly)
config->setString("proxy.https.uri", https_list_proxy_server.toString());
context->setConfig(config);
auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config)->resolve();
auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config);
auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config);
ASSERT_EQ(http_proxy_configuration.host, "");
ASSERT_EQ(http_proxy_configuration.port, 0);
auto https_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config)->resolve();
ASSERT_EQ(https_proxy_configuration.host, https_list_proxy_server.getHost());
// still HTTP because the proxy host is not HTTPS
ASSERT_EQ(https_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_list_proxy_server.getScheme()));
ASSERT_EQ(https_proxy_configuration.port, https_list_proxy_server.getPort());
ASSERT_TRUE(std::dynamic_pointer_cast<DB::EnvironmentProxyConfigurationResolver>(http_resolver));
ASSERT_TRUE(std::dynamic_pointer_cast<DB::ProxyListConfigurationResolver>(https_resolver));
}
TEST_F(ProxyConfigurationResolverProviderTests, ListBoth)
@ -107,70 +89,15 @@ TEST_F(ProxyConfigurationResolverProviderTests, ListBoth)
context->setConfig(config);
auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config)->resolve();
auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config);
auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config);
ASSERT_EQ(http_proxy_configuration.host, http_list_proxy_server.getHost());
ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_list_proxy_server.getScheme()));
ASSERT_EQ(http_proxy_configuration.port, http_list_proxy_server.getPort());
auto https_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config)->resolve();
ASSERT_EQ(https_proxy_configuration.host, https_list_proxy_server.getHost());
// still HTTP because the proxy host is not HTTPS
ASSERT_EQ(https_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_list_proxy_server.getScheme()));
ASSERT_EQ(https_proxy_configuration.port, https_list_proxy_server.getPort());
}
TEST_F(ProxyConfigurationResolverProviderTests, RemoteResolverIsBasedOnProtocolConfigurationHTTP)
{
/*
* Since there is no way to call `ProxyConfigurationResolver::resolve` on remote resolver,
* it is hard to verify the remote resolver was actually picked. One hackish way to assert
* the remote resolver was OR was not picked based on the configuration, is to use the
* environment resolver. Since the environment resolver is always returned as a fallback,
* we can assert the remote resolver was not picked if `ProxyConfigurationResolver::resolve`
* succeeds and returns an environment proxy configuration.
* */
EnvironmentProxySetter setter(http_env_proxy_server, https_env_proxy_server);
ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());
config->setString("proxy", "");
config->setString("proxy.https", "");
config->setString("proxy.https.resolver", "");
config->setString("proxy.https.resolver.endpoint", "http://resolver:8080/hostname");
// even though the proxy protocol / scheme is http, it should not be picked (prior to this PR, it would have been)
config->setString("proxy.https.resolver.proxy_scheme", "http");
config->setString("proxy.https.resolver.proxy_port", "80");
config->setString("proxy.https.resolver.proxy_cache_time", "10");
context->setConfig(config);
auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config)->resolve();
/*
* Asserts env proxy is used and not the remote resolver. If the remote resolver is picked, it is an error because
* there is no `http` specification for remote resolver
* */
ASSERT_EQ(http_proxy_configuration.host, http_env_proxy_server.getHost());
ASSERT_EQ(http_proxy_configuration.port, http_env_proxy_server.getPort());
ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_env_proxy_server.getScheme()));
ASSERT_TRUE(std::dynamic_pointer_cast<DB::ProxyListConfigurationResolver>(http_resolver));
ASSERT_TRUE(std::dynamic_pointer_cast<DB::ProxyListConfigurationResolver>(https_resolver));
}
TEST_F(ProxyConfigurationResolverProviderTests, RemoteResolverIsBasedOnProtocolConfigurationHTTPS)
{
/*
* Since there is no way to call `ProxyConfigurationResolver::resolve` on remote resolver,
* it is hard to verify the remote resolver was actually picked. One hackish way to assert
* the remote resolver was OR was not picked based on the configuration, is to use the
* environment resolver. Since the environment resolver is always returned as a fallback,
* we can assert the remote resolver was not picked if `ProxyConfigurationResolver::resolve`
* succeeds and returns an environment proxy configuration.
* */
EnvironmentProxySetter setter(http_env_proxy_server, https_env_proxy_server);
ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());
config->setString("proxy", "");
@ -185,27 +112,44 @@ TEST_F(ProxyConfigurationResolverProviderTests, RemoteResolverIsBasedOnProtocolC
context->setConfig(config);
auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config)->resolve();
auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config);
auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config);
/*
* Asserts env proxy is used and not the remote resolver. If the remote resolver is picked, it is an error because
* there is no `http` specification for remote resolver
* */
ASSERT_EQ(http_proxy_configuration.host, https_env_proxy_server.getHost());
ASSERT_EQ(http_proxy_configuration.port, https_env_proxy_server.getPort());
ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_env_proxy_server.getScheme()));
ASSERT_TRUE(std::dynamic_pointer_cast<DB::RemoteProxyConfigurationResolver>(http_resolver));
ASSERT_TRUE(std::dynamic_pointer_cast<DB::EnvironmentProxyConfigurationResolver>(https_resolver));
}
// the remote resolver is tricky to test in unit tests
TEST_F(ProxyConfigurationResolverProviderTests, RemoteResolverHTTPSOnly)
{
ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());
config->setString("proxy", "");
config->setString("proxy.https", "");
config->setString("proxy.https.resolver", "");
config->setString("proxy.https.resolver.endpoint", "http://resolver:8080/hostname");
// even though the proxy protocol / scheme is http, it should not be picked (prior to this PR, it would have been)
config->setString("proxy.https.resolver.proxy_scheme", "http");
config->setString("proxy.https.resolver.proxy_port", "80");
config->setString("proxy.https.resolver.proxy_cache_time", "10");
context->setConfig(config);
auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config);
auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config);
ASSERT_TRUE(std::dynamic_pointer_cast<DB::EnvironmentProxyConfigurationResolver>(http_resolver));
ASSERT_TRUE(std::dynamic_pointer_cast<DB::RemoteProxyConfigurationResolver>(https_resolver));
}
template <bool DISABLE_TUNNELING_FOR_HTTPS_REQUESTS_OVER_HTTP_PROXY, bool STRING>
void test_tunneling(DB::ContextMutablePtr context)
{
EnvironmentProxySetter setter(http_env_proxy_server, https_env_proxy_server);
ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());
config->setString("proxy", "");
config->setString("proxy.https", "");
config->setString("proxy.https.uri", http_list_proxy_server.toString());
if constexpr (STRING)
{
@ -230,4 +174,3 @@ TEST_F(ProxyConfigurationResolverProviderTests, TunnelingForHTTPSRequestsOverHTT
test_tunneling<true, false>(context);
test_tunneling<true, true>(context);
}

View File

@ -2,81 +2,38 @@
#include <Common/EnvironmentProxyConfigurationResolver.h>
#include <Common/tests/gtest_helper_functions.h>
#include <Common/proxyConfigurationToPocoProxyConfig.h>
#include <Poco/URI.h>
namespace DB
{
namespace
TEST(EnvironmentProxyConfigurationResolver, TestHTTPandHTTPS)
{
auto http_proxy_server = Poco::URI("http://proxy_server:3128");
auto https_proxy_server = Poco::URI("https://proxy_server:3128");
}
const auto http_proxy_server = Poco::URI(EnvironmentProxySetter::HTTP_PROXY);
const auto https_proxy_server = Poco::URI(EnvironmentProxySetter::HTTPS_PROXY);
TEST(EnvironmentProxyConfigurationResolver, TestHTTP)
{
EnvironmentProxySetter setter(http_proxy_server, {});
std::string poco_no_proxy_regex = buildPocoNonProxyHosts(EnvironmentProxySetter::NO_PROXY);
EnvironmentProxyConfigurationResolver resolver(ProxyConfiguration::Protocol::HTTP);
EnvironmentProxySetter setter;
auto configuration = resolver.resolve();
EnvironmentProxyConfigurationResolver http_resolver(ProxyConfiguration::Protocol::HTTP);
ASSERT_EQ(configuration.host, http_proxy_server.getHost());
ASSERT_EQ(configuration.port, http_proxy_server.getPort());
ASSERT_EQ(configuration.protocol, ProxyConfiguration::protocolFromString(http_proxy_server.getScheme()));
}
auto http_configuration = http_resolver.resolve();
TEST(EnvironmentProxyConfigurationResolver, TestHTTPNoEnv)
{
EnvironmentProxyConfigurationResolver resolver(ProxyConfiguration::Protocol::HTTP);
ASSERT_EQ(http_configuration.host, http_proxy_server.getHost());
ASSERT_EQ(http_configuration.port, http_proxy_server.getPort());
ASSERT_EQ(http_configuration.protocol, ProxyConfiguration::protocolFromString(http_proxy_server.getScheme()));
ASSERT_EQ(http_configuration.no_proxy_hosts, poco_no_proxy_regex);
auto configuration = resolver.resolve();
EnvironmentProxyConfigurationResolver https_resolver(ProxyConfiguration::Protocol::HTTPS);
ASSERT_EQ(configuration.host, "");
ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTP);
ASSERT_EQ(configuration.port, 0u);
}
auto https_configuration = https_resolver.resolve();
TEST(EnvironmentProxyConfigurationResolver, TestHTTPs)
{
EnvironmentProxySetter setter({}, https_proxy_server);
EnvironmentProxyConfigurationResolver resolver(ProxyConfiguration::Protocol::HTTPS);
auto configuration = resolver.resolve();
ASSERT_EQ(configuration.host, https_proxy_server.getHost());
ASSERT_EQ(configuration.port, https_proxy_server.getPort());
ASSERT_EQ(configuration.protocol, ProxyConfiguration::protocolFromString(https_proxy_server.getScheme()));
}
TEST(EnvironmentProxyConfigurationResolver, TestHTTPsNoEnv)
{
EnvironmentProxyConfigurationResolver resolver(ProxyConfiguration::Protocol::HTTPS);
auto configuration = resolver.resolve();
ASSERT_EQ(configuration.host, "");
ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTP);
ASSERT_EQ(configuration.port, 0u);
}
TEST(EnvironmentProxyConfigurationResolver, TestHTTPsOverHTTPTunnelingDisabled)
{
// use http proxy for https, this would use connect protocol by default
EnvironmentProxySetter setter({}, http_proxy_server);
bool disable_tunneling_for_https_requests_over_http_proxy = true;
EnvironmentProxyConfigurationResolver resolver(
ProxyConfiguration::Protocol::HTTPS, disable_tunneling_for_https_requests_over_http_proxy);
auto configuration = resolver.resolve();
ASSERT_EQ(configuration.host, http_proxy_server.getHost());
ASSERT_EQ(configuration.port, http_proxy_server.getPort());
ASSERT_EQ(configuration.protocol, ProxyConfiguration::protocolFromString(http_proxy_server.getScheme()));
ASSERT_EQ(configuration.tunneling, false);
ASSERT_EQ(https_configuration.host, https_proxy_server.getHost());
ASSERT_EQ(https_configuration.port, https_proxy_server.getPort());
ASSERT_EQ(https_configuration.protocol, ProxyConfiguration::protocolFromString(https_proxy_server.getScheme()));
ASSERT_EQ(https_configuration.no_proxy_hosts, poco_no_proxy_regex);
}
}

View File

@ -10,6 +10,8 @@ namespace
{
auto proxy_server1 = Poco::URI("http://proxy_server1:3128");
auto proxy_server2 = Poco::URI("http://proxy_server2:3128");
std::string no_proxy_hosts = "localhost,,127.0.0.1,some_other_domain,,,, sub-domain.domain.com,";
}
TEST(ProxyListConfigurationResolver, SimpleTest)
@ -17,7 +19,8 @@ TEST(ProxyListConfigurationResolver, SimpleTest)
ProxyListConfigurationResolver resolver(
{proxy_server1, proxy_server2},
ProxyConfiguration::Protocol::HTTP);
ProxyConfiguration::Protocol::HTTP,
no_proxy_hosts);
auto configuration1 = resolver.resolve();
auto configuration2 = resolver.resolve();
@ -25,10 +28,12 @@ TEST(ProxyListConfigurationResolver, SimpleTest)
ASSERT_EQ(configuration1.host, proxy_server1.getHost());
ASSERT_EQ(configuration1.port, proxy_server1.getPort());
ASSERT_EQ(configuration1.protocol, ProxyConfiguration::protocolFromString(proxy_server1.getScheme()));
ASSERT_EQ(configuration1.no_proxy_hosts, no_proxy_hosts);
ASSERT_EQ(configuration2.host, proxy_server2.getHost());
ASSERT_EQ(configuration2.port, proxy_server2.getPort());
ASSERT_EQ(configuration2.protocol, ProxyConfiguration::protocolFromString(proxy_server2.getScheme()));
ASSERT_EQ(configuration2.no_proxy_hosts, no_proxy_hosts);
}
TEST(ProxyListConfigurationResolver, HTTPSRequestsOverHTTPProxyDefault)
@ -36,7 +41,8 @@ TEST(ProxyListConfigurationResolver, HTTPSRequestsOverHTTPProxyDefault)
ProxyListConfigurationResolver resolver(
{proxy_server1, proxy_server2},
ProxyConfiguration::Protocol::HTTPS);
ProxyConfiguration::Protocol::HTTPS,
"");
auto configuration1 = resolver.resolve();
auto configuration2 = resolver.resolve();
@ -45,11 +51,12 @@ TEST(ProxyListConfigurationResolver, HTTPSRequestsOverHTTPProxyDefault)
ASSERT_EQ(configuration1.port, proxy_server1.getPort());
ASSERT_EQ(configuration1.protocol, ProxyConfiguration::protocolFromString(proxy_server1.getScheme()));
ASSERT_EQ(configuration1.tunneling, true);
ASSERT_EQ(configuration1.no_proxy_hosts, "");
ASSERT_EQ(configuration2.host, proxy_server2.getHost());
ASSERT_EQ(configuration2.port, proxy_server2.getPort());
ASSERT_EQ(configuration2.protocol, ProxyConfiguration::protocolFromString(proxy_server2.getScheme()));
ASSERT_EQ(configuration1.tunneling, true);
ASSERT_EQ(configuration2.no_proxy_hosts, "");
}
TEST(ProxyListConfigurationResolver, SimpleTestTunnelingDisabled)
@ -58,6 +65,7 @@ TEST(ProxyListConfigurationResolver, SimpleTestTunnelingDisabled)
ProxyListConfigurationResolver resolver(
{proxy_server1, proxy_server2},
ProxyConfiguration::Protocol::HTTPS,
"",
disable_tunneling_for_https_requests_over_http_proxy);
auto configuration1 = resolver.resolve();

View File

@ -42,6 +42,7 @@ TEST(RemoteProxyConfigurationResolver, HTTPOverHTTP)
RemoteProxyConfigurationResolver resolver(
remote_server_configuration,
ProxyConfiguration::Protocol::HTTP,
"",
std::make_shared<RemoteProxyHostFetcherMock>(proxy_server_mock)
);
@ -68,6 +69,7 @@ TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTPS)
RemoteProxyConfigurationResolver resolver(
remote_server_configuration,
ProxyConfiguration::Protocol::HTTPS,
"",
std::make_shared<RemoteProxyHostFetcherMock>(proxy_server_mock)
);
@ -95,6 +97,7 @@ TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTP)
RemoteProxyConfigurationResolver resolver(
remote_server_configuration,
ProxyConfiguration::Protocol::HTTPS,
"",
std::make_shared<RemoteProxyHostFetcherMock>(proxy_server_mock)
);
@ -122,6 +125,7 @@ TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTPNoTunneling)
RemoteProxyConfigurationResolver resolver(
remote_server_configuration,
ProxyConfiguration::Protocol::HTTPS,
"",
std::make_shared<RemoteProxyHostFetcherMock>(proxy_server_mock),
true /* disable_tunneling_for_https_requests_over_http_proxy_ */
);
@ -153,6 +157,7 @@ TEST(RemoteProxyConfigurationResolver, SimpleCacheTest)
RemoteProxyConfigurationResolver resolver(
remote_server_configuration,
ProxyConfiguration::Protocol::HTTP,
"",
fetcher_mock
);

View File

@ -54,7 +54,6 @@ ThreadPoolCallbackRunnerUnsafe<Result, Callback> threadPoolCallbackRunnerUnsafe(
auto future = task->get_future();
/// ThreadPool uses "bigger value means higher priority" instead of the usual "smaller value means higher priority".
/// Note: calling the method scheduleOrThrowOnError is intentional, because we don't want to throw exceptions
/// in critical places where this callback runner is used (e.g. loading or deletion of parts)
my_pool->scheduleOrThrowOnError([my_task = std::move(task)]{ (*my_task)(); }, priority);
@ -163,7 +162,6 @@ public:
task->future = task_func->get_future();
/// ThreadPool uses "bigger value means higher priority" instead of the usual "smaller value means higher priority".
/// Note: calling the method scheduleOrThrowOnError is intentional, because we don't want to throw exceptions
/// in critical places where this callback runner is used (e.g. loading or deletion of parts)
pool.scheduleOrThrowOnError([my_task = std::move(task_func)]{ (*my_task)(); }, priority);

View File

@ -97,11 +97,11 @@ namespace DB
\
M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \
M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \
M(UInt64, max_table_num_to_warn, 5000lu, "If number of tables is greater than this value, server will create a warning that will displayed to user.", 0) \
M(UInt64, max_view_num_to_warn, 10000lu, "If number of views is greater than this value, server will create a warning that will displayed to user.", 0) \
M(UInt64, max_dictionary_num_to_warn, 1000lu, "If number of dictionaries is greater than this value, server will create a warning that will displayed to user.", 0) \
M(UInt64, max_database_num_to_warn, 1000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0) \
M(UInt64, max_part_num_to_warn, 100000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0) \
M(UInt64, max_table_num_to_warn, 5000lu, "If the number of tables is greater than this value, the server will create a warning that will be displayed to the user.", 0) \
M(UInt64, max_view_num_to_warn, 10000lu, "If the number of views is greater than this value, the server will create a warning that will be displayed to the user.", 0) \
M(UInt64, max_dictionary_num_to_warn, 1000lu, "If the number of dictionaries is greater than this value, the server will create a warning that will be displayed to the user.", 0) \
M(UInt64, max_database_num_to_warn, 1000lu, "If the number of databases is greater than this value, the server will create a warning that will be displayed to the user.", 0) \
M(UInt64, max_part_num_to_warn, 100000lu, "If the number of parts is greater than this value, the server will create a warning that will be displayed to the user.", 0) \
M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \
M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \
\
@ -146,6 +146,7 @@ namespace DB
M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \
M(Double, gwp_asan_force_sample_probability, 0, "Probability that an allocation from specific places will be sampled by GWP Asan (e.g. PODArray allocations)", 0) \
/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp

View File

@ -6,6 +6,7 @@
#include <Common/MemoryTracker.h>
#include <Daemon/BaseDaemon.h>
#include <Daemon/SentryWriter.h>
#include <Common/GWPAsan.h>
#include <sys/stat.h>
#include <sys/types.h>
@ -156,6 +157,12 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
const ucontext_t * signal_context = reinterpret_cast<ucontext_t *>(context);
const StackTrace stack_trace(*signal_context);
#if USE_GWP_ASAN
if (const auto fault_address = reinterpret_cast<uintptr_t>(info->si_addr);
GWPAsan::isGWPAsanError(fault_address))
GWPAsan::printReport(fault_address);
#endif
writeBinary(sig, out);
writePODBinary(*info, out);
writePODBinary(signal_context, out);

View File

@ -511,7 +511,10 @@ MutableColumns CacheDictionary<dictionary_key_type>::aggregateColumns(
if (default_mask)
{
if (key_state_from_storage.isDefault())
{
(*default_mask)[key_index] = 1;
aggregated_column->insertDefault();
}
else
{
(*default_mask)[key_index] = 0;

View File

@ -869,6 +869,7 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits)
|| (min_rows_to_compress && getTotalRowCount() >= min_rows_to_compress)))
{
block_to_save = block_to_save.compress();
have_compressed = true;
}
data->blocks_allocated_size += block_to_save.allocatedBytes();
@ -2317,14 +2318,19 @@ void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed)
}
};
for (const Block & compressed_block_right : data->blocks)
for (const Block & block_right : data->blocks)
{
++block_number;
if (block_number < start_right_block)
continue;
auto block_right = compressed_block_right.decompress();
process_right_block(block_right);
/// The following statement cannot be substituted with `process_right_block(!have_compressed ? block_right : block_right.decompress())`
/// because that would copy `block_right` even when its branch is taken: the common type of `block_right` and `block_right.decompress()` in the ternary expression is a temporary `Block`.
if (!have_compressed)
process_right_block(block_right);
else
process_right_block(block_right.decompress());
if (rows_added > max_joined_block_rows)
{
break;
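
The comment about the ternary is easy to verify in isolation: when a conditional expression mixes an lvalue Block with a temporary Block, the common type is a prvalue Block, so the lvalue branch is copied even when it is the one selected. A self-contained illustration with a stub Block type (not the real ClickHouse class):

#include <cstdio>

struct Block
{
    bool copied = false;
    Block() = default;
    Block(const Block &) : copied(true) {} // marks copies
    Block decompress() const { return Block{}; }
};

int main()
{
    Block block_right;
    bool have_compressed = false;
    // Common type of the two branches is a prvalue Block, so block_right
    // is copied here even though the non-decompressing branch is taken.
    const Block & via_ternary = !have_compressed ? block_right : block_right.decompress();
    std::printf("copied: %d\n", via_ternary.copied); // prints 1
    return 0;
}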

View File

@ -434,7 +434,10 @@ private:
/// Changes in hash table broke correspondence,
/// so we must guarantee constantness of hash table during HashJoin lifetime (using method setLock)
mutable JoinStuff::JoinUsedFlags used_flags;
RightTableDataPtr data;
bool have_compressed = false;
std::vector<Sizes> key_sizes;
/// Needed to do external cross join

View File

@ -2004,8 +2004,7 @@ MutationCommands ReplicatedMergeTreeQueue::getMutationCommands(
MutationCommands commands;
for (auto it = begin; it != end; ++it)
{
/// FIXME uncomment this assertion after releasing 23.5 (currently it fails in Upgrade check)
/// chassert(mutation_pointer < it->second->entry->znode_name);
chassert(mutation_pointer < it->second->entry->znode_name);
mutation_ids.push_back(it->second->entry->znode_name);
const auto & commands_from_entry = it->second->entry->commands;
commands.insert(commands.end(), commands_from_entry.begin(), commands_from_entry.end());

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python3
import argparse
import logging
import sys
from github import Github
@ -20,84 +20,98 @@ from report import FAILURE, PENDING, SUCCESS, StatusType
from synchronizer_utils import SYNC_BRANCH_PREFIX
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="Script to merge the given PR. Additional checks for approved "
"status and green commit statuses could be done",
)
parser.add_argument(
"--wf-status",
type=str,
default="",
help="overall workflow status [success|failure]",
)
return parser.parse_args()
def main():
logging.basicConfig(level=logging.INFO)
args = parse_args()
has_failure = False
# FIXME: temporary hack to fail Mergeable Check in MQ if pipeline has any failed jobs
if len(sys.argv) > 1 and sys.argv[1] == "--pipeline-failure":
has_failure = True
has_workflow_failures = args.wf_status == FAILURE
pr_info = PRInfo(need_orgs=True)
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
statuses = None
if pr_info.is_merge_queue:
# in MQ Mergeable check status must never be green if any failures in workflow
if has_failure:
set_mergeable_check(commit, "workflow failed", "failure")
# in the MQ, the Mergeable Check status must never be green if there are any failures in the workflow
if has_workflow_failures:
set_mergeable_check(commit, "workflow failed", FAILURE)
else:
# This must be the only place where a green MCheck is set in the MQ (at the end of CI) to avoid an early merge
set_mergeable_check(commit, "workflow passed", "success")
else:
statuses = get_commit_filtered_statuses(commit)
state = trigger_mergeable_check(commit, statuses, set_if_green=True)
set_mergeable_check(commit, "workflow passed", SUCCESS)
return
# Process upstream StatusNames.SYNC
if (
pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/")
and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY
):
upstream_pr_number = int(pr_info.head_ref.split("/pr/", maxsplit=1)[1])
update_upstream_sync_status(
upstream_pr_number,
pr_info.number,
gh,
state,
can_set_green_mergeable_status=True,
)
statuses = get_commit_filtered_statuses(commit)
state = trigger_mergeable_check(commit, statuses, set_if_green=True)
ci_running_statuses = [s for s in statuses if s.context == StatusNames.CI]
if not ci_running_statuses:
return
# Take the latest status
ci_status = ci_running_statuses[-1]
# Process upstream StatusNames.SYNC
if (
pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/")
and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY
):
upstream_pr_number = int(pr_info.head_ref.split("/pr/", maxsplit=1)[1])
update_upstream_sync_status(
upstream_pr_number,
pr_info.number,
gh,
state,
can_set_green_mergeable_status=True,
)
has_failure = False
has_pending = False
for status in statuses:
if status.context in (StatusNames.MERGEABLE, StatusNames.CI):
# do not account these statuses
continue
if status.state == PENDING:
if status.context == StatusNames.SYNC:
# do not account sync status if pending - it's a different WF
continue
has_pending = True
elif status.state == SUCCESS:
continue
else:
has_failure = True
ci_running_statuses = [s for s in statuses if s.context == StatusNames.CI]
if not ci_running_statuses:
return
# Take the latest status
ci_status = ci_running_statuses[-1]
ci_state = SUCCESS # type: StatusType
if has_failure:
ci_state = FAILURE
elif has_pending:
print("ERROR: CI must not have pending jobs by the time of finish check")
ci_state = FAILURE
has_failure = False
has_pending = False
error_cnt = 0
for status in statuses:
if status.context in (StatusNames.MERGEABLE, StatusNames.CI, StatusNames.SYNC):
# do not count these statuses
continue
if status.state == PENDING:
has_pending = True
elif status.state != SUCCESS:
has_failure = True
error_cnt += 1
if ci_status.state == PENDING:
post_commit_status(
commit,
ci_state,
ci_status.target_url,
"All checks finished",
StatusNames.CI,
pr_info,
dump_to_file=True,
)
ci_state = SUCCESS # type: StatusType
description = "All checks finished"
if has_failure:
ci_state = FAILURE
description = f"All checks finished. {error_cnt} jobs failed"
elif has_workflow_failures:
ci_state = FAILURE
description = "All checks finished. Workflow has failures."
elif has_pending:
print("ERROR: CI must not have pending jobs by the time of finish check")
description = "ERROR: workflow has pending jobs"
ci_state = FAILURE
post_commit_status(
commit,
ci_state,
ci_status.target_url,
description,
StatusNames.CI,
pr_info,
dump_to_file=True,
)
if __name__ == "__main__":

View File

@ -112,12 +112,12 @@ class GitHub(github.Github):
# pylint: enable=signature-differs
def get_pulls_from_search(self, *args: Any, **kwargs: Any) -> PullRequests:
"""The search api returns actually issues, so we need to fetch PullRequests"""
issues = self.search_issues(*args, **kwargs)
repos = {}
prs = [] # type: PullRequests
progress_func = kwargs.pop(
"progress_func", lambda x: x
) # type: Callable[[Issues], Issues]
issues = self.search_issues(*args, **kwargs)
repos = {}
prs = [] # type: PullRequests
for issue in progress_func(issues):
# See https://github.com/PyGithub/PyGithub/issues/2202,
# obj._rawData doesn't spend additional API requests

View File

@ -30,7 +30,7 @@ def check_proxy_logs(
False
), f"{http_method} method not found in logs of {proxy_instance} for bucket {bucket}"
time.sleep(1)
time.sleep(1)
def wait_resolver(cluster):
@ -124,3 +124,13 @@ def simple_storage_test(cluster, node, proxies, policy):
# not checking for POST because it is in a different format
check_proxy_logs(cluster, proxies, "http", policy, ["PUT", "GET"])
def simple_test_assert_no_proxy(cluster, proxies, protocol, bucket):
minio_endpoint = build_s3_endpoint(protocol, bucket)
node = cluster.instances[bucket]
perform_simple_queries(node, minio_endpoint)
# No HTTP method should be found in proxy logs if no proxy is active
empty_method_list = []
check_proxy_logs(cluster, proxies, protocol, bucket, empty_method_list)

View File

@ -404,6 +404,8 @@ def test_alter_detach_part(started_cluster, engine):
main_node.query(f"INSERT INTO {database}.alter_detach VALUES (123)")
if engine == "MergeTree":
dummy_node.query(f"INSERT INTO {database}.alter_detach VALUES (456)")
else:
main_node.query(f"SYSTEM SYNC REPLICA {database}.alter_detach PULL")
main_node.query(f"ALTER TABLE {database}.alter_detach DETACH PART '{part_name}'")
detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='{database}' AND table='alter_detach'"
assert main_node.query(detached_parts_query) == f"{part_name}\n"

View File

@ -0,0 +1,9 @@
<clickhouse>
<proxy>
<no_proxy>not_important_host,, minio1 ,</no_proxy>
<http>
<uri>http://proxy1</uri>
<uri>http://proxy2</uri>
</http>
</proxy>
</clickhouse>

View File

@ -0,0 +1,18 @@
<clickhouse>
<proxy>
<no_proxy>not_important_host,, minio1 ,</no_proxy>
<!--
At each interaction with S3, the resolver sends an empty GET request to the specified endpoint URL to obtain the proxy host.
The proxy host is returned as a string in the response body.
The S3 client then uses the proxy URL, formed as proxy_scheme://proxy_host:proxy_port, to make the request.
-->
<http>
<resolver>
<endpoint>http://resolver:8080/hostname</endpoint>
<proxy_scheme>http</proxy_scheme>
<proxy_port>80</proxy_port>
<proxy_cache_time>10</proxy_cache_time>
</resolver>
</http>
</proxy>
</clickhouse>

View File

@ -19,6 +19,14 @@ def cluster():
with_minio=True,
)
cluster.add_instance(
"remote_proxy_node_no_proxy",
main_configs=[
"configs/config.d/proxy_remote_no_proxy.xml",
],
with_minio=True,
)
cluster.add_instance(
"proxy_list_node",
main_configs=[
@ -27,6 +35,14 @@ def cluster():
with_minio=True,
)
cluster.add_instance(
"proxy_list_node_no_proxy",
main_configs=[
"configs/config.d/proxy_list_no_proxy.xml",
],
with_minio=True,
)
cluster.add_instance(
"env_node",
with_minio=True,
@ -36,6 +52,16 @@ def cluster():
instance_env_variables=True,
)
cluster.add_instance(
"env_node_no_proxy",
with_minio=True,
env_variables={
"http_proxy": "http://proxy1",
"no_proxy": "not_important_host,, minio1 ,",
},
instance_env_variables=True,
)
logging.info("Starting cluster...")
cluster.start()
logging.info("Cluster started")
@ -48,6 +74,24 @@ def cluster():
cluster.shutdown()
def test_s3_with_http_proxy_list_no_proxy(cluster):
proxy_util.simple_test_assert_no_proxy(
cluster, ["proxy1", "proxy2"], "http", "proxy_list_node_no_proxy"
)
def test_s3_with_http_remote_proxy_no_proxy(cluster):
proxy_util.simple_test_assert_no_proxy(
cluster, ["proxy1"], "http", "remote_proxy_node_no_proxy"
)
def test_s3_with_http_env_no_proxy(cluster):
proxy_util.simple_test_assert_no_proxy(
cluster, ["proxy1"], "http", "env_node_no_proxy"
)
def test_s3_with_http_proxy_list(cluster):
proxy_util.simple_test(cluster, ["proxy1", "proxy2"], "http", "proxy_list_node")

View File

@ -0,0 +1,13 @@
<clickhouse>
<proxy>
<no_proxy>not_important_host,, minio1 ,</no_proxy>
<http>
<uri>http://proxy1</uri>
<uri>http://proxy2</uri>
</http>
<https>
<uri>https://proxy1</uri>
<uri>https://proxy2</uri>
</https>
</proxy>
</clickhouse>

View File

@ -0,0 +1,18 @@
<clickhouse>
<proxy>
<no_proxy>not_important_host,, minio1 ,</no_proxy>
<!--
At each interaction with S3, the resolver sends an empty GET request to the specified endpoint URL to obtain the proxy host.
The proxy host is returned as a string in the response body.
The S3 client then uses the proxy URL, formed as proxy_scheme://proxy_host:proxy_port, to make the request.
-->
<https>
<resolver>
<endpoint>http://resolver:8080/hostname</endpoint>
<proxy_scheme>https</proxy_scheme>
<proxy_port>443</proxy_port>
<proxy_cache_time>10</proxy_cache_time>
</resolver>
</https>
</proxy>
</clickhouse>

View File

@ -23,6 +23,15 @@ def cluster():
minio_certs_dir="minio_certs",
)
cluster.add_instance(
"remote_proxy_node_no_proxy",
main_configs=[
"configs/config.d/proxy_remote_no_proxy.xml",
"configs/config.d/ssl.xml",
],
with_minio=True,
)
cluster.add_instance(
"proxy_list_node",
main_configs=[
@ -32,6 +41,15 @@ def cluster():
with_minio=True,
)
cluster.add_instance(
"proxy_list_node_no_proxy",
main_configs=[
"configs/config.d/proxy_list_no_proxy.xml",
"configs/config.d/ssl.xml",
],
with_minio=True,
)
cluster.add_instance(
"env_node",
main_configs=[
@ -44,6 +62,19 @@ def cluster():
instance_env_variables=True,
)
cluster.add_instance(
"env_node_no_proxy",
main_configs=[
"configs/config.d/ssl.xml",
],
with_minio=True,
env_variables={
"https_proxy": "https://proxy1",
"no_proxy": "not_important_host,, minio1 ,",
},
instance_env_variables=True,
)
logging.info("Starting cluster...")
cluster.start()
logging.info("Cluster started")
@ -56,6 +87,24 @@ def cluster():
cluster.shutdown()
def test_s3_with_https_proxy_list_no_proxy(cluster):
proxy_util.simple_test_assert_no_proxy(
cluster, ["proxy1", "proxy2"], "https", "proxy_list_node_no_proxy"
)
def test_s3_with_https_env_no_proxy(cluster):
proxy_util.simple_test_assert_no_proxy(
cluster, ["proxy1"], "https", "env_node_no_proxy"
)
def test_s3_with_https_remote_no_proxy(cluster):
proxy_util.simple_test_assert_no_proxy(
cluster, ["proxy1"], "https", "remote_proxy_node_no_proxy"
)
def test_s3_with_https_proxy_list(cluster):
proxy_util.simple_test(cluster, ["proxy1", "proxy2"], "https", "proxy_list_node")

View File

@ -758,12 +758,12 @@ def test_read_subcolumns(cluster):
)
res = node.query(
f"select a.b.d, _path, a.b, _file, dateDiff('minute', _time, now()), a.e from azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumns.tsv',"
f"select a.b.d, _path, a.b, _file, a.e from azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumns.tsv',"
f" 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto',"
f" 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')"
)
assert res == "2\tcont/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t0\t3\n"
assert res == "2\tcont/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n"
res = node.query(
f"select a.b.d, _path, a.b, _file, a.e from azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumns.jsonl',"
@ -790,6 +790,25 @@ def test_read_subcolumns(cluster):
assert res == "42\tcont/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n"
def test_read_subcolumn_time(cluster):
node = cluster.instances["node"]
storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"]
azure_query(
node,
f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumn_time.tsv', "
f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto',"
f" 'a UInt32') select (42)",
)
res = node.query(
f"select a, dateDiff('minute', _time, now()) < 59 from azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumn_time.tsv',"
f" 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto',"
f" 'a UInt32')"
)
assert res == "42\t1\n"
def test_read_from_not_existing_container(cluster):
node = cluster.instances["node"]
query = (

View File

@ -987,10 +987,10 @@ def test_read_subcolumns(started_cluster):
assert res == "2\ttest_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n"
res = node.query(
f"select x.b.d, _path, x.b, _file, dateDiff('minute', _time, now()), x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')"
f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')"
)
assert res == "0\ttest_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\t0\n"
assert res == "0\ttest_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n"
res = node.query(
f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')"
@ -999,6 +999,20 @@ def test_read_subcolumns(started_cluster):
assert res == "42\ttest_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n"
def test_read_subcolumn_time(started_cluster):
node = started_cluster.instances["node1"]
node.query(
f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumn_time.tsv', auto, 'a UInt32') select (42)"
)
res = node.query(
f"select a, dateDiff('minute', _time, now()) < 59 from hdfs('hdfs://hdfs1:9000/test_subcolumn_time.tsv', auto, 'a UInt32')"
)
assert res == "42\t1\n"
def test_union_schema_inference_mode(started_cluster):
node = started_cluster.instances["node1"]

View File

@ -2121,12 +2121,10 @@ def test_read_subcolumns(started_cluster):
assert res == "0\troot/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n"
res = instance.query(
f"select x.b.d, _path, x.b, _file, dateDiff('minute', _time, now()), x.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')"
f"select x.b.d, _path, x.b, _file, x.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')"
)
assert (
res == "42\troot/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t0\t42\n"
)
assert res == "42\troot/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n"
res = instance.query(
f"select a.b.d, _path, a.b, _file, a.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')"
@ -2154,7 +2152,20 @@ def test_read_subcolumns(started_cluster):
res == "42\t/root/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n"
)
def test_read_subcolumn_time(started_cluster):
bucket = started_cluster.minio_bucket
instance = started_cluster.instances["dummy"]
instance.query(
f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumn_time.tsv', auto, 'a UInt32') select (42)"
)
res = instance.query(
f"select a, dateDiff('minute', _time, now()) < 59 from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumn_time.tsv', auto, 'a UInt32')"
)
assert res == "42\t1\n"
def test_filtering_by_file_or_path(started_cluster):

View File

@ -0,0 +1,13 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
echo "1,2" > $CLICKHOUSE_TEST_UNIQUE_NAME.csv
$CLICKHOUSE_LOCAL -nm -q "
create table test (x UInt64, y UInt32, size UInt64) engine=Memory;
insert into test select c1, c2, _size from file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') settings use_structure_from_insertion_table_in_table_functions=1;
select * from test;
"
rm $CLICKHOUSE_TEST_UNIQUE_NAME.csv
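For what the script above asserts, a hypothetical standalone reproduction (assuming clickhouse-local is on PATH; the file name is illustrative) could be:
import pathlib
import subprocess

csv = pathlib.Path("example.csv")
csv.write_text("1,2\n")  # 4 bytes on disk, so _size should be 4
out = subprocess.run(
    [
        "clickhouse-local", "-nm", "-q",
        "create table test (x UInt64, y UInt32, size UInt64) engine=Memory;"
        "insert into test select c1, c2, _size from file('example.csv')"
        " settings use_structure_from_insertion_table_in_table_functions=1;"
        "select * from test;",
    ],
    capture_output=True, text=True, check=True,
)
print(out.stdout)  # expected: "1\t2\t4\n"
csv.unlink()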

View File

@ -1,14 +0,0 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
echo "1,2" > $CLICKHOUSE_TEST_UNIQUE_NAME.csv
sleep 1
$CLICKHOUSE_LOCAL -nm -q "
create table test (x UInt64, y UInt32, size UInt64, d32 DateTime32, d64 DateTime64) engine=Memory;
insert into test select c1, c2, _size, _time, _time from file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') settings use_structure_from_insertion_table_in_table_functions=1;
select x, y, size, (dateDiff('millisecond', d32, now()) < 4000 AND dateDiff('millisecond', d32, now()) > 0), (dateDiff('second', d64, now()) < 4 AND dateDiff('second', d64, now()) > 0) from test;
"
rm $CLICKHOUSE_TEST_UNIQUE_NAME.csv

View File

@ -0,0 +1,31 @@
DROP TABLE IF EXISTS complex_key_simple_attributes_source_short_circuit_table;
DROP DICTIONARY IF EXISTS cache_dictionary_complex_key_simple_attributes_short_circuit;
CREATE TABLE complex_key_simple_attributes_source_short_circuit_table
(
id UInt64,
id_key String,
value_first String,
value_second String
)
ENGINE = TinyLog;
INSERT INTO complex_key_simple_attributes_source_short_circuit_table VALUES(0, 'id_key_0', 'value_0', 'value_second_0');
CREATE DICTIONARY cache_dictionary_complex_key_simple_attributes_short_circuit
(
`id` UInt64,
`id_key` String,
`value_first` String DEFAULT 'value_first_default',
`value_second` String DEFAULT 'value_second_default'
)
PRIMARY KEY id, id_key
SOURCE(CLICKHOUSE(TABLE 'complex_key_simple_attributes_source_short_circuit_table'))
LIFETIME(MIN 1 MAX 1000)
LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 10));
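-- The same query runs twice on purpose: the first SELECT populates the cache
-- dictionary, the second exercises the short-circuit path on cached values.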
SELECT dictGetOrDefault('cache_dictionary_complex_key_simple_attributes_short_circuit', 'value_first', (number, concat(toString(number))), toString(materialize('default'))) AS value_first FROM system.numbers LIMIT 20 FORMAT Null;
SELECT dictGetOrDefault('cache_dictionary_complex_key_simple_attributes_short_circuit', 'value_first', (number, concat(toString(number))), toString(materialize('default'))) AS value_first FROM system.numbers LIMIT 20 FORMAT Null;
DROP TABLE IF EXISTS complex_key_simple_attributes_source_short_circuit_table;
DROP DICTIONARY IF EXISTS cache_dictionary_complex_key_simple_attributes_short_circuit;

View File

@ -0,0 +1,4 @@
1 one 0
2 two 0
3 \N 0
1 one 1 0

View File

@ -0,0 +1,19 @@
DROP TABLE IF EXISTS column_modify_test;
CREATE TABLE column_modify_test (id UInt64, val String, other_col UInt64) engine=MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part=0;
INSERT INTO column_modify_test VALUES (1,'one',0);
INSERT INTO column_modify_test VALUES (2,'two',0);
-- on 21.9 that was done via mutations mechanism
ALTER TABLE column_modify_test MODIFY COLUMN val Nullable(String);
INSERT INTO column_modify_test VALUES (3,Null,0);
-- till now everything looks ok
SELECT * FROM column_modify_test order by id, val, other_col;
-- Now we run a mutation. It will affect one of the parts and update columns.txt to the latest / correct state w/o updating the column file!
alter table column_modify_test update other_col=1 where id = 1 SETTINGS mutations_sync=1;
-- row 1 is damaged now: the column file and columns.txt are out of sync!
SELECT *, throwIf(val <> 'one') as issue FROM column_modify_test WHERE id = 1;

View File

@ -0,0 +1 @@
4 1

View File

@ -0,0 +1,12 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
echo "1,2" > $CLICKHOUSE_TEST_UNIQUE_NAME.csv
sleep 1
$CLICKHOUSE_LOCAL -nm -q "
select _size, (dateDiff('millisecond', _time, now()) < 600000 AND dateDiff('millisecond', _time, now()) > 0) from file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv');
"
rm $CLICKHOUSE_TEST_UNIQUE_NAME.csv

View File

@ -0,0 +1,30 @@
0 0
1 0
2 0
3 0
4 0
5 0
6 0
7 0
8 0
9 0
0 0
1 0
2 0
3 0
4 0
5 0
6 0
7 0
8 0
9 0
0 0
1 0
2 0
3 0
4 0
5 0
6 0
7 0
8 0
9 0

View File

@ -0,0 +1,25 @@
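-- A physical column named _part_offset shadows the virtual column of the same
-- name, so every SELECT below should return the column default (0), with the
-- analyzer both disabled and enabled.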
CREATE TABLE test_table
(
`key` UInt32,
`_part_offset` DEFAULT 0
)
ENGINE = MergeTree
ORDER BY key;
INSERT INTO test_table (key) SELECT number
FROM numbers(10);
set allow_experimental_analyzer=0;
SELECT *
FROM test_table;
set allow_experimental_analyzer=1;
SELECT *
FROM test_table;
SELECT
key,
_part_offset
FROM test_table;

View File

@ -0,0 +1,6 @@
100 1 1
300 3 0
200 2 2
100 1 1
300 3 0
200 2 2

View File

@ -0,0 +1,30 @@
-- Tags: no-parallel
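-- dictGetOrDefault must accept a default expression whose type (Float64)
-- differs from the attribute's Decimal(3, 2): rows 100 and 200 resolve from
-- the dictionary, row 300 falls back to the default; plain dictGet returns 0
-- for the missing key.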
CREATE TABLE x ( hash_id UInt64, user_result Decimal(3, 2) ) ENGINE = Memory();
CREATE TABLE y ( hash_id UInt64, user_result DECIMAL(18, 6) ) ENGINE = Memory();
INSERT INTO x values (100, 1), (200, 2);
INSERT INTO y values (100, 1), (300, 3), (200, 2);
CREATE DICTIONARY d1 (hash_id UInt64, user_result Decimal(3, 2) )
PRIMARY KEY hash_id
SOURCE(CLICKHOUSE(TABLE 'x'))
LIFETIME(0)
LAYOUT(HASHED());
SELECT hash_id,
dictGetOrDefault(d1, 'user_result', toUInt64(hash_id), toFloat64(user_result)),
dictGet(d1, 'user_result', toUInt64(hash_id))
FROM y;
CREATE DICTIONARY d2 (hash_id UInt64, user_result Decimal(3, 2) )
PRIMARY KEY hash_id
SOURCE(CLICKHOUSE(TABLE 'x'))
LIFETIME(0)
LAYOUT(HASHED_ARRAY());
SELECT hash_id,
dictGetOrDefault(d2, 'user_result', toUInt64(hash_id), toFloat64(user_result)),
dictGet(d2, 'user_result', toUInt64(hash_id))
FROM y;

View File

@ -65,6 +65,7 @@ EXTERN_TYPES_EXCLUDES=(
ProfileEvents::increment
ProfileEvents::incrementForLogMessage
ProfileEvents::getName
ProfileEvents::Timer
ProfileEvents::Type
ProfileEvents::TypeEnum
ProfileEvents::dumpToMapColumn
@ -242,7 +243,7 @@ done
# All submodules should be from https://github.com/
git config --file "$ROOT_PATH/.gitmodules" --get-regexp 'submodule\..+\.url' | \
while read -r line; do
while read -r line; do
name=${line#submodule.}; name=${name%.url*}
url=${line#* }
[[ "$url" != 'https://github.com/'* ]] && echo "All submodules should be from https://github.com/, submodule '$name' has '$url'"
@ -323,6 +324,7 @@ std_cerr_cout_excludes=(
src/Bridge/IBridge.cpp
src/Daemon/BaseDaemon.cpp
src/Loggers/Loggers.cpp
src/Common/GWPAsan.cpp
)
sources_with_std_cerr_cout=( $(
find $ROOT_PATH/{src,base} -name '*.h' -or -name '*.cpp' | \

View File

@ -1,6 +1,9 @@
v24.5.3.5-stable 2024-06-13
v24.5.2.34-stable 2024-06-13
v24.5.1.1763-stable 2024-06-01
v24.4.2.141-stable 2024-06-07
v24.4.1.2088-stable 2024-05-01
v24.3.4.147-lts 2024-06-13
v24.3.3.102-lts 2024-05-01
v24.3.2.23-lts 2024-04-03
v24.3.1.2672-lts 2024-03-27
