mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-15 10:52:30 +00:00
Merge branch 'master' into ch_canh_fix_decrypt_with_null
This commit is contained in:
commit
89ad7d696d
135
.github/workflows/backport_branches.yml
vendored
135
.github/workflows/backport_branches.yml
vendored
@ -349,6 +349,100 @@ jobs:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinDarwin:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_darwin
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # otherwise we will have no info about contributors
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinDarwinAarch64:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_darwin_aarch64
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # otherwise we will have no info about contributors
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
############################################################################################
|
||||
@ -425,6 +519,46 @@ jobs:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
BuilderSpecialReport:
|
||||
needs:
|
||||
- BuilderBinDarwin
|
||||
- BuilderBinDarwinAarch64
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/report_check
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=ClickHouse special build check
|
||||
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
- name: Report Builder
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cat > "$NEEDS_DATA_PATH" << 'EOF'
|
||||
${{ toJSON(needs) }}
|
||||
EOF
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 build_report_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
##############################################################################################
|
||||
########################### FUNCTIONAL STATELESS TESTS #######################################
|
||||
##############################################################################################
|
||||
@ -592,6 +726,7 @@ jobs:
|
||||
- DockerHubPush
|
||||
- DockerServerImages
|
||||
- BuilderReport
|
||||
- BuilderSpecialReport
|
||||
- FunctionalStatelessTestAsan
|
||||
- FunctionalStatefulTestDebug
|
||||
- StressTestTsan
|
||||
|
48
.github/workflows/master.yml
vendored
48
.github/workflows/master.yml
vendored
@ -923,6 +923,53 @@ jobs:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinAmd64SSE2:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_amd64sse2
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # otherwise we will have no info about contributors
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
############################################################################################
|
||||
@ -1011,6 +1058,7 @@ jobs:
|
||||
- BuilderBinFreeBSD
|
||||
# - BuilderBinGCC
|
||||
- BuilderBinPPC64
|
||||
- BuilderBinAmd64SSE2
|
||||
- BuilderBinClangTidy
|
||||
- BuilderDebShared
|
||||
runs-on: [self-hosted, style-checker]
|
||||
|
46
.github/workflows/pull_request.yml
vendored
46
.github/workflows/pull_request.yml
vendored
@ -935,6 +935,51 @@ jobs:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinAmd64SSE2:
|
||||
needs: [DockerHubPush, FastTest, StyleCheck]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_amd64sse2
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
############################################################################################
|
||||
@ -1023,6 +1068,7 @@ jobs:
|
||||
- BuilderBinFreeBSD
|
||||
# - BuilderBinGCC
|
||||
- BuilderBinPPC64
|
||||
- BuilderBinAmd64SSE2
|
||||
- BuilderBinClangTidy
|
||||
- BuilderDebShared
|
||||
runs-on: [self-hosted, style-checker]
|
||||
|
4
.github/workflows/release.yml
vendored
4
.github/workflows/release.yml
vendored
@ -29,8 +29,12 @@ jobs:
|
||||
rm -rf "$TEMP_PATH" && mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY"
|
||||
# Download and push packages to artifactory
|
||||
python3 ./tests/ci/push_to_artifactory.py --release "${{ github.ref }}" \
|
||||
--commit '${{ github.sha }}' --artifactory-url "${{ secrets.JFROG_ARTIFACTORY_URL }}" --all
|
||||
# Download macos binaries to ${{runner.temp}}/download_binary
|
||||
python3 ./tests/ci/download_binary.py binary_darwin binary_darwin_aarch64
|
||||
mv '${{runner.temp}}/download_binary/'clickhouse-* '${{runner.temp}}/push_to_artifactory'
|
||||
- name: Upload packages to release assets
|
||||
uses: svenstaro/upload-release-action@v2
|
||||
with:
|
||||
|
135
.github/workflows/release_branches.yml
vendored
135
.github/workflows/release_branches.yml
vendored
@ -426,6 +426,100 @@ jobs:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinDarwin:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_darwin
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # otherwise we will have no info about contributors
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinDarwinAarch64:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
BUILD_NAME=binary_darwin_aarch64
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # otherwise we will have no info about contributors
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
############################################################################################
|
||||
@ -505,6 +599,46 @@ jobs:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
BuilderSpecialReport:
|
||||
needs:
|
||||
- BuilderBinDarwin
|
||||
- BuilderBinDarwinAarch64
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/report_check
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=ClickHouse special build check
|
||||
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
- name: Report Builder
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cat > "$NEEDS_DATA_PATH" << 'EOF'
|
||||
${{ toJSON(needs) }}
|
||||
EOF
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 build_report_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
##############################################################################################
|
||||
########################### FUNCTIONAL STATELESS TESTS #######################################
|
||||
##############################################################################################
|
||||
@ -1847,6 +1981,7 @@ jobs:
|
||||
- DockerHubPush
|
||||
- DockerServerImages
|
||||
- BuilderReport
|
||||
- BuilderSpecialReport
|
||||
- FunctionalStatelessTestDebug0
|
||||
- FunctionalStatelessTestDebug1
|
||||
- FunctionalStatelessTestDebug2
|
||||
|
@ -143,6 +143,8 @@ include (cmake/add_warning.cmake)
|
||||
if (COMPILER_CLANG)
|
||||
# generate ranges for fast "addr2line" search
|
||||
if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
|
||||
# NOTE: clang has a bug due to which it does not emit .debug_aranges
|
||||
# with ThinLTO, so a custom ld.lld wrapper is shipped in the docker images.
|
||||
set(COMPILER_FLAGS "${COMPILER_FLAGS} -gdwarf-aranges")
|
||||
endif ()
|
||||
|
||||
|
@ -15,4 +15,5 @@ ClickHouse® is an open-source column-oriented database management system that a
|
||||
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
|
||||
|
||||
## Upcoming events
|
||||
* [**v22.8 Release Webinar**](https://clickhouse.com/company/events/v22-8-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap.
|
||||
* [**v22.9 Release Webinar**](https://clickhouse.com/company/events/v22-9-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap.
|
||||
* [**ClickHouse for Analytics @ Barracuda Networks**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/288140358/) Join us for this in-person meetup hosted by our friends at Barracuda in the Bay Area.
|
||||
|
@ -22,7 +22,7 @@ POCO_IMPLEMENT_EXCEPTION(JSONException, Poco::Exception, "JSONException") // NOL
|
||||
#endif
|
||||
|
||||
|
||||
/// Прочитать беззнаковое целое в простом формате из не-0-terminated строки.
|
||||
/// Read unsigned integer in a simple form from a non-0-terminated string.
|
||||
static UInt64 readUIntText(const char * buf, const char * end)
|
||||
{
|
||||
UInt64 x = 0;
|
||||
@ -59,7 +59,7 @@ static UInt64 readUIntText(const char * buf, const char * end)
|
||||
}
|
||||
|
||||
|
||||
/// Прочитать знаковое целое в простом формате из не-0-terminated строки.
|
||||
/// Read signed integer in a simple form from a non-0-terminated string.
|
||||
static Int64 readIntText(const char * buf, const char * end)
|
||||
{
|
||||
bool negative = false;
|
||||
@ -102,7 +102,7 @@ static Int64 readIntText(const char * buf, const char * end)
|
||||
}
|
||||
|
||||
|
||||
/// Прочитать число с плавающей запятой в простом формате, с грубым округлением, из не-0-terminated строки.
|
||||
/// Read floating point number in simple format, imprecisely, from a non-0-terminated string.
|
||||
static double readFloatText(const char * buf, const char * end)
|
||||
{
|
||||
bool negative = false;
|
||||
@ -151,8 +151,8 @@ static double readFloatText(const char * buf, const char * end)
|
||||
case 'E':
|
||||
{
|
||||
++buf;
|
||||
Int32 exponent = readIntText(buf, end);
|
||||
x *= preciseExp10(exponent);
|
||||
auto exponent = readIntText(buf, end);
|
||||
x *= preciseExp10(static_cast<double>(exponent));
|
||||
|
||||
run = false;
|
||||
break;
|
||||
@ -207,7 +207,7 @@ JSON::ElementType JSON::getType() const
|
||||
return TYPE_NUMBER;
|
||||
case '"':
|
||||
{
|
||||
/// Проверим - это просто строка или name-value pair
|
||||
/// Is it a string or a name-value pair?
|
||||
Pos after_string = skipString();
|
||||
if (after_string < ptr_end && *after_string == ':')
|
||||
return TYPE_NAME_VALUE_PAIR;
|
||||
@ -229,15 +229,13 @@ void JSON::checkPos(Pos pos) const
|
||||
|
||||
JSON::Pos JSON::skipString() const
|
||||
{
|
||||
//std::cerr << "skipString()\t" << data() << std::endl;
|
||||
|
||||
Pos pos = ptr_begin;
|
||||
checkPos(pos);
|
||||
if (*pos != '"')
|
||||
throw JSONException(std::string("JSON: expected \", got ") + *pos);
|
||||
++pos;
|
||||
|
||||
/// fast path: находим следующую двойную кавычку. Если перед ней нет бэкслеша - значит это конец строки (при допущении корректности JSON).
|
||||
/// fast path: find next double quote. If it is not escaped by backslash - then it's an end of string (assuming JSON is valid).
|
||||
Pos closing_quote = reinterpret_cast<const char *>(memchr(reinterpret_cast<const void *>(pos), '\"', ptr_end - pos));
|
||||
if (nullptr != closing_quote && closing_quote[-1] != '\\')
|
||||
return closing_quote + 1;
|
||||
@ -269,8 +267,6 @@ JSON::Pos JSON::skipString() const
|
||||
|
||||
JSON::Pos JSON::skipNumber() const
|
||||
{
|
||||
//std::cerr << "skipNumber()\t" << data() << std::endl;
|
||||
|
||||
Pos pos = ptr_begin;
|
||||
|
||||
checkPos(pos);
|
||||
@ -296,8 +292,6 @@ JSON::Pos JSON::skipNumber() const
|
||||
|
||||
JSON::Pos JSON::skipBool() const
|
||||
{
|
||||
//std::cerr << "skipBool()\t" << data() << std::endl;
|
||||
|
||||
Pos pos = ptr_begin;
|
||||
checkPos(pos);
|
||||
|
||||
@ -314,16 +308,12 @@ JSON::Pos JSON::skipBool() const
|
||||
|
||||
JSON::Pos JSON::skipNull() const
|
||||
{
|
||||
//std::cerr << "skipNull()\t" << data() << std::endl;
|
||||
|
||||
return ptr_begin + 4;
|
||||
}
|
||||
|
||||
|
||||
JSON::Pos JSON::skipNameValuePair() const
|
||||
{
|
||||
//std::cerr << "skipNameValuePair()\t" << data() << std::endl;
|
||||
|
||||
Pos pos = skipString();
|
||||
checkPos(pos);
|
||||
|
||||
@ -338,8 +328,6 @@ JSON::Pos JSON::skipNameValuePair() const
|
||||
|
||||
JSON::Pos JSON::skipArray() const
|
||||
{
|
||||
//std::cerr << "skipArray()\t" << data() << std::endl;
|
||||
|
||||
if (!isArray())
|
||||
throw JSONException("JSON: expected [");
|
||||
Pos pos = ptr_begin;
|
||||
@ -370,8 +358,6 @@ JSON::Pos JSON::skipArray() const
|
||||
|
||||
JSON::Pos JSON::skipObject() const
|
||||
{
|
||||
//std::cerr << "skipObject()\t" << data() << std::endl;
|
||||
|
||||
if (!isObject())
|
||||
throw JSONException("JSON: expected {");
|
||||
Pos pos = ptr_begin;
|
||||
@ -402,8 +388,6 @@ JSON::Pos JSON::skipObject() const
|
||||
|
||||
JSON::Pos JSON::skipElement() const
|
||||
{
|
||||
//std::cerr << "skipElement()\t" << data() << std::endl;
|
||||
|
||||
ElementType type = getType();
|
||||
|
||||
switch (type)
|
||||
@ -640,7 +624,7 @@ std::string JSON::getString() const
|
||||
{
|
||||
throw JSONException("JSON: incorrect syntax: incorrect HEX code.");
|
||||
}
|
||||
buf.resize(buf.size() + 6); /// максимальный размер UTF8 многобайтовой последовательности
|
||||
buf.resize(buf.size() + 6); /// Max size of UTF-8 sequence, including pre-standard mapping of UCS-4 to UTF-8.
|
||||
int res = utf8.convert(unicode,
|
||||
reinterpret_cast<unsigned char *>(const_cast<char*>(buf.data())) + buf.size() - 6, 6);
|
||||
if (!res)
|
||||
@ -754,8 +738,6 @@ JSON::iterator JSON::iterator::begin() const
|
||||
if (type != TYPE_ARRAY && type != TYPE_OBJECT)
|
||||
throw JSONException("JSON: not array or object when calling begin() method.");
|
||||
|
||||
//std::cerr << "begin()\t" << data() << std::endl;
|
||||
|
||||
Pos pos = ptr_begin + 1;
|
||||
checkPos(pos);
|
||||
if (*pos == '}' || *pos == ']')
|
||||
@ -846,4 +828,3 @@ bool JSON::isType<bool>() const
|
||||
{
|
||||
return isBool();
|
||||
}
|
||||
|
||||
|
@ -227,7 +227,7 @@ inline UInt64 shiftMix(UInt64 val)
|
||||
return val ^ (val >> 47);
|
||||
}
|
||||
|
||||
inline UInt64 rotateByAtLeast1(UInt64 val, int shift)
|
||||
inline UInt64 rotateByAtLeast1(UInt64 val, UInt8 shift)
|
||||
{
|
||||
return (val >> shift) | (val << (64 - shift));
|
||||
}
|
||||
@ -249,7 +249,7 @@ inline size_t hashLessThan8(const char * data, size_t size)
|
||||
uint8_t b = data[size >> 1];
|
||||
uint8_t c = data[size - 1];
|
||||
uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << 8);
|
||||
uint32_t z = size + (static_cast<uint32_t>(c) << 2);
|
||||
uint32_t z = static_cast<uint32_t>(size) + (static_cast<uint32_t>(c) << 2);
|
||||
return shiftMix(y * k2 ^ z * k3) * k2;
|
||||
}
|
||||
|
||||
@ -262,7 +262,7 @@ inline size_t hashLessThan16(const char * data, size_t size)
|
||||
{
|
||||
UInt64 a = unalignedLoad<UInt64>(data);
|
||||
UInt64 b = unalignedLoad<UInt64>(data + size - 8);
|
||||
return hashLen16(a, rotateByAtLeast1(b + size, size)) ^ b;
|
||||
return hashLen16(a, rotateByAtLeast1(b + size, static_cast<UInt8>(size))) ^ b;
|
||||
}
|
||||
|
||||
return hashLessThan8(data, size);
|
||||
|
@ -22,7 +22,7 @@ uint64_t getThreadId()
|
||||
#if defined(OS_ANDROID)
|
||||
current_tid = gettid();
|
||||
#elif defined(OS_LINUX)
|
||||
current_tid = syscall(SYS_gettid); /// This call is always successful. - man gettid
|
||||
current_tid = static_cast<uint64_t>(syscall(SYS_gettid)); /// This call is always successful. - man gettid
|
||||
#elif defined(OS_FREEBSD)
|
||||
current_tid = pthread_getthreadid_np();
|
||||
#elif defined(OS_SUNOS)
|
||||
|
@ -14,37 +14,37 @@ static T shift10Impl(T x, int exponent)
|
||||
static const long double powers10[] =
|
||||
{
|
||||
1e-323L, 1e-322L, 1e-321L, 1e-320L, 1e-319L, 1e-318L, 1e-317L, 1e-316L, 1e-315L, 1e-314L, 1e-313L, 1e-312L, 1e-311L,
|
||||
1e-310L,1e-309L,1e-308L,1e-307L,1e-306L,1e-305L,1e-304L,1e-303L,1e-302L,1e-301L,1e-300L,1e-299L,1e-298L,1e-297L,1e-296L,1e-295L,1e-294L,1e-293L,1e-292L,1e-291L,
|
||||
1e-290L,1e-289L,1e-288L,1e-287L,1e-286L,1e-285L,1e-284L,1e-283L,1e-282L,1e-281L,1e-280L,1e-279L,1e-278L,1e-277L,1e-276L,1e-275L,1e-274L,1e-273L,1e-272L,1e-271L,
|
||||
1e-270L,1e-269L,1e-268L,1e-267L,1e-266L,1e-265L,1e-264L,1e-263L,1e-262L,1e-261L,1e-260L,1e-259L,1e-258L,1e-257L,1e-256L,1e-255L,1e-254L,1e-253L,1e-252L,1e-251L,
|
||||
1e-250L,1e-249L,1e-248L,1e-247L,1e-246L,1e-245L,1e-244L,1e-243L,1e-242L,1e-241L,1e-240L,1e-239L,1e-238L,1e-237L,1e-236L,1e-235L,1e-234L,1e-233L,1e-232L,1e-231L,
|
||||
1e-230L,1e-229L,1e-228L,1e-227L,1e-226L,1e-225L,1e-224L,1e-223L,1e-222L,1e-221L,1e-220L,1e-219L,1e-218L,1e-217L,1e-216L,1e-215L,1e-214L,1e-213L,1e-212L,1e-211L,
|
||||
1e-210L,1e-209L,1e-208L,1e-207L,1e-206L,1e-205L,1e-204L,1e-203L,1e-202L,1e-201L,1e-200L,1e-199L,1e-198L,1e-197L,1e-196L,1e-195L,1e-194L,1e-193L,1e-192L,1e-191L,
|
||||
1e-190L,1e-189L,1e-188L,1e-187L,1e-186L,1e-185L,1e-184L,1e-183L,1e-182L,1e-181L,1e-180L,1e-179L,1e-178L,1e-177L,1e-176L,1e-175L,1e-174L,1e-173L,1e-172L,1e-171L,
|
||||
1e-170L,1e-169L,1e-168L,1e-167L,1e-166L,1e-165L,1e-164L,1e-163L,1e-162L,1e-161L,1e-160L,1e-159L,1e-158L,1e-157L,1e-156L,1e-155L,1e-154L,1e-153L,1e-152L,1e-151L,
|
||||
1e-150L,1e-149L,1e-148L,1e-147L,1e-146L,1e-145L,1e-144L,1e-143L,1e-142L,1e-141L,1e-140L,1e-139L,1e-138L,1e-137L,1e-136L,1e-135L,1e-134L,1e-133L,1e-132L,1e-131L,
|
||||
1e-130L,1e-129L,1e-128L,1e-127L,1e-126L,1e-125L,1e-124L,1e-123L,1e-122L,1e-121L,1e-120L,1e-119L,1e-118L,1e-117L,1e-116L,1e-115L,1e-114L,1e-113L,1e-112L,1e-111L,
|
||||
1e-110L,1e-109L,1e-108L,1e-107L,1e-106L,1e-105L,1e-104L,1e-103L,1e-102L,1e-101L,1e-100L,1e-99L,1e-98L,1e-97L,1e-96L,1e-95L,1e-94L,1e-93L,1e-92L,1e-91L,1e-90L,
|
||||
1e-89L,1e-88L,1e-87L,1e-86L,1e-85L,1e-84L,1e-83L,1e-82L,1e-81L,1e-80L,1e-79L,1e-78L,1e-77L,1e-76L,1e-75L,1e-74L,1e-73L,1e-72L,1e-71L,1e-70,
|
||||
1e-69L,1e-68L,1e-67L,1e-66L,1e-65L,1e-64L,1e-63L,1e-62L,1e-61L,1e-60L,1e-59L,1e-58L,1e-57L,1e-56L,1e-55L,1e-54L,1e-53L,1e-52L,1e-51L,1e-50,
|
||||
1e-49L,1e-48L,1e-47L,1e-46L,1e-45L,1e-44L,1e-43L,1e-42L,1e-41L,1e-40L,1e-39L,1e-38L,1e-37L,1e-36L,1e-35L,1e-34L,1e-33L,1e-32L,1e-31L,1e-30,
|
||||
1e-29L,1e-28L,1e-27L,1e-26L,1e-25L,1e-24L,1e-23L,1e-22L,1e-21L,1e-20L,1e-19L,1e-18L,1e-17L,1e-16L,1e-15L,1e-14L,1e-13L,1e-12L,1e-11L,1e-10,
|
||||
1e-9L,1e-8L,1e-7L,1e-6L,1e-5L,1e-4L,1e-3L,1e-2L,1e-1L,1e0L,1e1L,1e2L,1e3L,1e4L,1e5L,1e6L,1e7L,1e8L,1e9L,1e10,
|
||||
1e11L,1e12L,1e13L,1e14L,1e15L,1e16L,1e17L,1e18L,1e19L,1e20L,1e21L,1e22L,1e23L,1e24L,1e25L,1e26L,1e27L,1e28L,1e29L,1e30,
|
||||
1e31L,1e32L,1e33L,1e34L,1e35L,1e36L,1e37L,1e38L,1e39L,1e40L,1e41L,1e42L,1e43L,1e44L,1e45L,1e46L,1e47L,1e48L,1e49L,1e50,
|
||||
1e51L,1e52L,1e53L,1e54L,1e55L,1e56L,1e57L,1e58L,1e59L,1e60L,1e61L,1e62L,1e63L,1e64L,1e65L,1e66L,1e67L,1e68L,1e69L,1e70,
|
||||
1e71L,1e72L,1e73L,1e74L,1e75L,1e76L,1e77L,1e78L,1e79L,1e80L,1e81L,1e82L,1e83L,1e84L,1e85L,1e86L,1e87L,1e88L,1e89L,1e90,
|
||||
1e91L,1e92L,1e93L,1e94L,1e95L,1e96L,1e97L,1e98L,1e99L,1e100L,1e101L,1e102L,1e103L,1e104L,1e105L,1e106L,1e107L,1e108L,1e109L,1e110,
|
||||
1e111L,1e112L,1e113L,1e114L,1e115L,1e116L,1e117L,1e118L,1e119L,1e120L,1e121L,1e122L,1e123L,1e124L,1e125L,1e126L,1e127L,1e128L,1e129L,1e130,
|
||||
1e131L,1e132L,1e133L,1e134L,1e135L,1e136L,1e137L,1e138L,1e139L,1e140L,1e141L,1e142L,1e143L,1e144L,1e145L,1e146L,1e147L,1e148L,1e149L,1e150,
|
||||
1e151L,1e152L,1e153L,1e154L,1e155L,1e156L,1e157L,1e158L,1e159L,1e160L,1e161L,1e162L,1e163L,1e164L,1e165L,1e166L,1e167L,1e168L,1e169L,1e170,
|
||||
1e171L,1e172L,1e173L,1e174L,1e175L,1e176L,1e177L,1e178L,1e179L,1e180L,1e181L,1e182L,1e183L,1e184L,1e185L,1e186L,1e187L,1e188L,1e189L,1e190,
|
||||
1e191L,1e192L,1e193L,1e194L,1e195L,1e196L,1e197L,1e198L,1e199L,1e200L,1e201L,1e202L,1e203L,1e204L,1e205L,1e206L,1e207L,1e208L,1e209L,1e210,
|
||||
1e211L,1e212L,1e213L,1e214L,1e215L,1e216L,1e217L,1e218L,1e219L,1e220L,1e221L,1e222L,1e223L,1e224L,1e225L,1e226L,1e227L,1e228L,1e229L,1e230,
|
||||
1e231L,1e232L,1e233L,1e234L,1e235L,1e236L,1e237L,1e238L,1e239L,1e240L,1e241L,1e242L,1e243L,1e244L,1e245L,1e246L,1e247L,1e248L,1e249L,1e250,
|
||||
1e251L,1e252L,1e253L,1e254L,1e255L,1e256L,1e257L,1e258L,1e259L,1e260L,1e261L,1e262L,1e263L,1e264L,1e265L,1e266L,1e267L,1e268L,1e269L,1e270,
|
||||
1e271L,1e272L,1e273L,1e274L,1e275L,1e276L,1e277L,1e278L,1e279L,1e280L,1e281L,1e282L,1e283L,1e284L,1e285L,1e286L,1e287L,1e288L,1e289L,1e290,
|
||||
1e291L,1e292L,1e293L,1e294L,1e295L,1e296L,1e297L,1e298L,1e299L,1e300L,1e301L,1e302L,1e303L,1e304L,1e305L,1e306L,1e307L,1e308L
|
||||
1e-310L, 1e-309L, 1e-308L, 1e-307L, 1e-306L, 1e-305L, 1e-304L, 1e-303L, 1e-302L, 1e-301L, 1e-300L, 1e-299L, 1e-298L, 1e-297L, 1e-296L, 1e-295L, 1e-294L, 1e-293L, 1e-292L, 1e-291L,
|
||||
1e-290L, 1e-289L, 1e-288L, 1e-287L, 1e-286L, 1e-285L, 1e-284L, 1e-283L, 1e-282L, 1e-281L, 1e-280L, 1e-279L, 1e-278L, 1e-277L, 1e-276L, 1e-275L, 1e-274L, 1e-273L, 1e-272L, 1e-271L,
|
||||
1e-270L, 1e-269L, 1e-268L, 1e-267L, 1e-266L, 1e-265L, 1e-264L, 1e-263L, 1e-262L, 1e-261L, 1e-260L, 1e-259L, 1e-258L, 1e-257L, 1e-256L, 1e-255L, 1e-254L, 1e-253L, 1e-252L, 1e-251L,
|
||||
1e-250L, 1e-249L, 1e-248L, 1e-247L, 1e-246L, 1e-245L, 1e-244L, 1e-243L, 1e-242L, 1e-241L, 1e-240L, 1e-239L, 1e-238L, 1e-237L, 1e-236L, 1e-235L, 1e-234L, 1e-233L, 1e-232L, 1e-231L,
|
||||
1e-230L, 1e-229L, 1e-228L, 1e-227L, 1e-226L, 1e-225L, 1e-224L, 1e-223L, 1e-222L, 1e-221L, 1e-220L, 1e-219L, 1e-218L, 1e-217L, 1e-216L, 1e-215L, 1e-214L, 1e-213L, 1e-212L, 1e-211L,
|
||||
1e-210L, 1e-209L, 1e-208L, 1e-207L, 1e-206L, 1e-205L, 1e-204L, 1e-203L, 1e-202L, 1e-201L, 1e-200L, 1e-199L, 1e-198L, 1e-197L, 1e-196L, 1e-195L, 1e-194L, 1e-193L, 1e-192L, 1e-191L,
|
||||
1e-190L, 1e-189L, 1e-188L, 1e-187L, 1e-186L, 1e-185L, 1e-184L, 1e-183L, 1e-182L, 1e-181L, 1e-180L, 1e-179L, 1e-178L, 1e-177L, 1e-176L, 1e-175L, 1e-174L, 1e-173L, 1e-172L, 1e-171L,
|
||||
1e-170L, 1e-169L, 1e-168L, 1e-167L, 1e-166L, 1e-165L, 1e-164L, 1e-163L, 1e-162L, 1e-161L, 1e-160L, 1e-159L, 1e-158L, 1e-157L, 1e-156L, 1e-155L, 1e-154L, 1e-153L, 1e-152L, 1e-151L,
|
||||
1e-150L, 1e-149L, 1e-148L, 1e-147L, 1e-146L, 1e-145L, 1e-144L, 1e-143L, 1e-142L, 1e-141L, 1e-140L, 1e-139L, 1e-138L, 1e-137L, 1e-136L, 1e-135L, 1e-134L, 1e-133L, 1e-132L, 1e-131L,
|
||||
1e-130L, 1e-129L, 1e-128L, 1e-127L, 1e-126L, 1e-125L, 1e-124L, 1e-123L, 1e-122L, 1e-121L, 1e-120L, 1e-119L, 1e-118L, 1e-117L, 1e-116L, 1e-115L, 1e-114L, 1e-113L, 1e-112L, 1e-111L,
|
||||
1e-110L, 1e-109L, 1e-108L, 1e-107L, 1e-106L, 1e-105L, 1e-104L, 1e-103L, 1e-102L, 1e-101L, 1e-100L, 1e-99L, 1e-98L, 1e-97L, 1e-96L, 1e-95L, 1e-94L, 1e-93L, 1e-92L, 1e-91L, 1e-90L,
|
||||
1e-89L, 1e-88L, 1e-87L, 1e-86L, 1e-85L, 1e-84L, 1e-83L, 1e-82L, 1e-81L, 1e-80L, 1e-79L, 1e-78L, 1e-77L, 1e-76L, 1e-75L, 1e-74L, 1e-73L, 1e-72L, 1e-71L, 1e-70,
|
||||
1e-69L, 1e-68L, 1e-67L, 1e-66L, 1e-65L, 1e-64L, 1e-63L, 1e-62L, 1e-61L, 1e-60L, 1e-59L, 1e-58L, 1e-57L, 1e-56L, 1e-55L, 1e-54L, 1e-53L, 1e-52L, 1e-51L, 1e-50,
|
||||
1e-49L, 1e-48L, 1e-47L, 1e-46L, 1e-45L, 1e-44L, 1e-43L, 1e-42L, 1e-41L, 1e-40L, 1e-39L, 1e-38L, 1e-37L, 1e-36L, 1e-35L, 1e-34L, 1e-33L, 1e-32L, 1e-31L, 1e-30,
|
||||
1e-29L, 1e-28L, 1e-27L, 1e-26L, 1e-25L, 1e-24L, 1e-23L, 1e-22L, 1e-21L, 1e-20L, 1e-19L, 1e-18L, 1e-17L, 1e-16L, 1e-15L, 1e-14L, 1e-13L, 1e-12L, 1e-11L, 1e-10,
|
||||
1e-9L, 1e-8L, 1e-7L, 1e-6L, 1e-5L, 1e-4L, 1e-3L, 1e-2L, 1e-1L, 1e0L, 1e1L, 1e2L, 1e3L, 1e4L, 1e5L, 1e6L, 1e7L, 1e8L, 1e9L, 1e10,
|
||||
1e11L, 1e12L, 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L, 1e20L, 1e21L, 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L, 1e28L, 1e29L, 1e30,
|
||||
1e31L, 1e32L, 1e33L, 1e34L, 1e35L, 1e36L, 1e37L, 1e38L, 1e39L, 1e40L, 1e41L, 1e42L, 1e43L, 1e44L, 1e45L, 1e46L, 1e47L, 1e48L, 1e49L, 1e50,
|
||||
1e51L, 1e52L, 1e53L, 1e54L, 1e55L, 1e56L, 1e57L, 1e58L, 1e59L, 1e60L, 1e61L, 1e62L, 1e63L, 1e64L, 1e65L, 1e66L, 1e67L, 1e68L, 1e69L, 1e70,
|
||||
1e71L, 1e72L, 1e73L, 1e74L, 1e75L, 1e76L, 1e77L, 1e78L, 1e79L, 1e80L, 1e81L, 1e82L, 1e83L, 1e84L, 1e85L, 1e86L, 1e87L, 1e88L, 1e89L, 1e90,
|
||||
1e91L, 1e92L, 1e93L, 1e94L, 1e95L, 1e96L, 1e97L, 1e98L, 1e99L, 1e100L, 1e101L, 1e102L, 1e103L, 1e104L, 1e105L, 1e106L, 1e107L, 1e108L, 1e109L, 1e110,
|
||||
1e111L, 1e112L, 1e113L, 1e114L, 1e115L, 1e116L, 1e117L, 1e118L, 1e119L, 1e120L, 1e121L, 1e122L, 1e123L, 1e124L, 1e125L, 1e126L, 1e127L, 1e128L, 1e129L, 1e130,
|
||||
1e131L, 1e132L, 1e133L, 1e134L, 1e135L, 1e136L, 1e137L, 1e138L, 1e139L, 1e140L, 1e141L, 1e142L, 1e143L, 1e144L, 1e145L, 1e146L, 1e147L, 1e148L, 1e149L, 1e150,
|
||||
1e151L, 1e152L, 1e153L, 1e154L, 1e155L, 1e156L, 1e157L, 1e158L, 1e159L, 1e160L, 1e161L, 1e162L, 1e163L, 1e164L, 1e165L, 1e166L, 1e167L, 1e168L, 1e169L, 1e170,
|
||||
1e171L, 1e172L, 1e173L, 1e174L, 1e175L, 1e176L, 1e177L, 1e178L, 1e179L, 1e180L, 1e181L, 1e182L, 1e183L, 1e184L, 1e185L, 1e186L, 1e187L, 1e188L, 1e189L, 1e190,
|
||||
1e191L, 1e192L, 1e193L, 1e194L, 1e195L, 1e196L, 1e197L, 1e198L, 1e199L, 1e200L, 1e201L, 1e202L, 1e203L, 1e204L, 1e205L, 1e206L, 1e207L, 1e208L, 1e209L, 1e210,
|
||||
1e211L, 1e212L, 1e213L, 1e214L, 1e215L, 1e216L, 1e217L, 1e218L, 1e219L, 1e220L, 1e221L, 1e222L, 1e223L, 1e224L, 1e225L, 1e226L, 1e227L, 1e228L, 1e229L, 1e230,
|
||||
1e231L, 1e232L, 1e233L, 1e234L, 1e235L, 1e236L, 1e237L, 1e238L, 1e239L, 1e240L, 1e241L, 1e242L, 1e243L, 1e244L, 1e245L, 1e246L, 1e247L, 1e248L, 1e249L, 1e250,
|
||||
1e251L, 1e252L, 1e253L, 1e254L, 1e255L, 1e256L, 1e257L, 1e258L, 1e259L, 1e260L, 1e261L, 1e262L, 1e263L, 1e264L, 1e265L, 1e266L, 1e267L, 1e268L, 1e269L, 1e270,
|
||||
1e271L, 1e272L, 1e273L, 1e274L, 1e275L, 1e276L, 1e277L, 1e278L, 1e279L, 1e280L, 1e281L, 1e282L, 1e283L, 1e284L, 1e285L, 1e286L, 1e287L, 1e288L, 1e289L, 1e290,
|
||||
1e291L, 1e292L, 1e293L, 1e294L, 1e295L, 1e296L, 1e297L, 1e298L, 1e299L, 1e300L, 1e301L, 1e302L, 1e303L, 1e304L, 1e305L, 1e306L, 1e307L, 1e308L
|
||||
};
|
||||
|
||||
if (unlikely(exponent < min_exponent)) /// Note: there are some values below MIN_EXPONENT that is greater than zero.
|
||||
@ -52,7 +52,7 @@ static T shift10Impl(T x, int exponent)
|
||||
else if (unlikely(exponent > max_exponent))
|
||||
x *= std::numeric_limits<T>::infinity(); /// Multiplying to keep the sign of infinity.
|
||||
else
|
||||
x *= powers10[exponent - min_exponent];
|
||||
x *= static_cast<T>(powers10[exponent - min_exponent]);
|
||||
|
||||
return x;
|
||||
}
|
||||
@ -68,12 +68,12 @@ float shift10(float x, int exponent)
|
||||
return shift10Impl(x, exponent);
|
||||
}
|
||||
|
||||
double shift10(UInt64 x, int exponent)
|
||||
long double shift10(UInt64 x, int exponent)
|
||||
{
|
||||
return shift10Impl(static_cast<long double>(x), exponent);
|
||||
}
|
||||
|
||||
double shift10(Int64 x, int exponent)
|
||||
long double shift10(Int64 x, int exponent)
|
||||
{
|
||||
return shift10Impl(static_cast<long double>(x), exponent);
|
||||
}
|
||||
|
@ -12,5 +12,5 @@
|
||||
double shift10(double x, int exponent);
|
||||
float shift10(float x, int exponent);
|
||||
|
||||
double shift10(UInt64 x, int exponent);
|
||||
double shift10(Int64 x, int exponent);
|
||||
long double shift10(UInt64 x, int exponent);
|
||||
long double shift10(Int64 x, int exponent);
|
||||
|
@ -11,12 +11,12 @@ std::string setColor(UInt64 hash)
|
||||
/// It still looks awesome.
|
||||
UInt8 y = 128;
|
||||
|
||||
UInt8 cb = hash % 256;
|
||||
UInt8 cr = hash / 256 % 256;
|
||||
UInt8 cb = static_cast<UInt8>(hash % 256);
|
||||
UInt8 cr = static_cast<UInt8>(hash / 256 % 256);
|
||||
|
||||
UInt8 r = std::max(0.0, std::min(255.0, y + 1.402 * (cr - 128)));
|
||||
UInt8 g = std::max(0.0, std::min(255.0, y - 0.344136 * (cb - 128) - 0.714136 * (cr - 128)));
|
||||
UInt8 b = std::max(0.0, std::min(255.0, y + 1.772 * (cb - 128)));
|
||||
UInt8 r = static_cast<UInt8>(std::max(0.0, std::min(255.0, y + 1.402 * (cr - 128))));
|
||||
UInt8 g = static_cast<UInt8>(std::max(0.0, std::min(255.0, y - 0.344136 * (cb - 128) - 0.714136 * (cr - 128))));
|
||||
UInt8 b = static_cast<UInt8>(std::max(0.0, std::min(255.0, y + 1.772 * (cb - 128))));
|
||||
|
||||
/// ANSI escape sequence to set 24-bit foreground font color in terminal.
|
||||
return "\033[38;2;" + std::to_string(r) + ";" + std::to_string(g) + ";" + std::to_string(b) + "m";
|
||||
|
@ -453,7 +453,7 @@ private:
|
||||
if constexpr (sizeof(T) <= sizeof(base_type))
|
||||
{
|
||||
if (0 == idx)
|
||||
return x;
|
||||
return static_cast<base_type>(x);
|
||||
}
|
||||
else if (idx * sizeof(base_type) < sizeof(T))
|
||||
return x >> (idx * base_bits); // & std::numeric_limits<base_type>::max()
|
||||
@ -1239,13 +1239,13 @@ constexpr integer<Bits, Signed>::operator long double() const noexcept
|
||||
template <size_t Bits, typename Signed>
|
||||
constexpr integer<Bits, Signed>::operator double() const noexcept
|
||||
{
|
||||
return static_cast<long double>(*this);
|
||||
return static_cast<double>(static_cast<long double>(*this));
|
||||
}
|
||||
|
||||
template <size_t Bits, typename Signed>
|
||||
constexpr integer<Bits, Signed>::operator float() const noexcept
|
||||
{
|
||||
return static_cast<long double>(*this);
|
||||
return static_cast<float>(static_cast<long double>(*this));
|
||||
}
|
||||
|
||||
// Unary operators
|
||||
|
@ -24,6 +24,23 @@ option (ENABLE_BMI "Use BMI instructions on x86_64" 0)
|
||||
option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 0)
|
||||
option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0)
|
||||
|
||||
# X86: Allow compilation for a SSE2-only target machine. Done by a special build in CI for embedded or very old hardware.
|
||||
option (NO_SSE3_OR_HIGHER "Disable SSE3 or higher on x86_64" 0)
|
||||
if (NO_SSE3_OR_HIGHER)
|
||||
SET(ENABLE_SSSE3 0)
|
||||
SET(ENABLE_SSE41 0)
|
||||
SET(ENABLE_SSE42 0)
|
||||
SET(ENABLE_PCLMULQDQ 0)
|
||||
SET(ENABLE_POPCNT 0)
|
||||
SET(ENABLE_AVX 0)
|
||||
SET(ENABLE_AVX2 0)
|
||||
SET(ENABLE_AVX512 0)
|
||||
SET(ENABLE_AVX512_VBMI 0)
|
||||
SET(ENABLE_BMI 0)
|
||||
SET(ENABLE_AVX2_FOR_SPEC_OP 0)
|
||||
SET(ENABLE_AVX512_FOR_SPEC_OP 0)
|
||||
endif()
|
||||
|
||||
option (ARCH_NATIVE "Add -march=native compiler flag. This makes your binaries non-portable but more performant code may be generated. This option overrides ENABLE_* options for specific instruction set. Highly not recommended to use." 0)
|
||||
|
||||
if (ARCH_NATIVE)
|
||||
|
17
cmake/ld.lld.in
Executable file
17
cmake/ld.lld.in
Executable file
@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# This is a workaround for a bug in llvm/clang
|
||||
# that does not produce .debug_aranges with LTO
|
||||
#
|
||||
# NOTE: this is a temporary solution that should be removed once [1] is
|
||||
# resolved.
|
||||
#
|
||||
# [1]: https://discourse.llvm.org/t/clang-does-not-produce-full-debug-aranges-section-with-thinlto/64898/8
|
||||
|
||||
# NOTE: only -flto=thin is supported.
|
||||
# NOTE: it is not possible to check whether -gdwarf-aranges was originally passed or not.
|
||||
if [[ "$*" =~ -plugin-opt=thinlto ]]; then
|
||||
exec "@LLD_PATH@" -mllvm -generate-arange-section "$@"
|
||||
else
|
||||
exec "@LLD_PATH@" "$@"
|
||||
fi
|
@ -20,7 +20,7 @@ macro(clickhouse_split_debug_symbols)
|
||||
COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin"
|
||||
COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
|
||||
# Splits debug symbols into separate file, leaves the binary untouched:
|
||||
COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
|
||||
COMMAND "${OBJCOPY_PATH}" --only-keep-debug "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
|
||||
COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
|
||||
# Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check:
|
||||
COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note --keep-section=.clickhouse.hash "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
|
||||
|
@ -94,8 +94,13 @@ if (LINKER_NAME)
|
||||
if (NOT LLD_PATH)
|
||||
message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.")
|
||||
endif ()
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}")
|
||||
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_PATH}")
|
||||
|
||||
# This is a temporary quirk to emit .debug_aranges with ThinLTO
|
||||
set (LLD_WRAPPER "${CMAKE_CURRENT_BINARY_DIR}/ld.lld")
|
||||
configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/cmake/ld.lld.in" "${LLD_WRAPPER}" @ONLY)
|
||||
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
|
||||
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
|
||||
else ()
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
|
||||
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
|
||||
|
2
contrib/capnproto
vendored
2
contrib/capnproto
vendored
@ -1 +1 @@
|
||||
Subproject commit c8189ec3c27dacbd4a3288e682473010e377f593
|
||||
Subproject commit 2e88221d3dde22266bfccf40eaee6ff9b40d113d
|
@ -1,11 +1,9 @@
|
||||
# We use vectorscan, a portable and API/ABI-compatible drop-in replacement for hyperscan.
|
||||
|
||||
if (ARCH_AMD64)
|
||||
if ((ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER) OR ARCH_AARCH64)
|
||||
option (ENABLE_VECTORSCAN "Enable vectorscan library" ${ENABLE_LIBRARIES})
|
||||
endif()
|
||||
|
||||
# TODO: vectorscan supports ARM yet some tests involving cyrillic letters fail (PR #38171) ... needs further investigation
|
||||
|
||||
# TODO PPC should generally work but needs manual generation of ppc/config.h file on a PPC machine
|
||||
|
||||
if (NOT ENABLE_VECTORSCAN)
|
||||
@ -236,11 +234,27 @@ set (SRCS
|
||||
|
||||
# The original build system invokes ragel on src/parser/{Parser|control_verbs}.rl (+ a few more .rl files which are unneeded). To avoid a
|
||||
# build-time dependency on ragel (via contrib/ or find_program()), add the manually generated output of ragel to the sources.
|
||||
# Please regenerate these files if you update vectorscan.
|
||||
list (APPEND SRCS
|
||||
"${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/Parser.cpp"
|
||||
"${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/control_verbs.cpp"
|
||||
)
|
||||
#
|
||||
# Please regenerate these files if you update vectorscan. They must be regenerated for each platform separately because ragel, for
|
||||
# weird reasons, produces different constants in the output.
|
||||
#
|
||||
# Also, please use upstream versions of colm and ragel; the packages in Ubuntu 22.04 seem to produce wrong output on ARM.
|
||||
if (ARCH_AMD64)
|
||||
list (APPEND SRCS
|
||||
"${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/amd64/Parser.cpp"
|
||||
"${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/amd64/control_verbs.cpp"
|
||||
)
|
||||
elseif (ARCH_AARCH64)
|
||||
list (APPEND SRCS
|
||||
"${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/aarch64/Parser.cpp"
|
||||
"${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/aarch64/control_verbs.cpp"
|
||||
)
|
||||
set_source_files_properties(
|
||||
"${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/aarch64/Parser.cpp"
|
||||
"${LIBRARY_DIR}/../vectorscan-cmake/rageled_files/aarch64/control_verbs.cpp"
|
||||
COMPILE_FLAGS -Wno-c++11-narrowing
|
||||
)
|
||||
endif()
|
||||
|
||||
# Platform-dependent files
|
||||
if (ARCH_AMD64)
|
||||
|
File diff suppressed because it is too large
Load Diff
10725
contrib/vectorscan-cmake/rageled_files/aarch64/Parser.cpp
Normal file
10725
contrib/vectorscan-cmake/rageled_files/aarch64/Parser.cpp
Normal file
File diff suppressed because it is too large
Load Diff
547
contrib/vectorscan-cmake/rageled_files/aarch64/control_verbs.cpp
Normal file
547
contrib/vectorscan-cmake/rageled_files/aarch64/control_verbs.cpp
Normal file
@ -0,0 +1,547 @@
|
||||
#line 1 "control_verbs.rl"
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief Parser for control verbs that can occur at the beginning of a pattern.
|
||||
*/
|
||||
|
||||
#include "parser/control_verbs.h"
|
||||
|
||||
#include "parser/Parser.h"
|
||||
#include "parser/parse_error.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
const char *read_control_verbs(const char *ptr, const char *end, size_t start,
|
||||
ParseMode &mode) {
|
||||
const char *p = ptr;
|
||||
const char *pe = end;
|
||||
const char *eof = pe;
|
||||
const char *ts, *te;
|
||||
int cs;
|
||||
UNUSED int act;
|
||||
|
||||
|
||||
#line 56 "control_verbs.cpp"
|
||||
static const signed char _ControlVerbs_actions[] = {
|
||||
0, 1, 0, 1, 1, 1, 2, 1,
|
||||
3, 1, 4, 1, 5, 1, 6, 1,
|
||||
7, 1, 8, 1, 9, 0
|
||||
};
|
||||
|
||||
static const short _ControlVerbs_key_offsets[] = {
|
||||
0, 7, 8, 10, 12, 14, 16, 18,
|
||||
20, 21, 23, 25, 27, 30, 32, 34,
|
||||
36, 38, 40, 42, 44, 46, 48, 50,
|
||||
52, 55, 57, 59, 61, 63, 66, 68,
|
||||
70, 72, 74, 76, 79, 82, 84, 86,
|
||||
88, 90, 92, 94, 96, 98, 100, 102,
|
||||
105, 107, 109, 111, 113, 115, 117, 119,
|
||||
121, 123, 125, 127, 129, 131, 133, 135,
|
||||
137, 139, 141, 143, 146, 148, 149, 151,
|
||||
155, 157, 159, 160, 161, 0
|
||||
};
|
||||
|
||||
static const char _ControlVerbs_trans_keys[] = {
|
||||
41u, 65u, 66u, 67u, 76u, 78u, 85u, 41u,
|
||||
41u, 78u, 41u, 89u, 41u, 67u, 41u, 82u,
|
||||
41u, 76u, 41u, 70u, 41u, 41u, 83u, 41u,
|
||||
82u, 41u, 95u, 41u, 65u, 85u, 41u, 78u,
|
||||
41u, 89u, 41u, 67u, 41u, 78u, 41u, 73u,
|
||||
41u, 67u, 41u, 79u, 41u, 68u, 41u, 69u,
|
||||
41u, 82u, 41u, 76u, 41u, 70u, 73u, 41u,
|
||||
77u, 41u, 73u, 41u, 84u, 41u, 95u, 41u,
|
||||
77u, 82u, 41u, 65u, 41u, 84u, 41u, 67u,
|
||||
41u, 72u, 41u, 61u, 41u, 48u, 57u, 41u,
|
||||
48u, 57u, 41u, 69u, 41u, 67u, 41u, 85u,
|
||||
41u, 82u, 41u, 83u, 41u, 73u, 41u, 79u,
|
||||
41u, 78u, 41u, 79u, 41u, 95u, 41u, 65u,
|
||||
83u, 41u, 85u, 41u, 84u, 41u, 79u, 41u,
|
||||
95u, 41u, 80u, 41u, 79u, 41u, 83u, 41u,
|
||||
83u, 41u, 69u, 41u, 83u, 41u, 83u, 41u,
|
||||
84u, 41u, 65u, 41u, 82u, 41u, 84u, 41u,
|
||||
95u, 41u, 79u, 41u, 80u, 41u, 84u, 41u,
|
||||
67u, 84u, 41u, 80u, 41u, 41u, 70u, 41u,
|
||||
49u, 51u, 56u, 41u, 54u, 41u, 50u, 41u,
|
||||
40u, 42u, 0u
|
||||
};
|
||||
|
||||
static const signed char _ControlVerbs_single_lengths[] = {
|
||||
7, 1, 2, 2, 2, 2, 2, 2,
|
||||
1, 2, 2, 2, 3, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2,
|
||||
3, 2, 2, 2, 2, 3, 2, 2,
|
||||
2, 2, 2, 1, 1, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 3,
|
||||
2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 3, 2, 1, 2, 4,
|
||||
2, 2, 1, 1, 1, 0
|
||||
};
|
||||
|
||||
static const signed char _ControlVerbs_range_lengths[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
static const short _ControlVerbs_index_offsets[] = {
|
||||
0, 8, 10, 13, 16, 19, 22, 25,
|
||||
28, 30, 33, 36, 39, 43, 46, 49,
|
||||
52, 55, 58, 61, 64, 67, 70, 73,
|
||||
76, 80, 83, 86, 89, 92, 96, 99,
|
||||
102, 105, 108, 111, 114, 117, 120, 123,
|
||||
126, 129, 132, 135, 138, 141, 144, 147,
|
||||
151, 154, 157, 160, 163, 166, 169, 172,
|
||||
175, 178, 181, 184, 187, 190, 193, 196,
|
||||
199, 202, 205, 208, 212, 215, 217, 220,
|
||||
225, 228, 231, 233, 235, 0
|
||||
};
|
||||
|
||||
static const signed char _ControlVerbs_cond_targs[] = {
|
||||
75, 2, 9, 22, 24, 45, 67, 1,
|
||||
75, 1, 75, 3, 1, 75, 4, 1,
|
||||
75, 5, 1, 75, 6, 1, 75, 7,
|
||||
1, 75, 8, 1, 75, 1, 75, 10,
|
||||
1, 75, 11, 1, 75, 12, 1, 75,
|
||||
13, 16, 1, 75, 14, 1, 75, 15,
|
||||
1, 75, 5, 1, 75, 17, 1, 75,
|
||||
18, 1, 75, 19, 1, 75, 20, 1,
|
||||
75, 21, 1, 75, 8, 1, 75, 23,
|
||||
1, 75, 7, 1, 75, 8, 25, 1,
|
||||
75, 26, 1, 75, 27, 1, 75, 28,
|
||||
1, 75, 29, 1, 75, 30, 37, 1,
|
||||
75, 31, 1, 75, 32, 1, 75, 33,
|
||||
1, 75, 34, 1, 75, 35, 1, 75,
|
||||
36, 1, 75, 36, 1, 75, 38, 1,
|
||||
75, 39, 1, 75, 40, 1, 75, 41,
|
||||
1, 75, 42, 1, 75, 43, 1, 75,
|
||||
44, 1, 75, 34, 1, 75, 46, 1,
|
||||
75, 47, 1, 75, 48, 59, 1, 75,
|
||||
49, 1, 75, 50, 1, 75, 51, 1,
|
||||
75, 52, 1, 75, 53, 1, 75, 54,
|
||||
1, 75, 55, 1, 75, 56, 1, 75,
|
||||
57, 1, 75, 58, 1, 75, 8, 1,
|
||||
75, 60, 1, 75, 61, 1, 75, 62,
|
||||
1, 75, 63, 1, 75, 64, 1, 75,
|
||||
65, 1, 75, 66, 1, 75, 8, 1,
|
||||
75, 68, 70, 1, 75, 69, 1, 75,
|
||||
1, 75, 71, 1, 75, 72, 73, 74,
|
||||
1, 75, 8, 1, 75, 8, 1, 75,
|
||||
1, 76, 75, 0, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 0
|
||||
};
|
||||
|
||||
static const signed char _ControlVerbs_cond_actions[] = {
|
||||
19, 0, 0, 0, 0, 0, 0, 0,
|
||||
13, 0, 13, 0, 0, 13, 0, 0,
|
||||
11, 0, 0, 13, 0, 0, 13, 0,
|
||||
0, 13, 0, 0, 11, 0, 13, 0,
|
||||
0, 13, 0, 0, 13, 0, 0, 13,
|
||||
0, 0, 0, 13, 0, 0, 13, 0,
|
||||
0, 13, 0, 0, 13, 0, 0, 13,
|
||||
0, 0, 13, 0, 0, 13, 0, 0,
|
||||
13, 0, 0, 13, 0, 0, 13, 0,
|
||||
0, 11, 0, 0, 13, 0, 0, 0,
|
||||
13, 0, 0, 13, 0, 0, 13, 0,
|
||||
0, 13, 0, 0, 13, 0, 0, 0,
|
||||
13, 0, 0, 13, 0, 0, 13, 0,
|
||||
0, 13, 0, 0, 13, 0, 0, 13,
|
||||
0, 0, 11, 0, 0, 13, 0, 0,
|
||||
13, 0, 0, 13, 0, 0, 13, 0,
|
||||
0, 13, 0, 0, 13, 0, 0, 13,
|
||||
0, 0, 13, 0, 0, 13, 0, 0,
|
||||
13, 0, 0, 13, 0, 0, 0, 13,
|
||||
0, 0, 13, 0, 0, 13, 0, 0,
|
||||
13, 0, 0, 13, 0, 0, 13, 0,
|
||||
0, 13, 0, 0, 13, 0, 0, 13,
|
||||
0, 0, 13, 0, 0, 13, 0, 0,
|
||||
13, 0, 0, 13, 0, 0, 13, 0,
|
||||
0, 13, 0, 0, 13, 0, 0, 13,
|
||||
0, 0, 13, 0, 0, 13, 0, 0,
|
||||
13, 0, 0, 0, 13, 0, 0, 9,
|
||||
0, 13, 0, 0, 7, 0, 0, 0,
|
||||
0, 13, 0, 0, 13, 0, 0, 7,
|
||||
0, 5, 15, 0, 17, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
0, 17, 0
|
||||
};
|
||||
|
||||
static const signed char _ControlVerbs_to_state_actions[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 0, 0
|
||||
};
|
||||
|
||||
static const signed char _ControlVerbs_from_state_actions[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 3, 0, 0
|
||||
};
|
||||
|
||||
static const short _ControlVerbs_eof_trans[] = {
|
||||
238, 239, 240, 241, 242, 243, 244, 245,
|
||||
246, 247, 248, 249, 250, 251, 252, 253,
|
||||
254, 255, 256, 257, 258, 259, 260, 261,
|
||||
262, 263, 264, 265, 266, 267, 268, 269,
|
||||
270, 271, 272, 273, 274, 275, 276, 277,
|
||||
278, 279, 280, 281, 282, 283, 284, 285,
|
||||
286, 287, 288, 289, 290, 291, 292, 293,
|
||||
294, 295, 296, 297, 298, 299, 300, 301,
|
||||
302, 303, 304, 305, 306, 307, 308, 309,
|
||||
310, 311, 312, 313, 314, 0
|
||||
};
|
||||
|
||||
static const int ControlVerbs_start = 75;
|
||||
static const int ControlVerbs_first_final = 75;
|
||||
static const int ControlVerbs_error = -1;
|
||||
|
||||
static const int ControlVerbs_en_main = 75;
|
||||
|
||||
|
||||
#line 269 "control_verbs.cpp"
|
||||
{
|
||||
cs = (int)ControlVerbs_start;
|
||||
ts = 0;
|
||||
te = 0;
|
||||
}
|
||||
|
||||
#line 105 "control_verbs.rl"
|
||||
|
||||
|
||||
try {
|
||||
|
||||
#line 278 "control_verbs.cpp"
|
||||
{
|
||||
int _klen;
|
||||
unsigned int _trans = 0;
|
||||
const char * _keys;
|
||||
const signed char * _acts;
|
||||
unsigned int _nacts;
|
||||
_resume: {}
|
||||
if ( p == pe && p != eof )
|
||||
goto _out;
|
||||
_acts = ( _ControlVerbs_actions + (_ControlVerbs_from_state_actions[cs]));
|
||||
_nacts = (unsigned int)(*( _acts));
|
||||
_acts += 1;
|
||||
while ( _nacts > 0 ) {
|
||||
switch ( (*( _acts)) ) {
|
||||
case 1: {
|
||||
{
|
||||
#line 1 "NONE"
|
||||
{ts = p;}}
|
||||
|
||||
#line 297 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
_nacts -= 1;
|
||||
_acts += 1;
|
||||
}
|
||||
|
||||
if ( p == eof ) {
|
||||
if ( _ControlVerbs_eof_trans[cs] > 0 ) {
|
||||
_trans = (unsigned int)_ControlVerbs_eof_trans[cs] - 1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
_keys = ( _ControlVerbs_trans_keys + (_ControlVerbs_key_offsets[cs]));
|
||||
_trans = (unsigned int)_ControlVerbs_index_offsets[cs];
|
||||
|
||||
_klen = (int)_ControlVerbs_single_lengths[cs];
|
||||
if ( _klen > 0 ) {
|
||||
const char *_lower = _keys;
|
||||
const char *_upper = _keys + _klen - 1;
|
||||
const char *_mid;
|
||||
while ( 1 ) {
|
||||
if ( _upper < _lower ) {
|
||||
_keys += _klen;
|
||||
_trans += (unsigned int)_klen;
|
||||
break;
|
||||
}
|
||||
|
||||
_mid = _lower + ((_upper-_lower) >> 1);
|
||||
if ( ( (*( p))) < (*( _mid)) )
|
||||
_upper = _mid - 1;
|
||||
else if ( ( (*( p))) > (*( _mid)) )
|
||||
_lower = _mid + 1;
|
||||
else {
|
||||
_trans += (unsigned int)(_mid - _keys);
|
||||
goto _match;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_klen = (int)_ControlVerbs_range_lengths[cs];
|
||||
if ( _klen > 0 ) {
|
||||
const char *_lower = _keys;
|
||||
const char *_upper = _keys + (_klen<<1) - 2;
|
||||
const char *_mid;
|
||||
while ( 1 ) {
|
||||
if ( _upper < _lower ) {
|
||||
_trans += (unsigned int)_klen;
|
||||
break;
|
||||
}
|
||||
|
||||
_mid = _lower + (((_upper-_lower) >> 1) & ~1);
|
||||
if ( ( (*( p))) < (*( _mid)) )
|
||||
_upper = _mid - 2;
|
||||
else if ( ( (*( p))) > (*( _mid + 1)) )
|
||||
_lower = _mid + 2;
|
||||
else {
|
||||
_trans += (unsigned int)((_mid - _keys)>>1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_match: {}
|
||||
}
|
||||
cs = (int)_ControlVerbs_cond_targs[_trans];
|
||||
|
||||
if ( _ControlVerbs_cond_actions[_trans] != 0 ) {
|
||||
|
||||
_acts = ( _ControlVerbs_actions + (_ControlVerbs_cond_actions[_trans]));
|
||||
_nacts = (unsigned int)(*( _acts));
|
||||
_acts += 1;
|
||||
while ( _nacts > 0 ) {
|
||||
switch ( (*( _acts)) )
|
||||
{
|
||||
case 2: {
|
||||
{
|
||||
#line 1 "NONE"
|
||||
{te = p+1;}}
|
||||
|
||||
#line 378 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
case 3: {
|
||||
{
|
||||
#line 76 "control_verbs.rl"
|
||||
{te = p+1;{
|
||||
#line 76 "control_verbs.rl"
|
||||
|
||||
mode.utf8 = true;
|
||||
}
|
||||
}}
|
||||
|
||||
#line 391 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
{
|
||||
#line 80 "control_verbs.rl"
|
||||
{te = p+1;{
|
||||
#line 80 "control_verbs.rl"
|
||||
|
||||
mode.ucp = true;
|
||||
}
|
||||
}}
|
||||
|
||||
#line 404 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
case 5: {
|
||||
{
|
||||
#line 84 "control_verbs.rl"
|
||||
{te = p+1;{
|
||||
#line 84 "control_verbs.rl"
|
||||
|
||||
ostringstream str;
|
||||
str << "Unsupported control verb " << string(ts, te - ts);
|
||||
throw LocatedParseError(str.str());
|
||||
}
|
||||
}}
|
||||
|
||||
#line 419 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
case 6: {
|
||||
{
|
||||
#line 90 "control_verbs.rl"
|
||||
{te = p+1;{
|
||||
#line 90 "control_verbs.rl"
|
||||
|
||||
ostringstream str;
|
||||
str << "Unknown control verb " << string(ts, te - ts);
|
||||
throw LocatedParseError(str.str());
|
||||
}
|
||||
}}
|
||||
|
||||
#line 434 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
case 7: {
|
||||
{
|
||||
#line 97 "control_verbs.rl"
|
||||
{te = p+1;{
|
||||
#line 97 "control_verbs.rl"
|
||||
|
||||
{p = p - 1; }
|
||||
{p += 1; goto _out; }
|
||||
}
|
||||
}}
|
||||
|
||||
#line 448 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
{
|
||||
#line 97 "control_verbs.rl"
|
||||
{te = p;p = p - 1;{
|
||||
#line 97 "control_verbs.rl"
|
||||
|
||||
{p = p - 1; }
|
||||
{p += 1; goto _out; }
|
||||
}
|
||||
}}
|
||||
|
||||
#line 462 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
case 9: {
|
||||
{
|
||||
#line 97 "control_verbs.rl"
|
||||
{p = ((te))-1;
|
||||
{
|
||||
#line 97 "control_verbs.rl"
|
||||
|
||||
{p = p - 1; }
|
||||
{p += 1; goto _out; }
|
||||
}
|
||||
}}
|
||||
|
||||
#line 477 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
_nacts -= 1;
|
||||
_acts += 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if ( p == eof ) {
|
||||
if ( cs >= 75 )
|
||||
goto _out;
|
||||
}
|
||||
else {
|
||||
_acts = ( _ControlVerbs_actions + (_ControlVerbs_to_state_actions[cs]));
|
||||
_nacts = (unsigned int)(*( _acts));
|
||||
_acts += 1;
|
||||
while ( _nacts > 0 ) {
|
||||
switch ( (*( _acts)) ) {
|
||||
case 0: {
|
||||
{
|
||||
#line 1 "NONE"
|
||||
{ts = 0;}}
|
||||
|
||||
#line 502 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
_nacts -= 1;
|
||||
_acts += 1;
|
||||
}
|
||||
|
||||
p += 1;
|
||||
goto _resume;
|
||||
}
|
||||
_out: {}
|
||||
}
|
||||
|
||||
#line 108 "control_verbs.rl"
|
||||
|
||||
} catch (LocatedParseError &error) {
|
||||
if (ts >= ptr && ts <= pe) {
|
||||
error.locate(ts - ptr + start);
|
||||
} else {
|
||||
error.locate(0);
|
||||
}
|
||||
throw;
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
10725
contrib/vectorscan-cmake/rageled_files/amd64/Parser.cpp
Normal file
10725
contrib/vectorscan-cmake/rageled_files/amd64/Parser.cpp
Normal file
File diff suppressed because it is too large
Load Diff
547
contrib/vectorscan-cmake/rageled_files/amd64/control_verbs.cpp
Normal file
547
contrib/vectorscan-cmake/rageled_files/amd64/control_verbs.cpp
Normal file
@ -0,0 +1,547 @@
|
||||
#line 1 "control_verbs.rl"
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief Parser for control verbs that can occur at the beginning of a pattern.
|
||||
*/
|
||||
|
||||
#include "parser/control_verbs.h"
|
||||
|
||||
#include "parser/Parser.h"
|
||||
#include "parser/parse_error.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
const char *read_control_verbs(const char *ptr, const char *end, size_t start,
|
||||
ParseMode &mode) {
|
||||
const char *p = ptr;
|
||||
const char *pe = end;
|
||||
const char *eof = pe;
|
||||
const char *ts, *te;
|
||||
int cs;
|
||||
UNUSED int act;
|
||||
|
||||
|
||||
#line 56 "control_verbs.cpp"
|
||||
static const signed char _ControlVerbs_actions[] = {
|
||||
0, 1, 0, 1, 1, 1, 2, 1,
|
||||
3, 1, 4, 1, 5, 1, 6, 1,
|
||||
7, 1, 8, 1, 9, 0
|
||||
};
|
||||
|
||||
static const short _ControlVerbs_key_offsets[] = {
|
||||
0, 7, 8, 10, 12, 14, 16, 18,
|
||||
20, 21, 23, 25, 27, 30, 32, 34,
|
||||
36, 38, 40, 42, 44, 46, 48, 50,
|
||||
52, 55, 57, 59, 61, 63, 66, 68,
|
||||
70, 72, 74, 76, 79, 82, 84, 86,
|
||||
88, 90, 92, 94, 96, 98, 100, 102,
|
||||
105, 107, 109, 111, 113, 115, 117, 119,
|
||||
121, 123, 125, 127, 129, 131, 133, 135,
|
||||
137, 139, 141, 143, 146, 148, 149, 151,
|
||||
155, 157, 159, 160, 161, 0
|
||||
};
|
||||
|
||||
static const char _ControlVerbs_trans_keys[] = {
|
||||
41, 65, 66, 67, 76, 78, 85, 41,
|
||||
41, 78, 41, 89, 41, 67, 41, 82,
|
||||
41, 76, 41, 70, 41, 41, 83, 41,
|
||||
82, 41, 95, 41, 65, 85, 41, 78,
|
||||
41, 89, 41, 67, 41, 78, 41, 73,
|
||||
41, 67, 41, 79, 41, 68, 41, 69,
|
||||
41, 82, 41, 76, 41, 70, 73, 41,
|
||||
77, 41, 73, 41, 84, 41, 95, 41,
|
||||
77, 82, 41, 65, 41, 84, 41, 67,
|
||||
41, 72, 41, 61, 41, 48, 57, 41,
|
||||
48, 57, 41, 69, 41, 67, 41, 85,
|
||||
41, 82, 41, 83, 41, 73, 41, 79,
|
||||
41, 78, 41, 79, 41, 95, 41, 65,
|
||||
83, 41, 85, 41, 84, 41, 79, 41,
|
||||
95, 41, 80, 41, 79, 41, 83, 41,
|
||||
83, 41, 69, 41, 83, 41, 83, 41,
|
||||
84, 41, 65, 41, 82, 41, 84, 41,
|
||||
95, 41, 79, 41, 80, 41, 84, 41,
|
||||
67, 84, 41, 80, 41, 41, 70, 41,
|
||||
49, 51, 56, 41, 54, 41, 50, 41,
|
||||
40, 42, 0
|
||||
};
|
||||
|
||||
static const signed char _ControlVerbs_single_lengths[] = {
|
||||
7, 1, 2, 2, 2, 2, 2, 2,
|
||||
1, 2, 2, 2, 3, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2,
|
||||
3, 2, 2, 2, 2, 3, 2, 2,
|
||||
2, 2, 2, 1, 1, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 3,
|
||||
2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 3, 2, 1, 2, 4,
|
||||
2, 2, 1, 1, 1, 0
|
||||
};
|
||||
|
||||
static const signed char _ControlVerbs_range_lengths[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
static const short _ControlVerbs_index_offsets[] = {
|
||||
0, 8, 10, 13, 16, 19, 22, 25,
|
||||
28, 30, 33, 36, 39, 43, 46, 49,
|
||||
52, 55, 58, 61, 64, 67, 70, 73,
|
||||
76, 80, 83, 86, 89, 92, 96, 99,
|
||||
102, 105, 108, 111, 114, 117, 120, 123,
|
||||
126, 129, 132, 135, 138, 141, 144, 147,
|
||||
151, 154, 157, 160, 163, 166, 169, 172,
|
||||
175, 178, 181, 184, 187, 190, 193, 196,
|
||||
199, 202, 205, 208, 212, 215, 217, 220,
|
||||
225, 228, 231, 233, 235, 0
|
||||
};
|
||||
|
||||
static const signed char _ControlVerbs_cond_targs[] = {
|
||||
75, 2, 9, 22, 24, 45, 67, 1,
|
||||
75, 1, 75, 3, 1, 75, 4, 1,
|
||||
75, 5, 1, 75, 6, 1, 75, 7,
|
||||
1, 75, 8, 1, 75, 1, 75, 10,
|
||||
1, 75, 11, 1, 75, 12, 1, 75,
|
||||
13, 16, 1, 75, 14, 1, 75, 15,
|
||||
1, 75, 5, 1, 75, 17, 1, 75,
|
||||
18, 1, 75, 19, 1, 75, 20, 1,
|
||||
75, 21, 1, 75, 8, 1, 75, 23,
|
||||
1, 75, 7, 1, 75, 8, 25, 1,
|
||||
75, 26, 1, 75, 27, 1, 75, 28,
|
||||
1, 75, 29, 1, 75, 30, 37, 1,
|
||||
75, 31, 1, 75, 32, 1, 75, 33,
|
||||
1, 75, 34, 1, 75, 35, 1, 75,
|
||||
36, 1, 75, 36, 1, 75, 38, 1,
|
||||
75, 39, 1, 75, 40, 1, 75, 41,
|
||||
1, 75, 42, 1, 75, 43, 1, 75,
|
||||
44, 1, 75, 34, 1, 75, 46, 1,
|
||||
75, 47, 1, 75, 48, 59, 1, 75,
|
||||
49, 1, 75, 50, 1, 75, 51, 1,
|
||||
75, 52, 1, 75, 53, 1, 75, 54,
|
||||
1, 75, 55, 1, 75, 56, 1, 75,
|
||||
57, 1, 75, 58, 1, 75, 8, 1,
|
||||
75, 60, 1, 75, 61, 1, 75, 62,
|
||||
1, 75, 63, 1, 75, 64, 1, 75,
|
||||
65, 1, 75, 66, 1, 75, 8, 1,
|
||||
75, 68, 70, 1, 75, 69, 1, 75,
|
||||
1, 75, 71, 1, 75, 72, 73, 74,
|
||||
1, 75, 8, 1, 75, 8, 1, 75,
|
||||
1, 76, 75, 0, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 75, 75, 75, 75, 75, 75,
|
||||
75, 75, 0
|
||||
};
|
||||
|
||||
static const signed char _ControlVerbs_cond_actions[] = {
|
||||
19, 0, 0, 0, 0, 0, 0, 0,
|
||||
13, 0, 13, 0, 0, 13, 0, 0,
|
||||
11, 0, 0, 13, 0, 0, 13, 0,
|
||||
0, 13, 0, 0, 11, 0, 13, 0,
|
||||
0, 13, 0, 0, 13, 0, 0, 13,
|
||||
0, 0, 0, 13, 0, 0, 13, 0,
|
||||
0, 13, 0, 0, 13, 0, 0, 13,
|
||||
0, 0, 13, 0, 0, 13, 0, 0,
|
||||
13, 0, 0, 13, 0, 0, 13, 0,
|
||||
0, 11, 0, 0, 13, 0, 0, 0,
|
||||
13, 0, 0, 13, 0, 0, 13, 0,
|
||||
0, 13, 0, 0, 13, 0, 0, 0,
|
||||
13, 0, 0, 13, 0, 0, 13, 0,
|
||||
0, 13, 0, 0, 13, 0, 0, 13,
|
||||
0, 0, 11, 0, 0, 13, 0, 0,
|
||||
13, 0, 0, 13, 0, 0, 13, 0,
|
||||
0, 13, 0, 0, 13, 0, 0, 13,
|
||||
0, 0, 13, 0, 0, 13, 0, 0,
|
||||
13, 0, 0, 13, 0, 0, 0, 13,
|
||||
0, 0, 13, 0, 0, 13, 0, 0,
|
||||
13, 0, 0, 13, 0, 0, 13, 0,
|
||||
0, 13, 0, 0, 13, 0, 0, 13,
|
||||
0, 0, 13, 0, 0, 13, 0, 0,
|
||||
13, 0, 0, 13, 0, 0, 13, 0,
|
||||
0, 13, 0, 0, 13, 0, 0, 13,
|
||||
0, 0, 13, 0, 0, 13, 0, 0,
|
||||
13, 0, 0, 0, 13, 0, 0, 9,
|
||||
0, 13, 0, 0, 7, 0, 0, 0,
|
||||
0, 13, 0, 0, 13, 0, 0, 7,
|
||||
0, 5, 15, 0, 17, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19,
|
||||
0, 17, 0
|
||||
};
|
||||
|
||||
static const signed char _ControlVerbs_to_state_actions[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 0, 0
|
||||
};
|
||||
|
||||
static const signed char _ControlVerbs_from_state_actions[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 3, 0, 0
|
||||
};
|
||||
|
||||
static const short _ControlVerbs_eof_trans[] = {
|
||||
238, 239, 240, 241, 242, 243, 244, 245,
|
||||
246, 247, 248, 249, 250, 251, 252, 253,
|
||||
254, 255, 256, 257, 258, 259, 260, 261,
|
||||
262, 263, 264, 265, 266, 267, 268, 269,
|
||||
270, 271, 272, 273, 274, 275, 276, 277,
|
||||
278, 279, 280, 281, 282, 283, 284, 285,
|
||||
286, 287, 288, 289, 290, 291, 292, 293,
|
||||
294, 295, 296, 297, 298, 299, 300, 301,
|
||||
302, 303, 304, 305, 306, 307, 308, 309,
|
||||
310, 311, 312, 313, 314, 0
|
||||
};
|
||||
|
||||
static const int ControlVerbs_start = 75;
|
||||
static const int ControlVerbs_first_final = 75;
|
||||
static const int ControlVerbs_error = -1;
|
||||
|
||||
static const int ControlVerbs_en_main = 75;
|
||||
|
||||
|
||||
#line 269 "control_verbs.cpp"
|
||||
{
|
||||
cs = (int)ControlVerbs_start;
|
||||
ts = 0;
|
||||
te = 0;
|
||||
}
|
||||
|
||||
#line 105 "control_verbs.rl"
|
||||
|
||||
|
||||
try {
|
||||
|
||||
#line 278 "control_verbs.cpp"
|
||||
{
|
||||
int _klen;
|
||||
unsigned int _trans = 0;
|
||||
const char * _keys;
|
||||
const signed char * _acts;
|
||||
unsigned int _nacts;
|
||||
_resume: {}
|
||||
if ( p == pe && p != eof )
|
||||
goto _out;
|
||||
_acts = ( _ControlVerbs_actions + (_ControlVerbs_from_state_actions[cs]));
|
||||
_nacts = (unsigned int)(*( _acts));
|
||||
_acts += 1;
|
||||
while ( _nacts > 0 ) {
|
||||
switch ( (*( _acts)) ) {
|
||||
case 1: {
|
||||
{
|
||||
#line 1 "NONE"
|
||||
{ts = p;}}
|
||||
|
||||
#line 297 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
_nacts -= 1;
|
||||
_acts += 1;
|
||||
}
|
||||
|
||||
if ( p == eof ) {
|
||||
if ( _ControlVerbs_eof_trans[cs] > 0 ) {
|
||||
_trans = (unsigned int)_ControlVerbs_eof_trans[cs] - 1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
_keys = ( _ControlVerbs_trans_keys + (_ControlVerbs_key_offsets[cs]));
|
||||
_trans = (unsigned int)_ControlVerbs_index_offsets[cs];
|
||||
|
||||
_klen = (int)_ControlVerbs_single_lengths[cs];
|
||||
if ( _klen > 0 ) {
|
||||
const char *_lower = _keys;
|
||||
const char *_upper = _keys + _klen - 1;
|
||||
const char *_mid;
|
||||
while ( 1 ) {
|
||||
if ( _upper < _lower ) {
|
||||
_keys += _klen;
|
||||
_trans += (unsigned int)_klen;
|
||||
break;
|
||||
}
|
||||
|
||||
_mid = _lower + ((_upper-_lower) >> 1);
|
||||
if ( ( (*( p))) < (*( _mid)) )
|
||||
_upper = _mid - 1;
|
||||
else if ( ( (*( p))) > (*( _mid)) )
|
||||
_lower = _mid + 1;
|
||||
else {
|
||||
_trans += (unsigned int)(_mid - _keys);
|
||||
goto _match;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_klen = (int)_ControlVerbs_range_lengths[cs];
|
||||
if ( _klen > 0 ) {
|
||||
const char *_lower = _keys;
|
||||
const char *_upper = _keys + (_klen<<1) - 2;
|
||||
const char *_mid;
|
||||
while ( 1 ) {
|
||||
if ( _upper < _lower ) {
|
||||
_trans += (unsigned int)_klen;
|
||||
break;
|
||||
}
|
||||
|
||||
_mid = _lower + (((_upper-_lower) >> 1) & ~1);
|
||||
if ( ( (*( p))) < (*( _mid)) )
|
||||
_upper = _mid - 2;
|
||||
else if ( ( (*( p))) > (*( _mid + 1)) )
|
||||
_lower = _mid + 2;
|
||||
else {
|
||||
_trans += (unsigned int)((_mid - _keys)>>1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_match: {}
|
||||
}
|
||||
cs = (int)_ControlVerbs_cond_targs[_trans];
|
||||
|
||||
if ( _ControlVerbs_cond_actions[_trans] != 0 ) {
|
||||
|
||||
_acts = ( _ControlVerbs_actions + (_ControlVerbs_cond_actions[_trans]));
|
||||
_nacts = (unsigned int)(*( _acts));
|
||||
_acts += 1;
|
||||
while ( _nacts > 0 ) {
|
||||
switch ( (*( _acts)) )
|
||||
{
|
||||
case 2: {
|
||||
{
|
||||
#line 1 "NONE"
|
||||
{te = p+1;}}
|
||||
|
||||
#line 378 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
case 3: {
|
||||
{
|
||||
#line 76 "control_verbs.rl"
|
||||
{te = p+1;{
|
||||
#line 76 "control_verbs.rl"
|
||||
|
||||
mode.utf8 = true;
|
||||
}
|
||||
}}
|
||||
|
||||
#line 391 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
{
|
||||
#line 80 "control_verbs.rl"
|
||||
{te = p+1;{
|
||||
#line 80 "control_verbs.rl"
|
||||
|
||||
mode.ucp = true;
|
||||
}
|
||||
}}
|
||||
|
||||
#line 404 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
case 5: {
|
||||
{
|
||||
#line 84 "control_verbs.rl"
|
||||
{te = p+1;{
|
||||
#line 84 "control_verbs.rl"
|
||||
|
||||
ostringstream str;
|
||||
str << "Unsupported control verb " << string(ts, te - ts);
|
||||
throw LocatedParseError(str.str());
|
||||
}
|
||||
}}
|
||||
|
||||
#line 419 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
case 6: {
|
||||
{
|
||||
#line 90 "control_verbs.rl"
|
||||
{te = p+1;{
|
||||
#line 90 "control_verbs.rl"
|
||||
|
||||
ostringstream str;
|
||||
str << "Unknown control verb " << string(ts, te - ts);
|
||||
throw LocatedParseError(str.str());
|
||||
}
|
||||
}}
|
||||
|
||||
#line 434 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
case 7: {
|
||||
{
|
||||
#line 97 "control_verbs.rl"
|
||||
{te = p+1;{
|
||||
#line 97 "control_verbs.rl"
|
||||
|
||||
{p = p - 1; }
|
||||
{p += 1; goto _out; }
|
||||
}
|
||||
}}
|
||||
|
||||
#line 448 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
{
|
||||
#line 97 "control_verbs.rl"
|
||||
{te = p;p = p - 1;{
|
||||
#line 97 "control_verbs.rl"
|
||||
|
||||
{p = p - 1; }
|
||||
{p += 1; goto _out; }
|
||||
}
|
||||
}}
|
||||
|
||||
#line 462 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
case 9: {
|
||||
{
|
||||
#line 97 "control_verbs.rl"
|
||||
{p = ((te))-1;
|
||||
{
|
||||
#line 97 "control_verbs.rl"
|
||||
|
||||
{p = p - 1; }
|
||||
{p += 1; goto _out; }
|
||||
}
|
||||
}}
|
||||
|
||||
#line 477 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
_nacts -= 1;
|
||||
_acts += 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if ( p == eof ) {
|
||||
if ( cs >= 75 )
|
||||
goto _out;
|
||||
}
|
||||
else {
|
||||
_acts = ( _ControlVerbs_actions + (_ControlVerbs_to_state_actions[cs]));
|
||||
_nacts = (unsigned int)(*( _acts));
|
||||
_acts += 1;
|
||||
while ( _nacts > 0 ) {
|
||||
switch ( (*( _acts)) ) {
|
||||
case 0: {
|
||||
{
|
||||
#line 1 "NONE"
|
||||
{ts = 0;}}
|
||||
|
||||
#line 502 "control_verbs.cpp"
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
_nacts -= 1;
|
||||
_acts += 1;
|
||||
}
|
||||
|
||||
p += 1;
|
||||
goto _resume;
|
||||
}
|
||||
_out: {}
|
||||
}
|
||||
|
||||
#line 108 "control_verbs.rl"
|
||||
|
||||
} catch (LocatedParseError &error) {
|
||||
if (ts >= ptr && ts <= pe) {
|
||||
error.locate(ts - ptr + start);
|
||||
} else {
|
||||
error.locate(0);
|
||||
}
|
||||
throw;
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
@ -1,443 +0,0 @@
|
||||
|
||||
#line 1 "control_verbs.rl"
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief Parser for control verbs that can occur at the beginning of a pattern.
|
||||
*/
|
||||
|
||||
#include "parser/control_verbs.h"
|
||||
|
||||
#include "parser/Parser.h"
|
||||
#include "parser/parse_error.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
const char *read_control_verbs(const char *ptr, const char *end, size_t start,
|
||||
ParseMode &mode) {
|
||||
const char *p = ptr;
|
||||
const char *pe = end;
|
||||
const char *eof = pe;
|
||||
const char *ts, *te;
|
||||
int cs;
|
||||
UNUSED int act;
|
||||
|
||||
|
||||
#line 59 "control_verbs.cpp"
|
||||
static const char _ControlVerbs_actions[] = {
|
||||
0, 1, 0, 1, 1, 1, 2, 1,
|
||||
3, 1, 4, 1, 5, 1, 6, 1,
|
||||
7, 1, 8, 1, 9
|
||||
};
|
||||
|
||||
static const unsigned char _ControlVerbs_key_offsets[] = {
|
||||
0, 7, 8, 10, 12, 14, 16, 18,
|
||||
20, 21, 23, 25, 27, 30, 32, 34,
|
||||
36, 38, 40, 42, 44, 46, 48, 50,
|
||||
52, 55, 57, 59, 61, 63, 66, 68,
|
||||
70, 72, 74, 76, 79, 82, 84, 86,
|
||||
88, 90, 92, 94, 96, 98, 100, 102,
|
||||
105, 107, 109, 111, 113, 115, 117, 119,
|
||||
121, 123, 125, 127, 129, 131, 133, 135,
|
||||
137, 139, 141, 143, 146, 148, 149, 151,
|
||||
155, 157, 159, 160, 161
|
||||
};
|
||||
|
||||
static const char _ControlVerbs_trans_keys[] = {
|
||||
41, 65, 66, 67, 76, 78, 85, 41,
|
||||
41, 78, 41, 89, 41, 67, 41, 82,
|
||||
41, 76, 41, 70, 41, 41, 83, 41,
|
||||
82, 41, 95, 41, 65, 85, 41, 78,
|
||||
41, 89, 41, 67, 41, 78, 41, 73,
|
||||
41, 67, 41, 79, 41, 68, 41, 69,
|
||||
41, 82, 41, 76, 41, 70, 73, 41,
|
||||
77, 41, 73, 41, 84, 41, 95, 41,
|
||||
77, 82, 41, 65, 41, 84, 41, 67,
|
||||
41, 72, 41, 61, 41, 48, 57, 41,
|
||||
48, 57, 41, 69, 41, 67, 41, 85,
|
||||
41, 82, 41, 83, 41, 73, 41, 79,
|
||||
41, 78, 41, 79, 41, 95, 41, 65,
|
||||
83, 41, 85, 41, 84, 41, 79, 41,
|
||||
95, 41, 80, 41, 79, 41, 83, 41,
|
||||
83, 41, 69, 41, 83, 41, 83, 41,
|
||||
84, 41, 65, 41, 82, 41, 84, 41,
|
||||
95, 41, 79, 41, 80, 41, 84, 41,
|
||||
67, 84, 41, 80, 41, 41, 70, 41,
|
||||
49, 51, 56, 41, 54, 41, 50, 41,
|
||||
40, 42, 0
|
||||
};
|
||||
|
||||
static const char _ControlVerbs_single_lengths[] = {
|
||||
7, 1, 2, 2, 2, 2, 2, 2,
|
||||
1, 2, 2, 2, 3, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2,
|
||||
3, 2, 2, 2, 2, 3, 2, 2,
|
||||
2, 2, 2, 1, 1, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 3,
|
||||
2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 3, 2, 1, 2, 4,
|
||||
2, 2, 1, 1, 1
|
||||
};
|
||||
|
||||
static const char _ControlVerbs_range_lengths[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
static const short _ControlVerbs_index_offsets[] = {
|
||||
0, 8, 10, 13, 16, 19, 22, 25,
|
||||
28, 30, 33, 36, 39, 43, 46, 49,
|
||||
52, 55, 58, 61, 64, 67, 70, 73,
|
||||
76, 80, 83, 86, 89, 92, 96, 99,
|
||||
102, 105, 108, 111, 114, 117, 120, 123,
|
||||
126, 129, 132, 135, 138, 141, 144, 147,
|
||||
151, 154, 157, 160, 163, 166, 169, 172,
|
||||
175, 178, 181, 184, 187, 190, 193, 196,
|
||||
199, 202, 205, 208, 212, 215, 217, 220,
|
||||
225, 228, 231, 233, 235
|
||||
};
|
||||
|
||||
static const char _ControlVerbs_indicies[] = {
|
||||
0, 2, 3, 4, 5, 6, 7, 1,
|
||||
8, 1, 8, 9, 1, 8, 10, 1,
|
||||
11, 12, 1, 8, 13, 1, 8, 14,
|
||||
1, 8, 15, 1, 11, 1, 8, 16,
|
||||
1, 8, 17, 1, 8, 18, 1, 8,
|
||||
19, 20, 1, 8, 21, 1, 8, 22,
|
||||
1, 8, 12, 1, 8, 23, 1, 8,
|
||||
24, 1, 8, 25, 1, 8, 26, 1,
|
||||
8, 27, 1, 8, 15, 1, 8, 28,
|
||||
1, 11, 14, 1, 8, 15, 29, 1,
|
||||
8, 30, 1, 8, 31, 1, 8, 32,
|
||||
1, 8, 33, 1, 8, 34, 35, 1,
|
||||
8, 36, 1, 8, 37, 1, 8, 38,
|
||||
1, 8, 39, 1, 8, 40, 1, 8,
|
||||
41, 1, 11, 41, 1, 8, 42, 1,
|
||||
8, 43, 1, 8, 44, 1, 8, 45,
|
||||
1, 8, 46, 1, 8, 47, 1, 8,
|
||||
48, 1, 8, 39, 1, 8, 49, 1,
|
||||
8, 50, 1, 8, 51, 52, 1, 8,
|
||||
53, 1, 8, 54, 1, 8, 55, 1,
|
||||
8, 56, 1, 8, 57, 1, 8, 58,
|
||||
1, 8, 59, 1, 8, 60, 1, 8,
|
||||
61, 1, 8, 62, 1, 8, 15, 1,
|
||||
8, 63, 1, 8, 64, 1, 8, 65,
|
||||
1, 8, 66, 1, 8, 67, 1, 8,
|
||||
68, 1, 8, 69, 1, 8, 15, 1,
|
||||
8, 70, 71, 1, 8, 72, 1, 73,
|
||||
1, 8, 74, 1, 75, 76, 77, 78,
|
||||
1, 8, 15, 1, 8, 15, 1, 75,
|
||||
1, 80, 79, 82, 81, 0
|
||||
};
|
||||
|
||||
static const char _ControlVerbs_trans_targs[] = {
|
||||
75, 1, 2, 9, 22, 24, 45, 67,
|
||||
75, 3, 4, 75, 5, 6, 7, 8,
|
||||
10, 11, 12, 13, 16, 14, 15, 17,
|
||||
18, 19, 20, 21, 23, 25, 26, 27,
|
||||
28, 29, 30, 37, 31, 32, 33, 34,
|
||||
35, 36, 38, 39, 40, 41, 42, 43,
|
||||
44, 46, 47, 48, 59, 49, 50, 51,
|
||||
52, 53, 54, 55, 56, 57, 58, 60,
|
||||
61, 62, 63, 64, 65, 66, 68, 70,
|
||||
69, 75, 71, 75, 72, 73, 74, 75,
|
||||
76, 75, 0
|
||||
};
|
||||
|
||||
static const char _ControlVerbs_trans_actions[] = {
|
||||
19, 0, 0, 0, 0, 0, 0, 0,
|
||||
13, 0, 0, 11, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 9, 0, 7, 0, 0, 0, 15,
|
||||
5, 17, 0
|
||||
};
|
||||
|
||||
static const char _ControlVerbs_to_state_actions[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 0
|
||||
};
|
||||
|
||||
static const char _ControlVerbs_from_state_actions[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 3, 0
|
||||
};
|
||||
|
||||
static const short _ControlVerbs_eof_trans[] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 0, 82
|
||||
};
|
||||
|
||||
static const int ControlVerbs_start = 75;
|
||||
static const int ControlVerbs_first_final = 75;
|
||||
static const int ControlVerbs_error = -1;
|
||||
|
||||
static const int ControlVerbs_en_main = 75;
|
||||
|
||||
|
||||
#line 249 "control_verbs.cpp"
|
||||
{
|
||||
cs = ControlVerbs_start;
|
||||
ts = 0;
|
||||
te = 0;
|
||||
act = 0;
|
||||
}
|
||||
|
||||
#line 105 "control_verbs.rl"
|
||||
|
||||
|
||||
try {
|
||||
|
||||
#line 262 "control_verbs.cpp"
|
||||
{
|
||||
int _klen;
|
||||
unsigned int _trans;
|
||||
const char *_acts;
|
||||
unsigned int _nacts;
|
||||
const char *_keys;
|
||||
|
||||
if ( p == pe )
|
||||
goto _test_eof;
|
||||
_resume:
|
||||
_acts = _ControlVerbs_actions + _ControlVerbs_from_state_actions[cs];
|
||||
_nacts = (unsigned int) *_acts++;
|
||||
while ( _nacts-- > 0 ) {
|
||||
switch ( *_acts++ ) {
|
||||
case 1:
|
||||
#line 1 "NONE"
|
||||
{ts = p;}
|
||||
break;
|
||||
#line 281 "control_verbs.cpp"
|
||||
}
|
||||
}
|
||||
|
||||
_keys = _ControlVerbs_trans_keys + _ControlVerbs_key_offsets[cs];
|
||||
_trans = _ControlVerbs_index_offsets[cs];
|
||||
|
||||
_klen = _ControlVerbs_single_lengths[cs];
|
||||
if ( _klen > 0 ) {
|
||||
const char *_lower = _keys;
|
||||
const char *_mid;
|
||||
const char *_upper = _keys + _klen - 1;
|
||||
while (1) {
|
||||
if ( _upper < _lower )
|
||||
break;
|
||||
|
||||
_mid = _lower + ((_upper-_lower) >> 1);
|
||||
if ( (*p) < *_mid )
|
||||
_upper = _mid - 1;
|
||||
else if ( (*p) > *_mid )
|
||||
_lower = _mid + 1;
|
||||
else {
|
||||
_trans += (unsigned int)(_mid - _keys);
|
||||
goto _match;
|
||||
}
|
||||
}
|
||||
_keys += _klen;
|
||||
_trans += _klen;
|
||||
}
|
||||
|
||||
_klen = _ControlVerbs_range_lengths[cs];
|
||||
if ( _klen > 0 ) {
|
||||
const char *_lower = _keys;
|
||||
const char *_mid;
|
||||
const char *_upper = _keys + (_klen<<1) - 2;
|
||||
while (1) {
|
||||
if ( _upper < _lower )
|
||||
break;
|
||||
|
||||
_mid = _lower + (((_upper-_lower) >> 1) & ~1);
|
||||
if ( (*p) < _mid[0] )
|
||||
_upper = _mid - 2;
|
||||
else if ( (*p) > _mid[1] )
|
||||
_lower = _mid + 2;
|
||||
else {
|
||||
_trans += (unsigned int)((_mid - _keys)>>1);
|
||||
goto _match;
|
||||
}
|
||||
}
|
||||
_trans += _klen;
|
||||
}
|
||||
|
||||
_match:
|
||||
_trans = _ControlVerbs_indicies[_trans];
|
||||
_eof_trans:
|
||||
cs = _ControlVerbs_trans_targs[_trans];
|
||||
|
||||
if ( _ControlVerbs_trans_actions[_trans] == 0 )
|
||||
goto _again;
|
||||
|
||||
_acts = _ControlVerbs_actions + _ControlVerbs_trans_actions[_trans];
|
||||
_nacts = (unsigned int) *_acts++;
|
||||
while ( _nacts-- > 0 )
|
||||
{
|
||||
switch ( *_acts++ )
|
||||
{
|
||||
case 2:
|
||||
#line 1 "NONE"
|
||||
{te = p+1;}
|
||||
break;
|
||||
case 3:
|
||||
#line 76 "control_verbs.rl"
|
||||
{te = p+1;{
|
||||
mode.utf8 = true;
|
||||
}}
|
||||
break;
|
||||
case 4:
|
||||
#line 80 "control_verbs.rl"
|
||||
{te = p+1;{
|
||||
mode.ucp = true;
|
||||
}}
|
||||
break;
|
||||
case 5:
|
||||
#line 84 "control_verbs.rl"
|
||||
{te = p+1;{
|
||||
ostringstream str;
|
||||
str << "Unsupported control verb " << string(ts, te - ts);
|
||||
throw LocatedParseError(str.str());
|
||||
}}
|
||||
break;
|
||||
case 6:
|
||||
#line 90 "control_verbs.rl"
|
||||
{te = p+1;{
|
||||
ostringstream str;
|
||||
str << "Unknown control verb " << string(ts, te - ts);
|
||||
throw LocatedParseError(str.str());
|
||||
}}
|
||||
break;
|
||||
case 7:
|
||||
#line 97 "control_verbs.rl"
|
||||
{te = p+1;{
|
||||
p--;
|
||||
{p++; goto _out; }
|
||||
}}
|
||||
break;
|
||||
case 8:
|
||||
#line 97 "control_verbs.rl"
|
||||
{te = p;p--;{
|
||||
p--;
|
||||
{p++; goto _out; }
|
||||
}}
|
||||
break;
|
||||
case 9:
|
||||
#line 97 "control_verbs.rl"
|
||||
{{p = ((te))-1;}{
|
||||
p--;
|
||||
{p++; goto _out; }
|
||||
}}
|
||||
break;
|
||||
#line 400 "control_verbs.cpp"
|
||||
}
|
||||
}
|
||||
|
||||
_again:
|
||||
_acts = _ControlVerbs_actions + _ControlVerbs_to_state_actions[cs];
|
||||
_nacts = (unsigned int) *_acts++;
|
||||
while ( _nacts-- > 0 ) {
|
||||
switch ( *_acts++ ) {
|
||||
case 0:
|
||||
#line 1 "NONE"
|
||||
{ts = 0;}
|
||||
break;
|
||||
#line 413 "control_verbs.cpp"
|
||||
}
|
||||
}
|
||||
|
||||
if ( ++p != pe )
|
||||
goto _resume;
|
||||
_test_eof: {}
|
||||
if ( p == eof )
|
||||
{
|
||||
if ( _ControlVerbs_eof_trans[cs] > 0 ) {
|
||||
_trans = _ControlVerbs_eof_trans[cs] - 1;
|
||||
goto _eof_trans;
|
||||
}
|
||||
}
|
||||
|
||||
_out: {}
|
||||
}
|
||||
|
||||
#line 109 "control_verbs.rl"
|
||||
} catch (LocatedParseError &error) {
|
||||
if (ts >= ptr && ts <= pe) {
|
||||
error.locate(ts - ptr + start);
|
||||
} else {
|
||||
error.locate(0);
|
||||
}
|
||||
throw;
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
@ -130,6 +130,7 @@ def parse_env_variables(
|
||||
ARM_SUFFIX = "-aarch64"
|
||||
FREEBSD_SUFFIX = "-freebsd"
|
||||
PPC_SUFFIX = "-ppc64le"
|
||||
AMD64_SSE2_SUFFIX = "-amd64sse2"
|
||||
|
||||
result = []
|
||||
result.append("OUTPUT_DIR=/output")
|
||||
@ -141,6 +142,7 @@ def parse_env_variables(
|
||||
is_cross_arm = compiler.endswith(ARM_SUFFIX)
|
||||
is_cross_ppc = compiler.endswith(PPC_SUFFIX)
|
||||
is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)
|
||||
is_amd64_sse2 = compiler.endswith(AMD64_SSE2_SUFFIX)
|
||||
|
||||
if is_cross_darwin:
|
||||
cc = compiler[: -len(DARWIN_SUFFIX)]
|
||||
@ -186,6 +188,10 @@ def parse_env_variables(
|
||||
cmake_flags.append(
|
||||
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake"
|
||||
)
|
||||
elif is_amd64_sse2:
|
||||
cc = compiler[: -len(AMD64_SSE2_SUFFIX)]
|
||||
result.append("DEB_ARCH=amd64")
|
||||
cmake_flags.append("-DNO_SSE3_OR_HIGHER=1")
|
||||
else:
|
||||
cc = compiler
|
||||
result.append("DEB_ARCH=amd64")
|
||||
@ -339,6 +345,7 @@ if __name__ == "__main__":
|
||||
"clang-14-darwin-aarch64",
|
||||
"clang-14-aarch64",
|
||||
"clang-14-ppc64le",
|
||||
"clang-14-amd64sse2",
|
||||
"clang-14-freebsd",
|
||||
"gcc-11",
|
||||
),
|
||||
|
@ -1,8 +1,15 @@
|
||||
#!/bin/bash
|
||||
# shellcheck disable=SC2086,SC2001,SC2046,SC2030,SC2031
|
||||
|
||||
set -eux
|
||||
set -x
|
||||
|
||||
# core.COMM.PID-TID
|
||||
sysctl kernel.core_pattern='core.%e.%p-%P'
|
||||
|
||||
set -e
|
||||
set -u
|
||||
set -o pipefail
|
||||
|
||||
trap "exit" INT TERM
|
||||
# The watchdog is in the separate process group, so we have to kill it separately
|
||||
# if the script terminates earlier.
|
||||
@ -87,6 +94,19 @@ function configure
|
||||
# TODO figure out which ones are needed
|
||||
cp -av --dereference "$repo_dir"/tests/config/config.d/listen.xml db/config.d
|
||||
cp -av --dereference "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d
|
||||
|
||||
cat > db/config.d/core.xml <<EOL
|
||||
<clickhouse>
|
||||
<core_dump>
|
||||
<!-- 100GiB -->
|
||||
<size_limit>107374182400</size_limit>
|
||||
</core_dump>
|
||||
<!-- NOTE: no need to configure core_path,
|
||||
since clickhouse is not started as daemon (via clickhouse start)
|
||||
-->
|
||||
<core_path>$PWD</core_path>
|
||||
</clickhouse>
|
||||
EOL
|
||||
}
|
||||
|
||||
function watchdog
|
||||
@ -180,7 +200,6 @@ handle SIGUSR2 nostop noprint pass
|
||||
handle SIG$RTMIN nostop noprint pass
|
||||
info signals
|
||||
continue
|
||||
gcore
|
||||
backtrace full
|
||||
thread apply all backtrace full
|
||||
info registers
|
||||
|
@ -8,6 +8,9 @@ services:
|
||||
- type: bind
|
||||
source: ${keeper_binary:-}
|
||||
target: /usr/bin/clickhouse
|
||||
- type: bind
|
||||
source: ${keeper_binary:-}
|
||||
target: /usr/bin/clickhouse-keeper
|
||||
- type: bind
|
||||
source: ${keeper_config_dir1:-}
|
||||
target: /etc/clickhouse-keeper
|
||||
@ -38,6 +41,9 @@ services:
|
||||
- type: bind
|
||||
source: ${keeper_binary:-}
|
||||
target: /usr/bin/clickhouse
|
||||
- type: bind
|
||||
source: ${keeper_binary:-}
|
||||
target: /usr/bin/clickhouse-keeper
|
||||
- type: bind
|
||||
source: ${keeper_config_dir2:-}
|
||||
target: /etc/clickhouse-keeper
|
||||
@ -68,6 +74,9 @@ services:
|
||||
- type: bind
|
||||
source: ${keeper_binary:-}
|
||||
target: /usr/bin/clickhouse
|
||||
- type: bind
|
||||
source: ${keeper_binary:-}
|
||||
target: /usr/bin/clickhouse-keeper
|
||||
- type: bind
|
||||
source: ${keeper_config_dir3:-}
|
||||
target: /etc/clickhouse-keeper
|
||||
|
@ -3,8 +3,14 @@
|
||||
# shellcheck disable=SC2086
|
||||
# shellcheck disable=SC2024
|
||||
|
||||
# Avoid overlaps with previous runs
|
||||
dmesg --clear
|
||||
|
||||
set -x
|
||||
|
||||
# core.COMM.PID-TID
|
||||
sysctl kernel.core_pattern='core.%e.%p-%P'
|
||||
|
||||
# Thread Fuzzer allows to check more permutations of possible thread scheduling
|
||||
# and find more potential issues.
|
||||
|
||||
@ -101,6 +107,19 @@ EOL
|
||||
</default>
|
||||
</profiles>
|
||||
</clickhouse>
|
||||
EOL
|
||||
|
||||
cat > /etc/clickhouse-server/config.d/core.xml <<EOL
|
||||
<clickhouse>
|
||||
<core_dump>
|
||||
<!-- 100GiB -->
|
||||
<size_limit>107374182400</size_limit>
|
||||
</core_dump>
|
||||
<!-- NOTE: no need to configure core_path,
|
||||
since clickhouse is not started as daemon (via clickhouse start)
|
||||
-->
|
||||
<core_path>$PWD</core_path>
|
||||
</clickhouse>
|
||||
EOL
|
||||
}
|
||||
|
||||
@ -157,7 +176,6 @@ handle SIGUSR2 nostop noprint pass
|
||||
handle SIG$RTMIN nostop noprint pass
|
||||
info signals
|
||||
continue
|
||||
gcore
|
||||
backtrace full
|
||||
thread apply all backtrace full
|
||||
info registers
|
||||
@ -501,8 +519,7 @@ done
|
||||
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
|
||||
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
|
||||
|
||||
# Core dumps (see gcore)
|
||||
# Default filename is 'core.PROCESS_ID'
|
||||
# Core dumps
|
||||
for core in core.*; do
|
||||
pigz $core
|
||||
mv $core.gz /test_output/
|
||||
|
@ -140,6 +140,6 @@ hash cmake
|
||||
|
||||
ClickHouse is available in pre-built binaries and packages. Binaries are portable and can be run on any Linux flavour.
|
||||
|
||||
They are built for stable, prestable and testing releases as long as for every commit to master and for every pull request.
|
||||
Binaries are built for stable and LTS releases, and also for every commit to `master` and for each pull request.
|
||||
|
||||
To find the freshest build from `master`, go to [commits page](https://github.com/ClickHouse/ClickHouse/commits/master), click on the first green check mark or red cross near commit, and click to the “Details” link right after “ClickHouse Build Check”.
|
||||
|
654
docs/en/getting-started/example-datasets/nypd_complaint_data.md
Normal file
654
docs/en/getting-started/example-datasets/nypd_complaint_data.md
Normal file
@ -0,0 +1,654 @@
|
||||
---
|
||||
slug: /en/getting-started/example-datasets/nypd_complaint_data
|
||||
sidebar_label: NYPD Complaint Data
|
||||
description: "Ingest and query Tab Separated Value data in 5 steps"
|
||||
title: NYPD Complaint Data
|
||||
---
|
||||
|
||||
Tab separated value, or TSV, files are common and may include field headings as the first line of the file. ClickHouse can ingest TSVs, and also can query TSVs without ingesting the files. This guide covers both of these cases. If you need to query or ingest CSV files, the same techniques work, simply substitute `TSV` with `CSV` in your format arguments.
|
||||
|
||||
While working through this guide you will:
|
||||
- **Investigate**: Query the structure and content of the TSV file.
|
||||
- **Determine the target ClickHouse schema**: Choose proper data types and map the existing data to those types.
|
||||
- **Create a ClickHouse table**.
|
||||
- **Preprocess and stream** the data to ClickHouse.
|
||||
- **Run some queries** against ClickHouse.
|
||||
|
||||
The dataset used in this guide comes from the NYC Open Data team, and contains data about "all valid felony, misdemeanor, and violation crimes reported to the New York City Police Department (NYPD)". At the time of writing, the data file is 166MB, but it is updated regularly.
|
||||
|
||||
**Source**: [data.cityofnewyork.us](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243)
|
||||
**Terms of use**: https://www1.nyc.gov/home/terms-of-use.page
|
||||
|
||||
## Prerequisites
|
||||
- Download the dataset by visiting the [NYPD Complaint Data Current (Year To Date)](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243) page, clicking the Export button, and choosing **TSV for Excel**.
|
||||
- Install [ClickHouse server and client](../../getting-started/install.md).
|
||||
- [Launch](../../getting-started/install.md#launch) ClickHouse server, and connect with `clickhouse-client`
|
||||
|
||||
### A note about the commands described in this guide
|
||||
There are two types of commands in this guide:
|
||||
- Some of the commands query the TSV files; these are run at the command prompt.
|
||||
- The rest of the commands are querying ClickHouse, and these are run in the `clickhouse-client` or Play UI.
|
||||
|
||||
:::note
|
||||
The examples in this guide assume that you have saved the TSV file to `${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv`, please adjust the commands if needed.
|
||||
:::
|
||||
|
||||
## Familiarize yourself with the TSV file
|
||||
|
||||
Before starting to work with the ClickHouse database, familiarize yourself with the data.
|
||||
|
||||
### Look at the fields in the source TSV file
|
||||
|
||||
This is an example of a command to query a TSV file, but don't run it yet.
|
||||
```sh
|
||||
clickhouse-local --query \
|
||||
"describe file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')"
|
||||
```
|
||||
|
||||
Sample response
|
||||
```response
|
||||
CMPLNT_NUM Nullable(Float64)
|
||||
ADDR_PCT_CD Nullable(Float64)
|
||||
BORO_NM Nullable(String)
|
||||
CMPLNT_FR_DT Nullable(String)
|
||||
CMPLNT_FR_TM Nullable(String)
|
||||
```
|
||||
|
||||
:::tip
|
||||
Most of the time the above command will let you know which fields in the input data are numeric, and which are strings, and which are tuples. This is not always the case. Because ClickHouse is routinely used with datasets containing billions of records, there is a default number (100) of rows examined to [infer the schema](../../guides/developer/working-with-json/json-semi-structured.md/#relying-on-schema-inference) in order to avoid parsing billions of rows to infer the schema. The response below may not match what you see, as the dataset is updated several times each year. Looking at the Data Dictionary you can see that CMPLNT_NUM is specified as text, and not numeric. By overriding the default of 100 rows for inference with the setting `SETTINGS input_format_max_rows_to_read_for_schema_inference=2000`
|
||||
you can get a better idea of the content.
|
||||
|
||||
Note: as of version 22.5 the default is now 25,000 rows for inferring the schema, so only change the setting if you are on an older version or if you need more than 25,000 rows to be sampled.
|
||||
:::
|
||||
|
||||
Run this command at your command prompt. You will be using `clickhouse-local` to query the data in the TSV file you downloaded.
|
||||
```sh
|
||||
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
|
||||
--query \
|
||||
"describe file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')"
|
||||
```
|
||||
|
||||
Result:
|
||||
```response
|
||||
CMPLNT_NUM Nullable(String)
|
||||
ADDR_PCT_CD Nullable(Float64)
|
||||
BORO_NM Nullable(String)
|
||||
CMPLNT_FR_DT Nullable(String)
|
||||
CMPLNT_FR_TM Nullable(String)
|
||||
CMPLNT_TO_DT Nullable(String)
|
||||
CMPLNT_TO_TM Nullable(String)
|
||||
CRM_ATPT_CPTD_CD Nullable(String)
|
||||
HADEVELOPT Nullable(String)
|
||||
HOUSING_PSA Nullable(Float64)
|
||||
JURISDICTION_CODE Nullable(Float64)
|
||||
JURIS_DESC Nullable(String)
|
||||
KY_CD Nullable(Float64)
|
||||
LAW_CAT_CD Nullable(String)
|
||||
LOC_OF_OCCUR_DESC Nullable(String)
|
||||
OFNS_DESC Nullable(String)
|
||||
PARKS_NM Nullable(String)
|
||||
PATROL_BORO Nullable(String)
|
||||
PD_CD Nullable(Float64)
|
||||
PD_DESC Nullable(String)
|
||||
PREM_TYP_DESC Nullable(String)
|
||||
RPT_DT Nullable(String)
|
||||
STATION_NAME Nullable(String)
|
||||
SUSP_AGE_GROUP Nullable(String)
|
||||
SUSP_RACE Nullable(String)
|
||||
SUSP_SEX Nullable(String)
|
||||
TRANSIT_DISTRICT Nullable(Float64)
|
||||
VIC_AGE_GROUP Nullable(String)
|
||||
VIC_RACE Nullable(String)
|
||||
VIC_SEX Nullable(String)
|
||||
X_COORD_CD Nullable(Float64)
|
||||
Y_COORD_CD Nullable(Float64)
|
||||
Latitude Nullable(Float64)
|
||||
Longitude Nullable(Float64)
|
||||
Lat_Lon Tuple(Nullable(Float64), Nullable(Float64))
|
||||
New Georeferenced Column Nullable(String)
|
||||
```
|
||||
|
||||
At this point you should check that the columns in the TSV file match the names and types specified in the **Columns in this Dataset** section of the [dataset web page](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243). The data types are not very specific, all numeric fields are set to `Nullable(Float64)`, and all other fields are `Nullable(String)`. When you create a ClickHouse table to store the data you can specify more appropriate and performant types.
|
||||
|
||||
### Determine the proper schema
|
||||
|
||||
In order to figure out what types should be used for the fields it is necessary to know what the data looks like. For example, the field `JURISDICTION_CODE` is a numeric: should it be a `UInt8`, or an `Enum`, or is `Float64` appropriate?
|
||||
|
||||
```sh
|
||||
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
|
||||
--query \
|
||||
"select JURISDICTION_CODE, count() FROM
|
||||
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
|
||||
GROUP BY JURISDICTION_CODE
|
||||
ORDER BY JURISDICTION_CODE
|
||||
FORMAT PrettyCompact"
|
||||
```
|
||||
|
||||
Result:
|
||||
```response
|
||||
┌─JURISDICTION_CODE─┬─count()─┐
|
||||
│ 0 │ 188875 │
|
||||
│ 1 │ 4799 │
|
||||
│ 2 │ 13833 │
|
||||
│ 3 │ 656 │
|
||||
│ 4 │ 51 │
|
||||
│ 6 │ 5 │
|
||||
│ 7 │ 2 │
|
||||
│ 9 │ 13 │
|
||||
│ 11 │ 14 │
|
||||
│ 12 │ 5 │
|
||||
│ 13 │ 2 │
|
||||
│ 14 │ 70 │
|
||||
│ 15 │ 20 │
|
||||
│ 72 │ 159 │
|
||||
│ 87 │ 9 │
|
||||
│ 88 │ 75 │
|
||||
│ 97 │ 405 │
|
||||
└───────────────────┴─────────┘
|
||||
```
|
||||
|
||||
The query response shows that the `JURISDICTION_CODE` fits well in a `UInt8`.
|
||||
|
||||
Similarly, look at some of the `String` fields and see if they are well suited to being `DateTime` or [`LowCardinality(String)`](../../sql-reference/data-types/lowcardinality.md) fields.
|
||||
|
||||
For example, the field `PARKS_NM` is described as "Name of NYC park, playground or greenspace of occurrence, if applicable (state parks are not included)". The names of parks in New York City may be a good candidate for a `LowCardinality(String)`:
|
||||
|
||||
```sh
|
||||
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
|
||||
--query \
|
||||
"select count(distinct PARKS_NM) FROM
|
||||
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
|
||||
FORMAT PrettyCompact"
|
||||
```
|
||||
|
||||
Result:
|
||||
```response
|
||||
┌─uniqExact(PARKS_NM)─┐
|
||||
│ 319 │
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
Have a look at some of the park names:
|
||||
```sh
|
||||
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
|
||||
--query \
|
||||
"select distinct PARKS_NM FROM
|
||||
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
|
||||
LIMIT 10
|
||||
FORMAT PrettyCompact"
|
||||
```
|
||||
|
||||
Result:
|
||||
```response
|
||||
┌─PARKS_NM───────────────────┐
|
||||
│ (null) │
|
||||
│ ASSER LEVY PARK │
|
||||
│ JAMES J WALKER PARK │
|
||||
│ BELT PARKWAY/SHORE PARKWAY │
|
||||
│ PROSPECT PARK │
|
||||
│ MONTEFIORE SQUARE │
|
||||
│ SUTTON PLACE PARK │
|
||||
│ JOYCE KILMER PARK │
|
||||
│ ALLEY ATHLETIC PLAYGROUND │
|
||||
│ ASTORIA PARK │
|
||||
└────────────────────────────┘
|
||||
```
|
||||
|
||||
The dataset in use at the time of writing has only a few hundred distinct parks and playgrounds in the `PARKS_NM` column. This is a small number based on the [LowCardinality](../../sql-reference/data-types/lowcardinality.md#lowcardinality-dscr) recommendation to stay below 10,000 distinct strings in a `LowCardinality(String)` field.
|
||||
|
||||
### DateTime fields
|
||||
Based on the **Columns in this Dataset** section of the [dataset web page](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243) there are date and time fields for the start and end of the reported event. Looking at the min and max of the `CMPLNT_FR_DT` and `CMPLNT_TO_DT` gives an idea of whether or not the fields are always populated:
|
||||
|
||||
```sh title="CMPLNT_FR_DT"
|
||||
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
|
||||
--query \
|
||||
"select min(CMPLNT_FR_DT), max(CMPLNT_FR_DT) FROM
|
||||
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
|
||||
FORMAT PrettyCompact"
|
||||
```
|
||||
|
||||
Result:
|
||||
```response
|
||||
┌─min(CMPLNT_FR_DT)─┬─max(CMPLNT_FR_DT)─┐
|
||||
│ 01/01/1973 │ 12/31/2021 │
|
||||
└───────────────────┴───────────────────┘
|
||||
```
|
||||
|
||||
```sh title="CMPLNT_TO_DT"
|
||||
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
|
||||
--query \
|
||||
"select min(CMPLNT_TO_DT), max(CMPLNT_TO_DT) FROM
|
||||
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
|
||||
FORMAT PrettyCompact"
|
||||
```
|
||||
|
||||
Result:
|
||||
```response
|
||||
┌─min(CMPLNT_TO_DT)─┬─max(CMPLNT_TO_DT)─┐
|
||||
│ │ 12/31/2021 │
|
||||
└───────────────────┴───────────────────┘
|
||||
```
|
||||
|
||||
```sh title="CMPLNT_FR_TM"
|
||||
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
|
||||
--query \
|
||||
"select min(CMPLNT_FR_TM), max(CMPLNT_FR_TM) FROM
|
||||
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
|
||||
FORMAT PrettyCompact"
|
||||
```
|
||||
|
||||
Result:
|
||||
```response
|
||||
┌─min(CMPLNT_FR_TM)─┬─max(CMPLNT_FR_TM)─┐
|
||||
│ 00:00:00 │ 23:59:00 │
|
||||
└───────────────────┴───────────────────┘
|
||||
```
|
||||
|
||||
```sh title="CMPLNT_TO_TM"
|
||||
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
|
||||
--query \
|
||||
"select min(CMPLNT_TO_TM), max(CMPLNT_TO_TM) FROM
|
||||
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
|
||||
FORMAT PrettyCompact"
|
||||
```
|
||||
|
||||
Result:
|
||||
```response
|
||||
┌─min(CMPLNT_TO_TM)─┬─max(CMPLNT_TO_TM)─┐
|
||||
│ (null) │ 23:59:00 │
|
||||
└───────────────────┴───────────────────┘
|
||||
```
|
||||
|
||||
## Make a plan
|
||||
|
||||
Based on the above investigation:
|
||||
- `JURISDICTION_CODE` should be cast as `UInt8`.
|
||||
- `PARKS_NM` should be cast to `LowCardinality(String)`
|
||||
- `CMPLNT_FR_DT` and `CMPLNT_FR_TM` are always populated (possibly with a default time of `00:00:00`)
|
||||
- `CMPLNT_TO_DT` and `CMPLNT_TO_TM` may be empty
|
||||
- Dates and times are stored in separate fields in the source
|
||||
- Dates are `mm/dd/yyyy` format
|
||||
- Times are `hh:mm:ss` format
|
||||
- Dates and times can be concatenated into DateTime types
|
||||
- There are some dates before January 1st 1970, which means we need a 64 bit DateTime
|
||||
|
||||
:::note
|
||||
There are many more changes to be made to the types, they all can be determined by following the same investigation steps. Look at the number of distinct strings in a field, the min and max of the numerics, and make your decisions. The table schema that is given later in the guide has many low cardinality strings and unsigned integer fields and very few floating point numerics.
|
||||
:::
|
||||
|
||||
## Concatenate the date and time fields
|
||||
|
||||
To concatenate the date and time fields `CMPLNT_FR_DT` and `CMPLNT_FR_TM` into a single `String` that can be cast to a `DateTime`, select the two fields joined by the concatenation operator: `CMPLNT_FR_DT || ' ' || CMPLNT_FR_TM`. The `CMPLNT_TO_DT` and `CMPLNT_TO_TM` fields are handled similarly.
|
||||
|
||||
```sh
|
||||
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
|
||||
--query \
|
||||
"select CMPLNT_FR_DT || ' ' || CMPLNT_FR_TM AS complaint_begin FROM
|
||||
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
|
||||
LIMIT 10
|
||||
FORMAT PrettyCompact"
|
||||
```
|
||||
|
||||
Result:
|
||||
```response
|
||||
┌─complaint_begin─────┐
|
||||
│ 07/29/2010 00:01:00 │
|
||||
│ 12/01/2011 12:00:00 │
|
||||
│ 04/01/2017 15:00:00 │
|
||||
│ 03/26/2018 17:20:00 │
|
||||
│ 01/01/2019 00:00:00 │
|
||||
│ 06/14/2019 00:00:00 │
|
||||
│ 11/29/2021 20:00:00 │
|
||||
│ 12/04/2021 00:35:00 │
|
||||
│ 12/05/2021 12:50:00 │
|
||||
│ 12/07/2021 20:30:00 │
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
## Convert the date and time String to a DateTime64 type
|
||||
|
||||
Earlier in the guide we discovered that there are dates in the TSV file before January 1st 1970, which means that we need a 64 bit DateTime type for the dates. The dates also need to be converted from `MM/DD/YYYY` to `YYYY/MM/DD` format. Both of these can be done with [`parseDateTime64BestEffort()`](../../sql-reference/functions/type-conversion-functions.md#parsedatetime64besteffort).
|
||||
|
||||
```sh
|
||||
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
|
||||
--query \
|
||||
"WITH (CMPLNT_FR_DT || ' ' || CMPLNT_FR_TM) AS CMPLNT_START,
|
||||
(CMPLNT_TO_DT || ' ' || CMPLNT_TO_TM) AS CMPLNT_END
|
||||
select parseDateTime64BestEffort(CMPLNT_START) AS complaint_begin,
|
||||
parseDateTime64BestEffortOrNull(CMPLNT_END) AS complaint_end
|
||||
FROM file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
|
||||
ORDER BY complaint_begin ASC
|
||||
LIMIT 25
|
||||
FORMAT PrettyCompact"
|
||||
```
|
||||
|
||||
Lines 2 and 3 above contain the concatenation from the previous step, and lines 4 and 5 above parse the strings into `DateTime64`. As the complaint end time is not guaranteed to exist `parseDateTime64BestEffortOrNull` is used.
|
||||
|
||||
Result:
|
||||
```response
|
||||
┌─────────complaint_begin─┬───────────complaint_end─┐
|
||||
│ 1925-01-01 10:00:00.000 │ 2021-02-12 09:30:00.000 │
|
||||
│ 1925-01-01 11:37:00.000 │ 2022-01-16 11:49:00.000 │
|
||||
│ 1925-01-01 15:00:00.000 │ 2021-12-31 00:00:00.000 │
|
||||
│ 1925-01-01 15:00:00.000 │ 2022-02-02 22:00:00.000 │
|
||||
│ 1925-01-01 19:00:00.000 │ 2022-04-14 05:00:00.000 │
|
||||
│ 1955-09-01 19:55:00.000 │ 2022-08-01 00:45:00.000 │
|
||||
│ 1972-03-17 11:40:00.000 │ 2022-03-17 11:43:00.000 │
|
||||
│ 1972-05-23 22:00:00.000 │ 2022-05-24 09:00:00.000 │
|
||||
│ 1972-05-30 23:37:00.000 │ 2022-05-30 23:50:00.000 │
|
||||
│ 1972-07-04 02:17:00.000 │ ᴺᵁᴸᴸ │
|
||||
│ 1973-01-01 00:00:00.000 │ ᴺᵁᴸᴸ │
|
||||
│ 1975-01-01 00:00:00.000 │ ᴺᵁᴸᴸ │
|
||||
│ 1976-11-05 00:01:00.000 │ 1988-10-05 23:59:00.000 │
|
||||
│ 1977-01-01 00:00:00.000 │ 1977-01-01 23:59:00.000 │
|
||||
│ 1977-12-20 00:01:00.000 │ ᴺᵁᴸᴸ │
|
||||
│ 1981-01-01 00:01:00.000 │ ᴺᵁᴸᴸ │
|
||||
│ 1981-08-14 00:00:00.000 │ 1987-08-13 23:59:00.000 │
|
||||
│ 1983-01-07 00:00:00.000 │ 1990-01-06 00:00:00.000 │
|
||||
│ 1984-01-01 00:01:00.000 │ 1984-12-31 23:59:00.000 │
|
||||
│ 1985-01-01 12:00:00.000 │ 1987-12-31 15:00:00.000 │
|
||||
│ 1985-01-11 09:00:00.000 │ 1985-12-31 12:00:00.000 │
|
||||
│ 1986-03-16 00:05:00.000 │ 2022-03-16 00:45:00.000 │
|
||||
│ 1987-01-07 00:00:00.000 │ 1987-01-09 00:00:00.000 │
|
||||
│ 1988-04-03 18:30:00.000 │ 2022-08-03 09:45:00.000 │
|
||||
│ 1988-07-29 12:00:00.000 │ 1990-07-27 22:00:00.000 │
|
||||
└─────────────────────────┴─────────────────────────┘
|
||||
```
|
||||
:::note
|
||||
The dates shown as `1925` above are from errors in the data. There are several records in the original data with dates in the years `1019` - `1022` that should be `2019` - `2022`. They are being stored as Jan 1st 1925 as that is the earliest date with a 64 bit DateTime.
|
||||
:::
|
||||
|
||||
## Create a table
|
||||
|
||||
The decisions made above on the data types used for the columns are reflected in the table schema
|
||||
below. We also need to decide on the `ORDER BY` and `PRIMARY KEY` used for the table. At least one
|
||||
of `ORDER BY` or `PRIMARY KEY` must be specified. Here are some guidelines on deciding on the
|
||||
columns to include in `ORDER BY`, and more information is in the *Next Steps* section at the end
|
||||
of this document.
|
||||
|
||||
### Order By and Primary Key clauses
|
||||
|
||||
- The `ORDER BY` tuple should include fields that are used in query filters
|
||||
- To maximize compression on disk the `ORDER BY` tuple should be ordered by ascending cardinality
|
||||
- If it exists, the `PRIMARY KEY` tuple must be a subset of the `ORDER BY` tuple
|
||||
- If only `ORDER BY` is specified, then the same tuple will be used as `PRIMARY KEY`
|
||||
- The primary key index is created using the `PRIMARY KEY` tuple if specified, otherwise the `ORDER BY` tuple
|
||||
- The `PRIMARY KEY` index is kept in main memory
|
||||
|
||||
Looking at the dataset and the questions that might be answered by querying it we might
|
||||
decide that we would look at the types of crimes reported over time in the five boroughs of
|
||||
New York City. These fields might be then included in the `ORDER BY`:
|
||||
|
||||
| Column | Description (from the data dictionary) |
|
||||
| ----------- | --------------------------------------------------- |
|
||||
| OFNS_DESC | Description of offense corresponding with key code |
|
||||
| RPT_DT | Date event was reported to police |
|
||||
| BORO_NM | The name of the borough in which the incident occurred |
|
||||
|
||||
|
||||
Querying the TSV file for the cardinality of the three candidate columns:
|
||||
|
||||
```bash
|
||||
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
|
||||
--query \
|
||||
"select formatReadableQuantity(uniq(OFNS_DESC)) as cardinality_OFNS_DESC,
|
||||
formatReadableQuantity(uniq(RPT_DT)) as cardinality_RPT_DT,
|
||||
formatReadableQuantity(uniq(BORO_NM)) as cardinality_BORO_NM
|
||||
FROM
|
||||
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
|
||||
FORMAT PrettyCompact"
|
||||
```
|
||||
|
||||
Result:
|
||||
```response
|
||||
┌─cardinality_OFNS_DESC─┬─cardinality_RPT_DT─┬─cardinality_BORO_NM─┐
|
||||
│ 60.00 │ 306.00 │ 6.00 │
|
||||
└───────────────────────┴────────────────────┴─────────────────────┘
|
||||
```
|
||||
Ordering by cardinality, the `ORDER BY` becomes:
|
||||
|
||||
```
|
||||
ORDER BY ( BORO_NM, OFNS_DESC, RPT_DT )
|
||||
```
|
||||
:::note
|
||||
The table below will use more easily read column names, the above names will be mapped to
|
||||
```
|
||||
ORDER BY ( borough, offense_description, date_reported )
|
||||
```
|
||||
:::
|
||||
|
||||
Putting together the changes to data types and the `ORDER BY` tuple gives this table structure:
|
||||
|
||||
```sql
|
||||
CREATE TABLE NYPD_Complaint (
|
||||
complaint_number String,
|
||||
precinct UInt8,
|
||||
borough LowCardinality(String),
|
||||
complaint_begin DateTime64(0,'America/New_York'),
|
||||
complaint_end DateTime64(0,'America/New_York'),
|
||||
was_crime_completed String,
|
||||
housing_authority String,
|
||||
housing_level_code UInt32,
|
||||
jurisdiction_code UInt8,
|
||||
jurisdiction LowCardinality(String),
|
||||
offense_code UInt8,
|
||||
offense_level LowCardinality(String),
|
||||
location_descriptor LowCardinality(String),
|
||||
offense_description LowCardinality(String),
|
||||
park_name LowCardinality(String),
|
||||
patrol_borough LowCardinality(String),
|
||||
PD_CD UInt16,
|
||||
PD_DESC String,
|
||||
location_type LowCardinality(String),
|
||||
date_reported Date,
|
||||
transit_station LowCardinality(String),
|
||||
suspect_age_group LowCardinality(String),
|
||||
suspect_race LowCardinality(String),
|
||||
suspect_sex LowCardinality(String),
|
||||
transit_district UInt8,
|
||||
victim_age_group LowCardinality(String),
|
||||
victim_race LowCardinality(String),
|
||||
victim_sex LowCardinality(String),
|
||||
NY_x_coordinate UInt32,
|
||||
NY_y_coordinate UInt32,
|
||||
Latitude Float64,
|
||||
Longitude Float64
|
||||
) ENGINE = MergeTree
|
||||
ORDER BY ( borough, offense_description, date_reported )
|
||||
```
|
||||
|
||||
### Finding the primary key of a table
|
||||
|
||||
The ClickHouse `system` database, specifically `system.tables`, has all of the information about the table you
|
||||
just created. This query shows the `ORDER BY` (sorting key), and the `PRIMARY KEY`:
|
||||
```sql
|
||||
SELECT
|
||||
partition_key,
|
||||
sorting_key,
|
||||
primary_key,
|
||||
table
|
||||
FROM system.tables
|
||||
WHERE table = 'NYPD_Complaint'
|
||||
FORMAT Vertical
|
||||
```
|
||||
|
||||
Response
|
||||
```response
|
||||
Query id: 6a5b10bf-9333-4090-b36e-c7f08b1d9e01
|
||||
|
||||
Row 1:
|
||||
──────
|
||||
partition_key:
|
||||
sorting_key: borough, offense_description, date_reported
|
||||
primary_key: borough, offense_description, date_reported
|
||||
table: NYPD_Complaint
|
||||
|
||||
1 row in set. Elapsed: 0.001 sec.
|
||||
```
|
||||
|
||||
## Preprocess and Import Data {#preprocess-import-data}
|
||||
|
||||
We will use `clickhouse-local` tool for data preprocessing and `clickhouse-client` to upload it.
|
||||
|
||||
### `clickhouse-local` arguments used
|
||||
|
||||
:::tip
|
||||
`table='input'` appears in the arguments to clickhouse-local below. clickhouse-local takes the provided input (`cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv`) and inserts the input into a table. By default the table is named `table`. In this guide the name of the table is set to `input` to make the data flow clearer. The final argument to clickhouse-local is a query that selects from the table (`FROM input`) which is then piped to `clickhouse-client` to populate the table `NYPD_Complaint`.
|
||||
:::
|
||||
|
||||
```sh
|
||||
cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv \
|
||||
| clickhouse-local --table='input' --input-format='TSVWithNames' \
|
||||
--input_format_max_rows_to_read_for_schema_inference=2000 \
|
||||
--query "
|
||||
WITH (CMPLNT_FR_DT || ' ' || CMPLNT_FR_TM) AS CMPLNT_START,
|
||||
(CMPLNT_TO_DT || ' ' || CMPLNT_TO_TM) AS CMPLNT_END
|
||||
SELECT
|
||||
CMPLNT_NUM AS complaint_number,
|
||||
ADDR_PCT_CD AS precinct,
|
||||
BORO_NM AS borough,
|
||||
parseDateTime64BestEffort(CMPLNT_START) AS complaint_begin,
|
||||
parseDateTime64BestEffortOrNull(CMPLNT_END) AS complaint_end,
|
||||
CRM_ATPT_CPTD_CD AS was_crime_completed,
|
||||
HADEVELOPT AS housing_authority_development,
|
||||
HOUSING_PSA AS housing_level_code,
|
||||
JURISDICTION_CODE AS jurisdiction_code,
|
||||
JURIS_DESC AS jurisdiction,
|
||||
KY_CD AS offense_code,
|
||||
LAW_CAT_CD AS offense_level,
|
||||
LOC_OF_OCCUR_DESC AS location_descriptor,
|
||||
OFNS_DESC AS offense_description,
|
||||
PARKS_NM AS park_name,
|
||||
PATROL_BORO AS patrol_borough,
|
||||
PD_CD,
|
||||
PD_DESC,
|
||||
PREM_TYP_DESC AS location_type,
|
||||
toDate(parseDateTimeBestEffort(RPT_DT)) AS date_reported,
|
||||
STATION_NAME AS transit_station,
|
||||
SUSP_AGE_GROUP AS suspect_age_group,
|
||||
SUSP_RACE AS suspect_race,
|
||||
SUSP_SEX AS suspect_sex,
|
||||
TRANSIT_DISTRICT AS transit_district,
|
||||
VIC_AGE_GROUP AS victim_age_group,
|
||||
VIC_RACE AS victim_race,
|
||||
VIC_SEX AS victim_sex,
|
||||
X_COORD_CD AS NY_x_coordinate,
|
||||
Y_COORD_CD AS NY_y_coordinate,
|
||||
Latitude,
|
||||
Longitude
|
||||
FROM input" \
|
||||
| clickhouse-client --query='INSERT INTO NYPD_Complaint FORMAT TSV'
|
||||
```
|
||||
|
||||
## Validate the Data {#validate-data}
|
||||
|
||||
:::note
|
||||
The dataset changes once or more per year, so your counts may not match what is in this document.
|
||||
:::
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT count()
|
||||
FROM NYPD_Complaint
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─count()─┐
|
||||
│ 208993 │
|
||||
└─────────┘
|
||||
|
||||
1 row in set. Elapsed: 0.001 sec.
|
||||
```
|
||||
|
||||
The size of the dataset in ClickHouse is just 12% of the original TSV file. Compare the size of the original TSV file with the size of the table:
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT formatReadableSize(total_bytes)
|
||||
FROM system.tables
|
||||
WHERE name = 'NYPD_Complaint'
|
||||
```
|
||||
|
||||
Result:
|
||||
```text
|
||||
┌─formatReadableSize(total_bytes)─┐
|
||||
│ 8.63 MiB │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## Run Some Queries {#run-queries}
|
||||
|
||||
### Query 1. Compare the number of complaints by month
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
dateName('month', date_reported) AS month,
|
||||
count() AS complaints,
|
||||
bar(complaints, 0, 50000, 80)
|
||||
FROM NYPD_Complaint
|
||||
GROUP BY month
|
||||
ORDER BY complaints DESC
|
||||
```
|
||||
|
||||
Result:
|
||||
```response
|
||||
Query id: 7fbd4244-b32a-4acf-b1f3-c3aa198e74d9
|
||||
|
||||
┌─month─────┬─complaints─┬─bar(count(), 0, 50000, 80)───────────────────────────────┐
|
||||
│ March │ 34536 │ ███████████████████████████████████████████████████████▎ │
|
||||
│ May │ 34250 │ ██████████████████████████████████████████████████████▋ │
|
||||
│ April │ 32541 │ ████████████████████████████████████████████████████ │
|
||||
│ January │ 30806 │ █████████████████████████████████████████████████▎ │
|
||||
│ February │ 28118 │ ████████████████████████████████████████████▊ │
|
||||
│ November │ 7474 │ ███████████▊ │
|
||||
│ December │ 7223 │ ███████████▌ │
|
||||
│ October │ 7070 │ ███████████▎ │
|
||||
│ September │ 6910 │ ███████████ │
|
||||
│ August │ 6801 │ ██████████▊ │
|
||||
│ June │ 6779 │ ██████████▋ │
|
||||
│ July │ 6485 │ ██████████▍ │
|
||||
└───────────┴────────────┴──────────────────────────────────────────────────────────┘
|
||||
|
||||
12 rows in set. Elapsed: 0.006 sec. Processed 208.99 thousand rows, 417.99 KB (37.48 million rows/s., 74.96 MB/s.)
|
||||
```
|
||||
|
||||
### Query 2. Compare total number of complaints by Borough
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
borough,
|
||||
count() AS complaints,
|
||||
bar(complaints, 0, 125000, 60)
|
||||
FROM NYPD_Complaint
|
||||
GROUP BY borough
|
||||
ORDER BY complaints DESC
|
||||
```
|
||||
|
||||
Result:
|
||||
```response
|
||||
Query id: 8cdcdfd4-908f-4be0-99e3-265722a2ab8d
|
||||
|
||||
┌─borough───────┬─complaints─┬─bar(count(), 0, 125000, 60)──┐
|
||||
│ BROOKLYN │ 57947 │ ███████████████████████████▋ │
|
||||
│ MANHATTAN │ 53025 │ █████████████████████████▍ │
|
||||
│ QUEENS │ 44875 │ █████████████████████▌ │
|
||||
│ BRONX │ 44260 │ █████████████████████▏ │
|
||||
│ STATEN ISLAND │ 8503 │ ████ │
|
||||
│ (null) │ 383 │ ▏ │
|
||||
└───────────────┴────────────┴──────────────────────────────┘
|
||||
|
||||
6 rows in set. Elapsed: 0.008 sec. Processed 208.99 thousand rows, 209.43 KB (27.14 million rows/s., 27.20 MB/s.)
|
||||
```
|
||||
|
||||
## Next Steps
|
||||
|
||||
[A Practical Introduction to Sparse Primary Indexes in ClickHouse](../../guides/improving-query-performance/sparse-primary-indexes/sparse-primary-indexes-intro.md) discusses the differences in ClickHouse indexing compared to traditional relational databases, how ClickHouse builds and uses a sparse primary index, and indexing best practices.
|
@ -4,10 +4,9 @@ sidebar_position: 1
|
||||
keywords: [clickhouse, install, installation, docs]
|
||||
description: ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture.
|
||||
slug: /en/getting-started/install
|
||||
title: Installation
|
||||
---
|
||||
|
||||
# Installation
|
||||
|
||||
## System Requirements {#system-requirements}
|
||||
|
||||
ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture.
|
||||
@ -59,7 +58,7 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
|
||||
|
||||
</details>
|
||||
|
||||
You can replace `stable` with `lts` or `testing` to use different [release trains](../faq/operations/production.md) based on your needs.
|
||||
You can replace `stable` with `lts` to use different [release kinds](../faq/operations/production.md) based on your needs.
|
||||
|
||||
You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/stable).
|
||||
|
||||
@ -106,7 +105,7 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
|
||||
|
||||
</details>
|
||||
|
||||
If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). `prestable` is sometimes also available.
|
||||
You can replace `stable` with `lts` to use different [release kinds](../faq/operations/production.md) based on your needs.
|
||||
|
||||
Then run these commands to install packages:
|
||||
|
||||
@ -221,7 +220,7 @@ For non-Linux operating systems and for AArch64 CPU architecture, ClickHouse bui
|
||||
curl -O 'https://builds.clickhouse.com/master/aarch64/clickhouse' && chmod a+x ./clickhouse
|
||||
```
|
||||
|
||||
Run `sudo ./clickhouse install` to install ClickHouse system-wide (also with needed configuration files, configuring users etc.). Then run `clickhouse start` commands to start the clickhouse-server and `clickhouse-client` to connect to it.
|
||||
Run `sudo ./clickhouse install` to install ClickHouse system-wide (also with needed configuration files, configuring users etc.). Then run the `sudo clickhouse start` command to start the clickhouse-server and `clickhouse-client` to connect to it.
|
||||
|
||||
Use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data.
|
||||
|
||||
|
@ -2,10 +2,9 @@
|
||||
slug: /en/operations/backup
|
||||
sidebar_position: 49
|
||||
sidebar_label: Data backup and restore
|
||||
title: Data backup and restore
|
||||
---
|
||||
|
||||
# Data backup and restore
|
||||
|
||||
While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you can’t just drop tables with a MergeTree-like engine containing more than 50 Gb of data](server-configuration-parameters/settings.md#max-table-size-to-drop). However, these safeguards do not cover all possible cases and can be circumvented.
|
||||
|
||||
In order to effectively mitigate possible human errors, you should carefully prepare a strategy for backing up and restoring your data **in advance**.
|
||||
|
@ -20,6 +20,7 @@ Additional cache types:
|
||||
- [Avro format](../interfaces/formats.md#data-format-avro) schemas cache.
|
||||
- [Dictionaries](../sql-reference/dictionaries/index.md) data cache.
|
||||
- Schema inference cache.
|
||||
- [Filesystem cache](storing-data.md) over S3, Azure, Local and other disks.
|
||||
|
||||
Indirectly used:
|
||||
|
||||
|
@ -1452,7 +1452,7 @@ Port for communicating with clients over MySQL protocol.
|
||||
|
||||
**Possible values**
|
||||
|
||||
Positive integer.
|
||||
Positive integer to specify the port number to listen to or empty value to disable.
|
||||
|
||||
Example
|
||||
|
||||
@ -1466,7 +1466,7 @@ Port for communicating with clients over PostgreSQL protocol.
|
||||
|
||||
**Possible values**
|
||||
|
||||
Positive integer.
|
||||
Positive integer to specify the port number to listen to or empty value to disable.
|
||||
|
||||
Example
|
||||
|
||||
|
@ -1176,8 +1176,9 @@ Enables the quorum writes.
|
||||
|
||||
- If `insert_quorum < 2`, the quorum writes are disabled.
|
||||
- If `insert_quorum >= 2`, the quorum writes are enabled.
|
||||
- If `insert_quorum = 'auto'`, use majority number (`number_of_replicas / 2 + 1`) as quorum number.
|
||||
|
||||
Default value: 0.
|
||||
Default value: 0 - disabled.
|
||||
|
||||
Quorum writes
|
||||
|
||||
@ -1259,7 +1260,7 @@ Possible values:
|
||||
|
||||
Default value: 1.
|
||||
|
||||
By default, blocks inserted into replicated tables by the `INSERT` statement are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)).
|
||||
By default, blocks inserted into replicated tables by the `INSERT` statement are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)).
|
||||
For replicated tables, by default only the 100 most recent blocks for each partition are deduplicated (see [replicated_deduplication_window](merge-tree-settings.md#replicated-deduplication-window), [replicated_deduplication_window_seconds](merge-tree-settings.md/#replicated-deduplication-window-seconds)).
|
||||
For non-replicated tables, see [non_replicated_deduplication_window](merge-tree-settings.md/#non-replicated-deduplication-window).
|
||||
|
||||
|
@ -112,6 +112,119 @@ Example of disk configuration:
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
## Using local cache {#using-local-cache}
|
||||
|
||||
It is possible to configure local cache over disks in storage configuration starting from version 22.3. For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc. Cache uses `LRU` cache policy.
|
||||
|
||||
Example of configuration for versions later or equal to 22.8:
|
||||
|
||||
``` xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<s3>
|
||||
<type>s3</type>
|
||||
<endpoint>...</endpoint>
|
||||
... s3 configuration ...
|
||||
</s3>
|
||||
<cache>
|
||||
<type>cache</type>
|
||||
<disk>s3</disk>
|
||||
<path>/s3_cache/</path>
|
||||
<max_size>10000000</max_size>
|
||||
</cache>
|
||||
</disks>
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
Example of configuration for versions earlier than 22.8:
|
||||
|
||||
``` xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<s3>
|
||||
<type>s3</type>
|
||||
<endpoint>...</endpoint>
|
||||
... s3 configuration ...
|
||||
<data_cache_enabled>1</data_cache_enabled>
|
||||
<data_cache_size>10000000</data_cache_size>
|
||||
</s3>
|
||||
</disks>
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
Cache **configuration settings**:
|
||||
|
||||
- `path` - path to the directory with cache. Default: None, this setting is obligatory.
|
||||
|
||||
- `max_size` - maximum size of the cache in bytes. When the limit is reached, cache files are evicted according to the cache eviction policy. Default: None, this setting is obligatory.
|
||||
|
||||
- `cache_on_write_operations` - allow to turn on `write-through` cache (caching data on any write operations: `INSERT` queries, background merges). Default: `false`. The `write-through` cache can be disabled per query using setting `enable_filesystem_cache_on_write_operations` (data is cached only if both cache config settings and corresponding query setting are enabled).
|
||||
|
||||
- `enable_filesystem_query_cache_limit` - allow to limit the size of cache which is downloaded within each query (depends on user setting `max_query_cache_size`). Default: `false`.
|
||||
|
||||
- `enable_cache_hits_threshold` - a number, which defines how many times some data needs to be read before it will be cached. Default: `0`, i.e. the data is cached at the first attempt to read it.
|
||||
|
||||
- `do_not_evict_index_and_mark_files` - do not evict small frequently used files according to cache policy. Default: `true`.
|
||||
|
||||
- `max_file_segment_size` - a maximum size of a single cache file. Default: `104857600` (100 Mb).
|
||||
|
||||
- `max_elements` - a limit for a number of cache files. Default: `1048576`.
|
||||
|
||||
Cache **query settings**:
|
||||
|
||||
- `enable_filesystem_cache` - allows to disable cache per query even if storage policy was configured with `cache` disk type. Default: `true`.
|
||||
|
||||
- `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` - allows to use cache in query only if it already exists, otherwise query data will not be written to local cache storage. Default: `false`.
|
||||
|
||||
- `enable_filesystem_cache_on_write_operations` - turn on `write-through` cache. This setting works only if setting `cache_on_write_operations` in cache configuration is turned on.
|
||||
|
||||
- `enable_filesystem_cache_log` - turn on logging to `system.filesystem_cache_log` table. Gives a detailed view of cache usage per query. Default: `false`.
|
||||
|
||||
- `max_query_cache_size` - a limit for the cache size, which can be written to local cache storage. Requires enabled `enable_filesystem_query_cache_limit` in cache configuration. Default: `false`.
|
||||
|
||||
- `skip_download_if_exceeds_query_cache` - allows to change the behaviour of setting `max_query_cache_size`. Default: `true`. If this setting is turned on and the cache download limit during the query was reached, no more cache will be downloaded to cache storage. If this setting is turned off and the cache download limit during the query was reached, cache will still be written at the cost of evicting previously downloaded (within the current query) data, i.e. the second behaviour preserves `least recently used` behaviour while keeping the query cache limit.
|
||||
|
||||
**Warning**
|
||||
Cache configuration settings and cache query settings correspond to the latest ClickHouse version, for earlier versions something might not be supported.
|
||||
|
||||
Cache **system tables**:
|
||||
|
||||
- `system.filesystem_cache` - system table which shows the current state of the cache.
|
||||
|
||||
- `system.filesystem_cache_log` - system table which shows detailed cache usage per query. Requires `enable_filesystem_cache_log` setting to be `true`.
|
||||
|
||||
Cache **commands**:
|
||||
|
||||
- `SYSTEM DROP FILESYSTEM CACHE (<path>) (ON CLUSTER)`
|
||||
|
||||
- `SHOW CACHES` -- show list of caches which were configured on the server.
|
||||
|
||||
- `DESCRIBE CACHE '<cache_name>'` - show cache configuration and some general statistics for a specific cache. Cache name can be taken from `SHOW CACHES` command.
|
||||
|
||||
Cache current metrics:
|
||||
|
||||
- `FilesystemCacheSize`
|
||||
|
||||
- `FilesystemCacheElements`
|
||||
|
||||
Cache asynchronous metrics:
|
||||
|
||||
- `FilesystemCacheBytes`
|
||||
|
||||
- `FilesystemCacheFiles`
|
||||
|
||||
Cache profile events:
|
||||
|
||||
- `CachedReadBufferReadFromSourceBytes`, `CachedReadBufferReadFromCacheBytes`
|
||||
|
||||
- `CachedReadBufferReadFromSourceMicroseconds`, `CachedReadBufferReadFromCacheMicroseconds`
|
||||
|
||||
- `CachedReadBufferCacheWriteBytes`, `CachedReadBufferCacheWriteMicroseconds`
|
||||
|
||||
- `CachedWriteBufferCacheWriteBytes`, `CachedWriteBufferCacheWriteMicroseconds`
|
||||
|
||||
## Storing Data on Web Server {#storing-data-on-webserver}
|
||||
|
||||
There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`.
|
||||
|
@ -74,13 +74,16 @@ Make sure that [`fstrim`](https://en.wikipedia.org/wiki/Trim_(computing)) is ena
|
||||
|
||||
## File System {#file-system}
|
||||
|
||||
Ext4 is the most reliable option. Set the mount options `noatime`.
|
||||
XFS should be avoided. It works mostly fine but there are some reports about lower performance.
|
||||
Ext4 is the most reliable option. Set the mount options `noatime`. XFS works well too.
|
||||
Most other file systems should also work fine.
|
||||
|
||||
FAT-32 and exFAT are not supported due to lack of hard links.
|
||||
|
||||
Do not use compressed filesystems, because ClickHouse does compression on its own and better.
|
||||
It's not recommended to use encrypted filesystems, because you can use builtin encryption in ClickHouse, which is better.
|
||||
|
||||
While ClickHouse can work over NFS, it is not the best idea.
|
||||
|
||||
## Linux Kernel {#linux-kernel}
|
||||
|
||||
Don’t use an outdated Linux kernel.
|
||||
|
@ -640,7 +640,8 @@ Result:
|
||||
|
||||
## date\_diff
|
||||
|
||||
Returns the difference between two dates or dates with time values.
|
||||
Returns the difference between two dates or dates with time values.
|
||||
The difference is calculated using relative units, e.g. the difference between `2022-01-01` and `2021-12-29` is 3 days for day unit (see [toRelativeDayNum](#torelativedaynum)), 1 month for month unit (see [toRelativeMonthNum](#torelativemonthnum)), 1 year for year unit (see [toRelativeYearNum](#torelativeyearnum)).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -692,6 +693,25 @@ Result:
|
||||
└────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
toDate('2022-01-01') AS e,
|
||||
toDate('2021-12-29') AS s,
|
||||
dateDiff('day', s, e) AS day_diff,
|
||||
dateDiff('month', s, e) AS month__diff,
|
||||
dateDiff('year', s, e) AS year_diff;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌──────────e─┬──────────s─┬─day_diff─┬─month__diff─┬─year_diff─┐
|
||||
│ 2022-01-01 │ 2021-12-29 │ 3 │ 1 │ 1 │
|
||||
└────────────┴────────────┴──────────┴─────────────┴───────────┘
|
||||
```
|
||||
|
||||
## date\_sub
|
||||
|
||||
Subtracts the time interval or date interval from the provided date or date with time.
|
||||
|
@ -12,8 +12,9 @@ ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr
|
||||
|
||||
Deletes data matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
|
||||
|
||||
:::note
|
||||
The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use.
|
||||
|
||||
:::note
|
||||
The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use. `ALTER TABLE` is considered a heavyweight operation that requires the underlying data to be merged before it is deleted. For MergeTree tables, consider using the [`DELETE FROM` query](../delete.md), which performs a lightweight delete and can be considerably faster.
|
||||
:::
|
||||
|
||||
The `filter_expr` must be of type `UInt8`. The query deletes rows in the table for which this expression takes a non-zero value.
|
||||
|
37
docs/en/sql-reference/statements/delete.md
Normal file
37
docs/en/sql-reference/statements/delete.md
Normal file
@ -0,0 +1,37 @@
|
||||
---
|
||||
slug: /en/sql-reference/statements/delete
|
||||
sidebar_position: 36
|
||||
sidebar_label: DELETE
|
||||
---
|
||||
|
||||
# DELETE Statement
|
||||
|
||||
``` sql
|
||||
DELETE FROM [db.]table [WHERE expr]
|
||||
```
|
||||
|
||||
`DELETE FROM` removes rows from table `[db.]table` that match expression `expr`. The deleted rows are marked as deleted immediately and will be automatically filtered out of all subsequent queries. Cleanup of data happens asynchronously in background. This feature is only available for MergeTree table engine family.
|
||||
|
||||
For example, the following query deletes all rows from the `hits` table where the `Title` column contains the text `hello`:
|
||||
|
||||
```sql
|
||||
DELETE FROM hits WHERE Title LIKE '%hello%';
|
||||
```
|
||||
|
||||
Lightweight deletes are asynchronous by default. Set `mutations_sync` equal to 1 to wait for one replica to process the statement, and set `mutations_sync` to 2 to wait for all replicas.
|
||||
|
||||
:::note
|
||||
This feature is experimental and requires you to set `allow_experimental_lightweight_delete` to true:
|
||||
|
||||
```sql
|
||||
SET allow_experimental_lightweight_delete = true;
|
||||
```
|
||||
|
||||
:::
|
||||
|
||||
An [alternative way to delete rows](./alter/delete.md) in ClickHouse is `ALTER TABLE ... DELETE`, which might be more efficient if you do bulk deletes only occasionally and don't need the operation to be applied instantly. In most use cases the new lightweight `DELETE FROM` behavior will be considerably faster.
|
||||
|
||||
:::warning
|
||||
Even though deletes are becoming more lightweight in ClickHouse, they should still not be used as aggressively as on an OLTP system. Lightweight deletes are currently efficient for wide parts, but for compact parts they can be a heavyweight operation, and it may be better to use `ALTER TABLE` for some scenarios.
|
||||
:::
|
||||
|
@ -10,7 +10,7 @@ Makes the server "forget" about the existence of a table, a materialized view, o
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
DETACH TABLE|VIEW|DICTIONARY [IF EXISTS] [db.]name [ON CLUSTER cluster] [PERMANENTLY]
|
||||
DETACH TABLE|VIEW|DICTIONARY [IF EXISTS] [db.]name [ON CLUSTER cluster] [PERMANENTLY] [SYNC]
|
||||
```
|
||||
|
||||
Detaching does not delete the data or metadata of a table, a materialized view or a dictionary. If an entity was not detached `PERMANENTLY`, on the next server launch the server will read the metadata and recall the table/view/dictionary again. If an entity was detached `PERMANENTLY`, there will be no automatic recall.
|
||||
@ -24,6 +24,8 @@ Note that you can not detach permanently the table which is already detached (te
|
||||
|
||||
Also you can not [DROP](../../sql-reference/statements/drop#drop-table) the detached table, or [CREATE TABLE](../../sql-reference/statements/create/table.md) with the same name as detached permanently, or replace it with the other table with [RENAME TABLE](../../sql-reference/statements/rename.md) query.
|
||||
|
||||
The `SYNC` modifier executes the action without delay.
|
||||
|
||||
**Example**
|
||||
|
||||
Creating a table:
|
||||
|
@ -6,7 +6,7 @@ sidebar_label: DROP
|
||||
|
||||
# DROP Statements
|
||||
|
||||
Deletes existing entity. If the `IF EXISTS` clause is specified, these queries do not return an error if the entity does not exist.
|
||||
Deletes existing entity. If the `IF EXISTS` clause is specified, these queries do not return an error if the entity does not exist. If the `SYNC` modifier is specified, the entity is dropped without delay.
|
||||
|
||||
## DROP DATABASE
|
||||
|
||||
@ -15,7 +15,7 @@ Deletes all tables inside the `db` database, then deletes the `db` database itse
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster]
|
||||
DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] [SYNC]
|
||||
```
|
||||
|
||||
## DROP TABLE
|
||||
@ -25,7 +25,7 @@ Deletes the table.
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
|
||||
DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] [SYNC]
|
||||
```
|
||||
|
||||
## DROP DICTIONARY
|
||||
@ -35,7 +35,7 @@ Deletes the dictionary.
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
DROP DICTIONARY [IF EXISTS] [db.]name
|
||||
DROP DICTIONARY [IF EXISTS] [db.]name [SYNC]
|
||||
```
|
||||
|
||||
## DROP USER
|
||||
@ -95,7 +95,7 @@ Deletes a view. Views can be deleted by a `DROP TABLE` command as well but `DROP
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
DROP VIEW [IF EXISTS] [db.]name [ON CLUSTER cluster]
|
||||
DROP VIEW [IF EXISTS] [db.]name [ON CLUSTER cluster] [SYNC]
|
||||
```
|
||||
|
||||
## DROP FUNCTION
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
slug: /en/development/tests
|
||||
slug: /zh/development/tests
|
||||
sidebar_position: 70
|
||||
sidebar_label: Testing
|
||||
title: ClickHouse Testing
|
||||
|
@ -1,264 +0,0 @@
|
||||
---
|
||||
slug: /zh/whats-new/changelog/2017
|
||||
---
|
||||
### ClickHouse 版本 1.1.54327, 2017-12-21 {#clickhouse-release-1-1-54327-2017-12-21}
|
||||
|
||||
此版本包含先前版本 1.1.54318 的错误修复:
|
||||
|
||||
- 修复了可能导致数据丢失的复制中可能出现的竞争条件的错误. 此问题影响1.1.54310和1.1.54318版本. 如果将这些版本的任意一个与 Replicated 表一起使用,则强烈建议进行更新. 此问题显示在日志中的警告消息中,例如 `Part ... from own log does not exist.` 即使您没有在日志中看到这些消息,该问题也是相关的.
|
||||
|
||||
### ClickHouse 版本 1.1.54318, 2017-11-30 {#clickhouse-release-1-1-54318-2017-11-30}
|
||||
|
||||
此版本包含先前版本 1.1.54310 的错误修复:
|
||||
|
||||
- 修复了在 SummingMergeTree 引擎中合并期间不正确的行删除.
|
||||
- 修复了未复制的 MergeTree 引擎中的内存泄漏.
|
||||
- 修复了在 MergeTree 引擎中频繁插入导致性能下降的问题.
|
||||
- 修复了导致复制队列停止运行的问题.
|
||||
- 修复了服务器日志的轮换和归档.
|
||||
|
||||
### ClickHouse 版本 1.1.54310, 2017-11-01 {#clickhouse-release-1-1-54310-2017-11-01}
|
||||
|
||||
#### 新特征: {#new-features}
|
||||
|
||||
- MergeTree 系列表引擎的自定义分区键.
|
||||
- [Kafka](https://clickhouse.com/docs/en/operations/table_engines/kafka/) 表引擎.
|
||||
- 增加了对加载[CatBoost](https://catboost.yandex/)模型的支持, 并将它们应用到存储在ClickHouse中的数据.
|
||||
- 添加了对 UTC 非整数偏移时区的支持
|
||||
- 添加了对具有时间间隔的算术运算的支持.
|
||||
- Date 和 DateTime 类型的值范围扩展到 2105 年.
|
||||
- 添加了 `CREATE MATERIALIZED VIEW x TO y` 查询(指定用于存储物化视图数据的现有表).
|
||||
- 添加了不带参数的`ATTACH TABLE` 查询.
|
||||
- SummingMergeTree 表中名称以 -Map 结尾的嵌套列的处理逻辑被提取到 sumMap 聚合函数中. 您现在可以明确指定此类列.
|
||||
- IP 树字典的最大大小增加到 128M 条目.
|
||||
- 添加了 getSizeOfEnumType 函数.
|
||||
- 添加了 sumWithOverflow 聚合函数.
|
||||
- 添加了对 Cap'n Proto 输入格式的支持.
|
||||
- 您现在可以在使用 zstd 算法时自定义压缩级别.
|
||||
|
||||
#### 向后不兼容的变化: {#backward-incompatible-changes}
|
||||
|
||||
- 不允许使用内存以外的引擎创建临时表.
|
||||
- 不允许使用 View 或 MaterializedView 引擎显式创建表.
|
||||
- 在表创建期间,新的检查验证采样键表达式是否包含在主键中.
|
||||
|
||||
#### Bug 修复: {#bug-fixes}
|
||||
|
||||
- 修复了同步插入分布式表时的挂断问题.
|
||||
- 修复了复制表中部件的非原子添加和删除.
|
||||
- 插入物化视图的数据不会进行不必要的重复数据删除.
|
||||
- 对本地副本滞后且远程副本不可用的分布式表执行查询不再导致错误.
|
||||
- 用户不再需要访问 `default` 数据库的权限来创建临时表.
|
||||
- 修复了指定不带参数的 Array 类型时崩溃的问题.
|
||||
- 修复了包含服务器日志的磁盘卷已满时的挂断问题.
|
||||
- 修复了 Unix 纪元第一周 toRelativeWeekNum 函数中的溢出问题.
|
||||
|
||||
#### Build 改进: {#build-improvements}
|
||||
|
||||
- 更新了多个第三方库(尤其是 Poco)并转换为 git 子模块.
|
||||
|
||||
### ClickHouse 版本 1.1.54304, 2017-10-19 {#clickhouse-release-1-1-54304-2017-10-19}
|
||||
|
||||
#### 新特征: {#new-features-1}
|
||||
|
||||
- 本机协议中的 TLS 支持(要启用,请在 `config.xml` 中设置 `tcp_ssl_port`).
|
||||
|
||||
#### Bug 修复: {#bug-fixes-1}
|
||||
|
||||
- 复制表的`ALTER` 现在尝试尽快开始运行.
|
||||
- 修复了使用设置 `preferred_block_size_bytes=0.` 读取数据时崩溃的问题.
|
||||
- 修复了按下 `Page Down` 时 `clickhouse-client` 崩溃的问题.
|
||||
- 使用 `GLOBAL IN` 和 `UNION ALL` 正确解释某些复杂的查询.
|
||||
- `FREEZE PARTITION` 现在总是以原子方式工作.
|
||||
- 空 POST 请求现在返回代码为 411 的响应.
|
||||
- 修正了像 `CAST(1 AS Nullable(UInt8)).` 这样的表达式的解释错误.
|
||||
- 修正了从 `MergeTree` 表中读取 `Array(Nullable(String))` 列时的错误.
|
||||
- 修复了在解析诸如 `SELECT dummy AS dummy, dummy AS b` 之类的查询时崩溃的问题.
|
||||
- 用户使用无效的 `users.xml` 正确更新.
|
||||
- 可执行字典返回非零响应代码时的正确处理.
|
||||
|
||||
### ClickHouse 版本 1.1.54292, 2017-09-20 {#clickhouse-release-1-1-54292-2017-09-20}
|
||||
|
||||
#### 新特征: {#new-features-2}
|
||||
|
||||
- 添加了用于处理坐标平面上的坐标的 `pointInPolygon` 函数.
|
||||
- 添加了用于计算数组总和的 `sumMap` 聚合函数, 类似于 `SummingMergeTree` .
|
||||
- 添加了 `trunc` 功能. 改进了舍入函数(`round`、`floor`、`ceil`、`roundToExp2`)的性能并更正了它们工作方式的逻辑. 更改了分数和负数的 `roundToExp2` 函数的逻辑.
|
||||
- ClickHouse 可执行文件现在较少依赖于 libc 版本. 同一个 ClickHouse 可执行文件可以在各种 Linux 系统上运行. 使用编译查询时仍然存在依赖性(使用设置 `compile = 1` , 默认情况下不使用).
|
||||
- 减少动态编译查询所需的时间.
|
||||
|
||||
#### Bug 修复: {#bug-fixes-2}
|
||||
|
||||
- 修复了有时会产生 `part ... intersects previous part` 消息和削弱副本一致性的错误.
|
||||
- 修复了关闭期间 ZooKeeper 不可用导致服务器锁定的错误.
|
||||
- 恢复副本时删除了过多的日志记录.
|
||||
- 修复了 UNION ALL 实现中的错误.
|
||||
- 修复了如果块中的第一列具有 Array 类型时在 concat 函数中发生的错误.
|
||||
- 进度现在在 system.merges 表中可以正确显示.
|
||||
|
||||
### ClickHouse 版本 1.1.54289, 2017-09-13 {#clickhouse-release-1-1-54289-2017-09-13}
|
||||
|
||||
#### 新特征: {#new-features-3}
|
||||
|
||||
- 用于服务器管理的 `SYSTEM` 查询: `SYSTEM RELOAD DICTIONARY`, `SYSTEM RELOAD DICTIONARIES`, `SYSTEM DROP DNS CACHE`, `SYSTEM SHUTDOWN`, `SYSTEM KILL`.
|
||||
- 添加了用于处理数组的函数: `concat`, `arraySlice`, `arrayPushBack`, `arrayPushFront`, `arrayPopBack`, `arrayPopFront`.
|
||||
- 为 ZooKeeper 配置添加了 `root` 和 `identity` 参数. 这将允许您隔离同一 ZooKeeper 集群上的各个用户.
|
||||
- 添加了聚合函数 `groupBitAnd` 、 `groupBitOr` 和 `groupBitXor` (为了兼容性,它们也可以在名称 `BIT_AND` 、`BIT_OR`和`BIT_XOR` 下使用).
|
||||
- 可以通过在文件系统中指定套接字来从 MySQL 加载外部字典.
|
||||
- 可以通过 SSL 从 MySQL 加载外部字典 (`ssl_cert`, `ssl_key`, `ssl_ca` 参数).
|
||||
- 添加了 `max_network_bandwidth_for_user` 设置以限制每个用户查询的总体带宽使用.
|
||||
- 支持临时表的 `DROP TABLE`.
|
||||
- 支持从 `CSV` 和 `JSONEachRow` 格式读取 Unix 时间戳格式的 `DateTime` 值.
|
||||
- 现在默认排除分布式查询中的滞后副本(默认阈值为 5 分钟).
|
||||
- 在 ALTER 期间使用 FIFO 锁定:对于连续运行的查询,ALTER 查询不会无限期阻塞.
|
||||
- 在配置文件中设置 `umask` 的选项.
|
||||
- 使用 `DISTINCT` 提高查询的性能.
|
||||
|
||||
#### Bug 修复: {#bug-fixes-3}
|
||||
|
||||
- 改进了在 ZooKeeper 中删除旧节点的过程. 以前, 如果插入非常频繁, 旧节点有时不会被删除, 从而导致服务器关闭缓慢等.
|
||||
- 修复了为 ZooKeeper 连接选择主机时的随机化问题.
|
||||
- 如果副本是本地主机, 则修复了在分布式查询中排除滞后副本的问题.
|
||||
- 修复了在 `嵌套` 结构中的元素上运行 `ALTER MODIFY` 后, `ReplicatedMergeTree` 表中的数据部分可能被破坏的错误.
|
||||
- 修复了可能导致 SELECT 查询 `hang` 的错误.
|
||||
- 分布式 DDL 查询的改进.
|
||||
- 修复了查询 `CREATE TABLE ... AS <materialized view>`.
|
||||
- 解决了对 Buffer 表的 `ALTER ... CLEAR COLUMN IN PARTITION` 查询中的死锁.
|
||||
- 修复了使用 `JSONEachRow` 和 `TSKV` 格式时 `Enum` 的无效默认值 (0 而不是最小值).
|
||||
- 解决了使用带有 `可执行` 源的字典时出现僵尸进程的问题.
|
||||
- 修复了 HEAD 查询的段错误.
|
||||
|
||||
#### 改进了开发和组装ClickHouse的工作流: {#improved-workflow-for-developing-and-assembling-clickhouse}
|
||||
|
||||
- 您可以使用 `pbuilder` 来构建 ClickHouse.
|
||||
- 你可以使用 `libc++` 代替 `libstdc++` 在 Linux 上构建.
|
||||
- 添加了使用静态代码分析工具的说明: `Coverage`, `clang-tidy`, `cppcheck`.
|
||||
|
||||
#### 升级时请注意: {#please-note-when-upgrading}
|
||||
|
||||
- 现在有更高的 MergeTree 设置默认值 `max_bytes_to_merge_at_max_space_in_pool` (要合并的数据部分的最大总大小, 以字节为单位): 它已从 100 GiB 增加到 150 GiB. 这可能会导致在服务器升级后运行大型合并, 从而导致磁盘子系统负载增加. 如果服务器上的可用空间小于正在运行的合并总量的两倍, 这将导致所有其他合并停止运行, 包括小数据部分的合并. 因此, INSERT 查询将失败并显示消息"合并的处理速度明显慢于插入." , 使用 `SELECT * FROM system.merges` 查询来监控情况. 您还可以在 `system.metrics` 表或 Graphite 中检查 `DiskSpaceReservedForMerge` 指标. 您不需要做任何事情来解决这个问题, 因为一旦大型合并完成, 问题就会自行解决. 如果您发现这不可接受, 您可以恢复 `max_bytes_to_merge_at_max_space_in_pool` 设置的先前值. 为此, 请转到 config.xml 中的 `<merge_tree>` 部分, 设置 ``` <merge_tree>``<max_bytes_to_merge_at_max_space_in_pool>107374182400</max_bytes_to_merge_at_max_space_in_pool> ``` 并重新启动服务器.
|
||||
|
||||
### ClickHouse 版本 1.1.54284, 2017-08-29 {#clickhouse-release-1-1-54284-2017-08-29}
|
||||
|
||||
- 这是先前 1.1.54282 版本的错误修复版本. 它修复了 ZooKeeper 中部分目录中的泄漏.
|
||||
|
||||
### ClickHouse 版本 1.1.54282, 2017-08-23 {#clickhouse-release-1-1-54282-2017-08-23}
|
||||
|
||||
此版本包含先前版本 1.1.54276 的错误修复:
|
||||
|
||||
- 修复了插入分布式表时的 `DB::Exception: Assertion violation: !_path.empty()`.
|
||||
- 如果输入数据以 ';' 开头, 则在以 RowBinary 格式插入时固定解析.
|
||||
- 某些聚合函数 (例如 `groupArray()` ) 的运行时编译期间的错误.
|
||||
|
||||
### ClickHouse 版本 1.1.54276, 2017-08-16 {#clickhouse-release-1-1-54276-2017-08-16}
|
||||
|
||||
#### 新特征: {#new-features-4}
|
||||
|
||||
- SELECT 查询添加了一个可选的 WITH 部分. 示例查询:`WITH 1+1 AS a SELECT a, a*a` .
|
||||
- INSERT 可以在分布式表中同步执行:只有在所有数据都保存在所有分片上后才返回 OK. 这是通过设置 `insert_distributed_sync=1` 激活的.
|
||||
- 添加了用于处理 16 字节标识符的 UUID 数据类型.
|
||||
- 添加了 CHAR、FLOAT 和其他类型的别名以与 Tableau 兼容.
|
||||
- 新增 toYYYYMM, toYYYYMMDD, toYYYYMMDDhhmmss 时间转数字功能.
|
||||
- 您可以使用 IP 地址 (与主机名一起) 来识别集群 DDL 查询的服务器.
|
||||
- 在函数 `substring(str, pos, len)` 中添加了对非常量参数和负偏移量的支持.
|
||||
- 为 `groupArray(max_size)(column)` 聚合函数增加了max_size参数, 并优化了其性能.
|
||||
|
||||
#### Main Changes: {#main-changes}
|
||||
|
||||
- 安全改进:所有服务器文件都使用 0640 权限创建(可以通过 `<umask>` 配置参数更改).
|
||||
- 改进了语法无效查询的错误消息.
|
||||
- 合并大段 MergeTree 数据时显着减少内存消耗并提高性能.
|
||||
- 显着提高了 ReplacingMergeTree 引擎的数据合并性能.
|
||||
- 通过组合多个源插入提高了从分布式表进行异步插入的性能. 要启用此功能, 请使用设置 `distributed_directory_monitor_batch_inserts=1` .
|
||||
|
||||
#### Backward Incompatible Changes: {#backward-incompatible-changes-1}
|
||||
|
||||
- 更改了数组 `groupArray(array_column)` 函数聚合状态的二进制格式.
|
||||
|
||||
#### Complete List of Changes: {#complete-list-of-changes}
|
||||
|
||||
- 添加了 `output_format_json_quote_denormals` 设置, 可以以 JSON 格式输出 nan 和 inf 值.
|
||||
- 从分布式表读取时优化流分配.
|
||||
- 如果值不变, 可以在只读模式下配置设置.
|
||||
- 加了检索 MergeTree 引擎的非整数粒度的功能, 以满足对 `preferred_block_size_bytes` 设置中指定的块大小的限制. 目的是在处理来自大列的表的查询时减少RAM的消耗并增加缓存局部性.
|
||||
- 有效地使用包含像 `toStartOfHour(x)` 这样的表达式的索引来处理像 `toStartOfHour(x) op constexpr` 这样的条件.
|
||||
- 添加了 MergeTree 引擎的新设置(config.xml 中的 merge_tree 部分):
|
||||
- `replicated_deduplication_window_seconds` 设置允许在复制表中删除重复插入的秒数.
|
||||
- `cleanup_delay_period` 设置启动清理以删除过时数据的频率.
|
||||
- `replicated_can_become_leader` 可以防止副本成为领导者(并分配合并).
|
||||
- 加速清理以从 ZooKeeper 中删除过时的数据.
|
||||
- 集群 DDL 查询的多项改进和修复. 特别有趣的是新设置 `distributed_ddl_task_timeout`, 它限制了等待集群中服务器响应的时间. 如果 ddl 请求没有在所有主机上执行,响应将包含超时错误并且请求将以异步模式执行.
|
||||
- 改进了服务器日志中堆栈跟踪的显示.
|
||||
- 为压缩方法添加了 "none" 值.
|
||||
- 您可以在 config.xml 中使用多个dictionaries_config 部分.
|
||||
- 可以通过文件系统中的套接字连接到 MySQL.
|
||||
- `system.parts` 表有一个新列, 其中包含有关标记大小的信息(以字节为单位).
|
||||
|
||||
#### Bug 修复: {#bug-fixes-4}
|
||||
|
||||
- 使用 Merge 表的分布式表现在可以正确用于带有 `_table` 字段条件的 SELECT 查询.
|
||||
- 修复了检查数据部分时 ReplicatedMergeTree 中罕见的竞争条件.
|
||||
- 修复了启动服务器时 `leader election` 可能会冻结的问题.
|
||||
- 使用数据源的本地副本时,将忽略 `max_replica_delay_for_distributed_queries` 设置. 这已被修复.
|
||||
- 修复了尝试清理不存在的列时 `ALTER TABLE CLEAR COLUMN IN PARTITION` 的错误行为.
|
||||
- 修复了 multiIf 函数中使用空数组或字符串时的异常.
|
||||
- 修复了反序列化本机格式时过多的内存分配.
|
||||
- 修复了 Trie 词典的错误自动更新.
|
||||
- 修复了在使用 SAMPLE 时从合并表中使用 GROUP BY 子句运行查询时的异常.
|
||||
- 修复了 `distributed_aggregation_memory_efficient=1` 时 GROUP BY 的崩溃.
|
||||
- 现在可以在 IN 和 JOIN 右侧指定 `database.table`.
|
||||
- 太多线程用于并行聚合. 这已被修复.
|
||||
- 修复了 `if` 函数如何与 FixedString 参数一起工作.
|
||||
- 对于权重为 0 的分片, SELECT 在分布式表中工作不正确. 这已得到修复.
|
||||
- 运行 `CREATE VIEW IF EXISTS` 不再导致崩溃.
|
||||
- 修复了设置 `input_format_skip_unknown_fields=1` 且存在负数时的错误行为.
|
||||
- 修复了如果字典中有一些无效数据, `dictGetHierarchy()` 函数中的无限循环.
|
||||
- 修复了使用 IN 或 JOIN 子句和合并表中的子查询运行分布式查询时的 `Syntax error: unexpected (...)` 错误.
|
||||
- 修复了对字典表中 SELECT 查询的错误解释.
|
||||
- 修复了在超过 20 亿元素的 IN 和 JOIN 子句中使用数组时的 "Cannot mremap" 错误.
|
||||
- 修复了以 MySQL 为源的字典的故障转移.
|
||||
|
||||
#### 改进了开发和组装ClickHouse的工作流: {#improved-workflow-for-developing-and-assembling-clickhouse-1}
|
||||
|
||||
- 可以在 Arcadia 中组装 Builds.
|
||||
- 可以使用 gcc 7 编译 ClickHouse.
|
||||
- 使用 ccache+distcc 的并行构建现在更快了.
|
||||
|
||||
### ClickHouse 版本 1.1.54245, 2017-07-04 {#clickhouse-release-1-1-54245-2017-07-04}
|
||||
|
||||
#### 新特征: {#new-features-5}
|
||||
|
||||
- 分布式 DDL(例如, `CREATE TABLE ON CLUSTER`).
|
||||
- 复制查询 `ALTER TABLE CLEAR COLUMN IN PARTITION.` .
|
||||
- 字典表引擎 (以表的形式访问字典数据).
|
||||
- 字典数据库引擎 (这种类型的数据库自动为所有连接的外部字典提供字典表).
|
||||
- 您可以通过向源发送请求来检查字典的更新.
|
||||
- 合格的列名.
|
||||
- 使用双引号引用标识符.
|
||||
- HTTP 接口中的会话.
|
||||
- 复制表的 OPTIMIZE 查询不仅可以在领导者上运行.
|
||||
|
||||
#### 向后不兼容的变化: {#backward-incompatible-changes-2}
|
||||
|
||||
- 删除了 SET GLOBAL.
|
||||
|
||||
#### 次要更改: {#minor-changes}
|
||||
|
||||
- 目前在触发警报后,日志会打印完整的堆栈跟踪.
|
||||
- 放宽了启动时损坏/额外数据部分数量的验证(误报太多).
|
||||
|
||||
#### Bug 修复: {#bug-fixes-5}
|
||||
|
||||
- 修复了插入分布式表时的错误连接"卡住"问题.
|
||||
- GLOBAL IN 现在适用于来自查看分布式表的合并表的查询.
|
||||
- 在 Google Compute Engine 虚拟机上检测到的内核数不正确. 这已被修复.
|
||||
- 缓存外部字典的可执行源的工作方式发生了变化.
|
||||
- 修正了包含空字符的字符串的比较.
|
||||
- 修复了 Float32 主键字段与常量的比较.
|
||||
- 以前,对字段大小的错误估计可能会导致分配过大.
|
||||
- 修复了使用 ALTER 查询添加到表中的 Nullable 列时发生的崩溃.
|
||||
- 修复了当行数小于 LIMIT 时按 Nullable 列排序时崩溃的问题.
|
||||
- 修复了仅由常量值组成的 ORDER BY 子查询.
|
||||
- 以前,复制表在 DROP TABLE 失败后可能保持无效状态.
|
||||
- 结果为空的标量子查询的别名不再丢失.
|
||||
- 现在,如果 .so 文件损坏,使用编译的查询不会因错误而失败.
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -133,11 +133,11 @@ func TestConfigFileFrameCopy(t *testing.T) {
|
||||
require.Empty(t, errs)
|
||||
i := 0
|
||||
sizes := map[string]int64{
|
||||
"users.xml": int64(2039),
|
||||
"users.xml": int64(2017),
|
||||
"default-password.xml": int64(188),
|
||||
"config.xml": int64(61282),
|
||||
"config.xml": int64(61260),
|
||||
"server-include.xml": int64(168),
|
||||
"user-include.xml": int64(582),
|
||||
"user-include.xml": int64(559),
|
||||
}
|
||||
var checkedFiles []string
|
||||
for {
|
||||
|
@ -1,4 +1,3 @@
|
||||
<?xml version="1.0" ?>
|
||||
<clickhouse>
|
||||
<test_user>
|
||||
<networks>
|
||||
|
@ -1,4 +1,3 @@
|
||||
<?xml version="1.0"?>
|
||||
<!--
|
||||
NOTE: User and query level settings are set up in "users.xml" file.
|
||||
If you have accidentally specified user-level settings here, server won't start.
|
||||
|
@ -1,4 +1,3 @@
|
||||
<?xml version="1.0"?>
|
||||
<clickhouse>
|
||||
<!-- See also the files in users.d directory where the settings can be overridden. -->
|
||||
<!-- Profiles of settings. -->
|
||||
|
@ -1,4 +1,3 @@
|
||||
<?xml version="1.0"?>
|
||||
<!--
|
||||
NOTE: User and query level settings are set up in "users.xml" file.
|
||||
If you have accidentally specified user-level settings here, server won't start.
|
||||
|
@ -1,4 +1,3 @@
|
||||
<?xml version="1.0" ?>
|
||||
<clickhouse>
|
||||
<listen_host>::</listen_host>
|
||||
<listen_host>0.0.0.0</listen_host>
|
||||
|
@ -833,7 +833,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
if (params.frequency_desaturate)
|
||||
if (params.frequency_desaturate > 0.0)
|
||||
{
|
||||
for (auto & elem : table)
|
||||
{
|
||||
@ -846,7 +846,7 @@ public:
|
||||
UInt64 new_total = 0;
|
||||
for (auto & bucket : histogram.buckets)
|
||||
{
|
||||
bucket.second = bucket.second * (1.0 - params.frequency_desaturate) + average * params.frequency_desaturate;
|
||||
bucket.second = static_cast<UInt64>(bucket.second * (1.0 - params.frequency_desaturate) + average * params.frequency_desaturate);
|
||||
new_total += bucket.second;
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,3 @@
|
||||
<?xml version="1.0"?>
|
||||
<!--
|
||||
NOTE: User and query level settings are set up in "users.xml" file.
|
||||
If you have accidentally specified user-level settings here, server won't start.
|
||||
|
@ -1,4 +1,3 @@
|
||||
<?xml version="1.0"?>
|
||||
<!-- Config that is used when server is run without config file. -->
|
||||
<clickhouse>
|
||||
<logger>
|
||||
|
@ -1,4 +1,3 @@
|
||||
<?xml version="1.0"?>
|
||||
<clickhouse>
|
||||
<profiles>
|
||||
<default>
|
||||
|
@ -1,4 +1,3 @@
|
||||
<?xml version="1.0"?>
|
||||
<clickhouse>
|
||||
<users>
|
||||
<default>
|
||||
|
@ -1,4 +1,3 @@
|
||||
<?xml version="1.0"?>
|
||||
<clickhouse>
|
||||
<!-- See also the files in users.d directory where the settings can be overridden. -->
|
||||
|
||||
@ -6,15 +5,6 @@
|
||||
<profiles>
|
||||
<!-- Default settings. -->
|
||||
<default>
|
||||
<!-- How to choose between replicas during distributed query processing.
|
||||
random - choose random replica from set of replicas with minimum number of errors
|
||||
nearest_hostname - from set of replicas with minimum number of errors, choose replica
|
||||
with minimum number of different symbols between replica's hostname and local hostname
|
||||
(Hamming distance).
|
||||
in_order - first live replica is chosen in specified order.
|
||||
first_or_random - if first replica one has higher number of errors, pick a random one from replicas with minimum number of errors.
|
||||
-->
|
||||
<load_balancing>random</load_balancing>
|
||||
</default>
|
||||
|
||||
<!-- Profile that allows only read queries. -->
|
||||
|
@ -1,8 +1,9 @@
|
||||
#include <AggregateFunctions/AggregateFunctionQuantile.h>
|
||||
|
||||
#include <AggregateFunctions/QuantileReservoirSampler.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
|
||||
@ -21,70 +22,6 @@ namespace
|
||||
template <typename Value, bool float_return> using FuncQuantile = AggregateFunctionQuantile<Value, QuantileReservoirSampler<Value>, NameQuantile, false, std::conditional_t<float_return, Float64, void>, false>;
|
||||
template <typename Value, bool float_return> using FuncQuantiles = AggregateFunctionQuantile<Value, QuantileReservoirSampler<Value>, NameQuantiles, false, std::conditional_t<float_return, Float64, void>, true>;
|
||||
|
||||
template <typename Value, bool float_return> using FuncQuantileDeterministic = AggregateFunctionQuantile<Value, QuantileReservoirSamplerDeterministic<Value>, NameQuantileDeterministic, true, std::conditional_t<float_return, Float64, void>, false>;
|
||||
template <typename Value, bool float_return> using FuncQuantilesDeterministic = AggregateFunctionQuantile<Value, QuantileReservoirSamplerDeterministic<Value>, NameQuantilesDeterministic, true, std::conditional_t<float_return, Float64, void>, true>;
|
||||
|
||||
template <typename Value, bool _> using FuncQuantileExact = AggregateFunctionQuantile<Value, QuantileExact<Value>, NameQuantileExact, false, void, false>;
|
||||
template <typename Value, bool _> using FuncQuantilesExact = AggregateFunctionQuantile<Value, QuantileExact<Value>, NameQuantilesExact, false, void, true>;
|
||||
|
||||
template <typename Value, bool _> using FuncQuantileExactLow = AggregateFunctionQuantile<Value, QuantileExactLow<Value>, NameQuantileExactLow, false, void, false>;
|
||||
template <typename Value, bool _> using FuncQuantilesExactLow = AggregateFunctionQuantile<Value, QuantileExactLow<Value>, NameQuantilesExactLow, false, void, true>;
|
||||
template <typename Value, bool _> using FuncQuantileExactHigh = AggregateFunctionQuantile<Value, QuantileExactHigh<Value>, NameQuantileExactHigh, false, void, false>;
|
||||
template <typename Value, bool _> using FuncQuantilesExactHigh = AggregateFunctionQuantile<Value, QuantileExactHigh<Value>, NameQuantilesExactHigh, false, void, true>;
|
||||
|
||||
template <typename Value, bool _> using FuncQuantileExactExclusive = AggregateFunctionQuantile<Value, QuantileExactExclusive<Value>, NameQuantileExactExclusive, false, Float64, false>;
|
||||
template <typename Value, bool _> using FuncQuantilesExactExclusive = AggregateFunctionQuantile<Value, QuantileExactExclusive<Value>, NameQuantilesExactExclusive, false, Float64, true>;
|
||||
|
||||
template <typename Value, bool _> using FuncQuantileExactInclusive = AggregateFunctionQuantile<Value, QuantileExactInclusive<Value>, NameQuantileExactInclusive, false, Float64, false>;
|
||||
template <typename Value, bool _> using FuncQuantilesExactInclusive = AggregateFunctionQuantile<Value, QuantileExactInclusive<Value>, NameQuantilesExactInclusive, false, Float64, true>;
|
||||
|
||||
template <typename Value, bool _> using FuncQuantileExactWeighted = AggregateFunctionQuantile<Value, QuantileExactWeighted<Value>, NameQuantileExactWeighted, true, void, false>;
|
||||
template <typename Value, bool _> using FuncQuantilesExactWeighted = AggregateFunctionQuantile<Value, QuantileExactWeighted<Value>, NameQuantilesExactWeighted, true, void, true>;
|
||||
|
||||
template <typename Value, bool _> using FuncQuantileTiming = AggregateFunctionQuantile<Value, QuantileTiming<Value>, NameQuantileTiming, false, Float32, false>;
|
||||
template <typename Value, bool _> using FuncQuantilesTiming = AggregateFunctionQuantile<Value, QuantileTiming<Value>, NameQuantilesTiming, false, Float32, true>;
|
||||
|
||||
template <typename Value, bool _> using FuncQuantileTimingWeighted = AggregateFunctionQuantile<Value, QuantileTiming<Value>, NameQuantileTimingWeighted, true, Float32, false>;
|
||||
template <typename Value, bool _> using FuncQuantilesTimingWeighted = AggregateFunctionQuantile<Value, QuantileTiming<Value>, NameQuantilesTimingWeighted, true, Float32, true>;
|
||||
|
||||
template <typename Value, bool float_return> using FuncQuantileTDigest = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantileTDigest, false, std::conditional_t<float_return, Float32, void>, false>;
|
||||
template <typename Value, bool float_return> using FuncQuantilesTDigest = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantilesTDigest, false, std::conditional_t<float_return, Float32, void>, true>;
|
||||
|
||||
template <typename Value, bool float_return> using FuncQuantileTDigestWeighted = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantileTDigestWeighted, true, std::conditional_t<float_return, Float32, void>, false>;
|
||||
template <typename Value, bool float_return> using FuncQuantilesTDigestWeighted = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantilesTDigestWeighted, true, std::conditional_t<float_return, Float32, void>, true>;
|
||||
|
||||
template <typename Value, bool float_return> using FuncQuantileBFloat16 = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantileBFloat16, false, std::conditional_t<float_return, Float64, void>, false>;
|
||||
template <typename Value, bool float_return> using FuncQuantilesBFloat16 = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantilesBFloat16, false, std::conditional_t<float_return, Float64, void>, true>;
|
||||
|
||||
template <typename Value, bool float_return> using FuncQuantileBFloat16Weighted = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantileBFloat16Weighted, true, std::conditional_t<float_return, Float64, void>, false>;
|
||||
template <typename Value, bool float_return> using FuncQuantilesBFloat16Weighted = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantilesBFloat16Weighted, true, std::conditional_t<float_return, Float64, void>, true>;
|
||||
|
||||
template <template <typename, bool> class Function>
|
||||
constexpr bool supportDecimal()
|
||||
{
|
||||
return std::is_same_v<Function<Float32, false>, FuncQuantile<Float32, false>> ||
|
||||
std::is_same_v<Function<Float32, false>, FuncQuantiles<Float32, false>> ||
|
||||
std::is_same_v<Function<Float32, false>, FuncQuantileExact<Float32, false>> ||
|
||||
std::is_same_v<Function<Float32, false>, FuncQuantileExactLow<Float32, false>> ||
|
||||
std::is_same_v<Function<Float32, false>, FuncQuantileExactHigh<Float32, false>> ||
|
||||
std::is_same_v<Function<Float32, false>, FuncQuantilesExact<Float32, false>> ||
|
||||
std::is_same_v<Function<Float32, false>, FuncQuantilesExactLow<Float32, false>> ||
|
||||
std::is_same_v<Function<Float32, false>, FuncQuantilesExactHigh<Float32, false>> ||
|
||||
std::is_same_v<Function<Float32, false>, FuncQuantileExactWeighted<Float32, false>> ||
|
||||
std::is_same_v<Function<Float32, false>, FuncQuantilesExactWeighted<Float32, false>>;
|
||||
}
|
||||
|
||||
template <template <typename, bool> class Function>
|
||||
constexpr bool supportBigInt()
|
||||
{
|
||||
return std::is_same_v<Function<Float32, false>, FuncQuantile<Float32, false>> ||
|
||||
std::is_same_v<Function<Float32, false>, FuncQuantiles<Float32, false>> ||
|
||||
std::is_same_v<Function<Float32, false>, FuncQuantileExact<Float32, false>> ||
|
||||
std::is_same_v<Function<Float32, false>, FuncQuantilesExact<Float32, false>> ||
|
||||
std::is_same_v<Function<Float32, false>, FuncQuantileExactWeighted<Float32, false>> ||
|
||||
std::is_same_v<Function<Float32, false>, FuncQuantilesExactWeighted<Float32, false>>;
|
||||
}
|
||||
|
||||
template <template <typename, bool> class Function>
|
||||
AggregateFunctionPtr createAggregateFunctionQuantile(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||
@ -102,22 +39,16 @@ AggregateFunctionPtr createAggregateFunctionQuantile(
|
||||
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
|
||||
|
||||
if constexpr (supportDecimal<Function>())
|
||||
{
|
||||
if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime64) return std::make_shared<Function<DateTime64, false>>(argument_types, params);
|
||||
}
|
||||
if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime64) return std::make_shared<Function<DateTime64, false>>(argument_types, params);
|
||||
|
||||
if constexpr (supportBigInt<Function>())
|
||||
{
|
||||
if (which.idx == TypeIndex::Int128) return std::make_shared<Function<Int128, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::UInt128) return std::make_shared<Function<Int128, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Int256) return std::make_shared<Function<Int256, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::UInt256) return std::make_shared<Function<UInt256, true>>(argument_types, params);
|
||||
}
|
||||
if (which.idx == TypeIndex::Int128) return std::make_shared<Function<Int128, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::UInt128) return std::make_shared<Function<Int128, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Int256) return std::make_shared<Function<Int256, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::UInt256) return std::make_shared<Function<UInt256, true>>(argument_types, params);
|
||||
|
||||
throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
@ -133,58 +64,8 @@ void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory)
|
||||
factory.registerFunction(NameQuantile::name, createAggregateFunctionQuantile<FuncQuantile>);
|
||||
factory.registerFunction(NameQuantiles::name, { createAggregateFunctionQuantile<FuncQuantiles>, properties });
|
||||
|
||||
factory.registerFunction(NameQuantileDeterministic::name, createAggregateFunctionQuantile<FuncQuantileDeterministic>);
|
||||
factory.registerFunction(NameQuantilesDeterministic::name, { createAggregateFunctionQuantile<FuncQuantilesDeterministic>, properties });
|
||||
|
||||
factory.registerFunction(NameQuantileExact::name, createAggregateFunctionQuantile<FuncQuantileExact>);
|
||||
factory.registerFunction(NameQuantilesExact::name, { createAggregateFunctionQuantile<FuncQuantilesExact>, properties });
|
||||
|
||||
factory.registerFunction(NameQuantileExactLow::name, createAggregateFunctionQuantile<FuncQuantileExactLow>);
|
||||
factory.registerFunction(NameQuantilesExactLow::name, { createAggregateFunctionQuantile<FuncQuantilesExactLow>, properties });
|
||||
|
||||
factory.registerFunction(NameQuantileExactHigh::name, createAggregateFunctionQuantile<FuncQuantileExactHigh>);
|
||||
factory.registerFunction(NameQuantilesExactHigh::name, { createAggregateFunctionQuantile<FuncQuantilesExactHigh>, properties });
|
||||
|
||||
factory.registerFunction(NameQuantileExactExclusive::name, createAggregateFunctionQuantile<FuncQuantileExactExclusive>);
|
||||
factory.registerFunction(NameQuantilesExactExclusive::name, { createAggregateFunctionQuantile<FuncQuantilesExactExclusive>, properties });
|
||||
|
||||
factory.registerFunction(NameQuantileExactInclusive::name, createAggregateFunctionQuantile<FuncQuantileExactInclusive>);
|
||||
factory.registerFunction(NameQuantilesExactInclusive::name, { createAggregateFunctionQuantile<FuncQuantilesExactInclusive>, properties });
|
||||
|
||||
factory.registerFunction(NameQuantileExactWeighted::name, createAggregateFunctionQuantile<FuncQuantileExactWeighted>);
|
||||
factory.registerFunction(NameQuantilesExactWeighted::name, { createAggregateFunctionQuantile<FuncQuantilesExactWeighted>, properties });
|
||||
|
||||
factory.registerFunction(NameQuantileTiming::name, createAggregateFunctionQuantile<FuncQuantileTiming>);
|
||||
factory.registerFunction(NameQuantilesTiming::name, { createAggregateFunctionQuantile<FuncQuantilesTiming>, properties });
|
||||
|
||||
factory.registerFunction(NameQuantileTimingWeighted::name, createAggregateFunctionQuantile<FuncQuantileTimingWeighted>);
|
||||
factory.registerFunction(NameQuantilesTimingWeighted::name, { createAggregateFunctionQuantile<FuncQuantilesTimingWeighted>, properties });
|
||||
|
||||
factory.registerFunction(NameQuantileTDigest::name, createAggregateFunctionQuantile<FuncQuantileTDigest>);
|
||||
factory.registerFunction(NameQuantilesTDigest::name, { createAggregateFunctionQuantile<FuncQuantilesTDigest>, properties });
|
||||
|
||||
factory.registerFunction(NameQuantileTDigestWeighted::name, createAggregateFunctionQuantile<FuncQuantileTDigestWeighted>);
|
||||
factory.registerFunction(NameQuantilesTDigestWeighted::name, { createAggregateFunctionQuantile<FuncQuantilesTDigestWeighted>, properties });
|
||||
|
||||
factory.registerFunction(NameQuantileBFloat16::name, createAggregateFunctionQuantile<FuncQuantileBFloat16>);
|
||||
factory.registerFunction(NameQuantilesBFloat16::name, { createAggregateFunctionQuantile<FuncQuantilesBFloat16>, properties });
|
||||
|
||||
factory.registerFunction(NameQuantileBFloat16Weighted::name, createAggregateFunctionQuantile<FuncQuantileBFloat16Weighted>);
|
||||
factory.registerFunction(NameQuantilesBFloat16Weighted::name, createAggregateFunctionQuantile<FuncQuantilesBFloat16Weighted>);
|
||||
|
||||
/// 'median' is an alias for 'quantile'
|
||||
factory.registerAlias("median", NameQuantile::name);
|
||||
factory.registerAlias("medianDeterministic", NameQuantileDeterministic::name);
|
||||
factory.registerAlias("medianExact", NameQuantileExact::name);
|
||||
factory.registerAlias("medianExactLow", NameQuantileExactLow::name);
|
||||
factory.registerAlias("medianExactHigh", NameQuantileExactHigh::name);
|
||||
factory.registerAlias("medianExactWeighted", NameQuantileExactWeighted::name);
|
||||
factory.registerAlias("medianTiming", NameQuantileTiming::name);
|
||||
factory.registerAlias("medianTimingWeighted", NameQuantileTimingWeighted::name);
|
||||
factory.registerAlias("medianTDigest", NameQuantileTDigest::name);
|
||||
factory.registerAlias("medianTDigestWeighted", NameQuantileTDigestWeighted::name);
|
||||
factory.registerAlias("medianBFloat16", NameQuantileBFloat16::name);
|
||||
factory.registerAlias("medianBFloat16Weighted", NameQuantileBFloat16Weighted::name);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -2,24 +2,12 @@
|
||||
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
|
||||
/// These must be exposed in header for the purpose of dynamic compilation.
|
||||
#include <AggregateFunctions/QuantileReservoirSampler.h>
|
||||
#include <AggregateFunctions/QuantileReservoirSamplerDeterministic.h>
|
||||
#include <AggregateFunctions/QuantileExact.h>
|
||||
#include <AggregateFunctions/QuantileExactWeighted.h>
|
||||
#include <AggregateFunctions/QuantileTiming.h>
|
||||
#include <AggregateFunctions/QuantileTDigest.h>
|
||||
#include <AggregateFunctions/QuantileBFloat16Histogram.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/QuantilesCommon.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeAggregateFunction.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
60
src/AggregateFunctions/AggregateFunctionQuantileBFloat16.cpp
Normal file
60
src/AggregateFunctions/AggregateFunctionQuantileBFloat16.cpp
Normal file
@ -0,0 +1,60 @@
|
||||
#include <AggregateFunctions/AggregateFunctionQuantile.h>
|
||||
#include <AggregateFunctions/QuantileBFloat16Histogram.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename Value, bool float_return> using FuncQuantileBFloat16 = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantileBFloat16, false, std::conditional_t<float_return, Float64, void>, false>;
|
||||
template <typename Value, bool float_return> using FuncQuantilesBFloat16 = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantilesBFloat16, false, std::conditional_t<float_return, Float64, void>, true>;
|
||||
|
||||
template <template <typename, bool> class Function>
|
||||
AggregateFunctionPtr createAggregateFunctionQuantile(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||
{
|
||||
/// Second argument type check doesn't depend on the type of the first one.
|
||||
Function<void, true>::assertSecondArg(argument_types);
|
||||
|
||||
const DataTypePtr & argument_type = argument_types[0];
|
||||
WhichDataType which(argument_type);
|
||||
|
||||
#define DISPATCH(TYPE) \
|
||||
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
|
||||
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
|
||||
#undef DISPATCH
|
||||
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
|
||||
|
||||
throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Registers the quantileBFloat16 / quantilesBFloat16 aggregate functions and the medianBFloat16 alias.
void registerAggregateFunctionsQuantileBFloat16(AggregateFunctionFactory & factory)
{
    /// An aggregate function that returns an array cannot return NULL on an empty set,
    /// so the array-returning variant is registered with this property.
    const AggregateFunctionProperties array_result_properties{.returns_default_when_only_null = true};

    factory.registerFunction(
        NameQuantileBFloat16::name,
        createAggregateFunctionQuantile<FuncQuantileBFloat16>);
    factory.registerFunction(
        NameQuantilesBFloat16::name,
        {createAggregateFunctionQuantile<FuncQuantilesBFloat16>, array_result_properties});

    /// 'median' is an alias for 'quantile'
    factory.registerAlias("medianBFloat16", NameQuantileBFloat16::name);
}
|
||||
|
||||
}
|
@ -0,0 +1,57 @@
|
||||
#include <AggregateFunctions/AggregateFunctionQuantile.h>
|
||||
#include <AggregateFunctions/QuantileBFloat16Histogram.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename Value, bool float_return> using FuncQuantileBFloat16Weighted = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantileBFloat16Weighted, true, std::conditional_t<float_return, Float64, void>, false>;
|
||||
template <typename Value, bool float_return> using FuncQuantilesBFloat16Weighted = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantilesBFloat16Weighted, true, std::conditional_t<float_return, Float64, void>, true>;
|
||||
|
||||
template <template <typename, bool> class Function>
|
||||
AggregateFunctionPtr createAggregateFunctionQuantile(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||
{
|
||||
/// Second argument type check doesn't depend on the type of the first one.
|
||||
Function<void, true>::assertSecondArg(argument_types);
|
||||
|
||||
const DataTypePtr & argument_type = argument_types[0];
|
||||
WhichDataType which(argument_type);
|
||||
|
||||
#define DISPATCH(TYPE) \
|
||||
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
|
||||
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
|
||||
#undef DISPATCH
|
||||
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
|
||||
|
||||
throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Registers the quantileBFloat16Weighted / quantilesBFloat16Weighted aggregate functions
/// and the medianBFloat16Weighted alias.
void registerAggregateFunctionsQuantileBFloat16Weighted(AggregateFunctionFactory & factory)
{
    factory.registerFunction(
        NameQuantileBFloat16Weighted::name,
        createAggregateFunctionQuantile<FuncQuantileBFloat16Weighted>);

    /// NOTE(review): this variant is registered without any AggregateFunctionProperties.
    /// Confirm whether it needs `returns_default_when_only_null` for its array result.
    factory.registerFunction(
        NameQuantilesBFloat16Weighted::name,
        createAggregateFunctionQuantile<FuncQuantilesBFloat16Weighted>);

    /// 'median' is an alias for 'quantile'
    factory.registerAlias("medianBFloat16Weighted", NameQuantileBFloat16Weighted::name);
}
|
||||
|
||||
}
|
@ -0,0 +1,65 @@
|
||||
#include <AggregateFunctions/AggregateFunctionQuantile.h>
|
||||
#include <AggregateFunctions/QuantileReservoirSamplerDeterministic.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename Value, bool float_return> using FuncQuantileDeterministic = AggregateFunctionQuantile<Value, QuantileReservoirSamplerDeterministic<Value>, NameQuantileDeterministic, true, std::conditional_t<float_return, Float64, void>, false>;
|
||||
template <typename Value, bool float_return> using FuncQuantilesDeterministic = AggregateFunctionQuantile<Value, QuantileReservoirSamplerDeterministic<Value>, NameQuantilesDeterministic, true, std::conditional_t<float_return, Float64, void>, true>;
|
||||
|
||||
template <template <typename, bool> class Function>
|
||||
AggregateFunctionPtr createAggregateFunctionQuantile(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||
{
|
||||
/// Second argument type check doesn't depend on the type of the first one.
|
||||
Function<void, true>::assertSecondArg(argument_types);
|
||||
|
||||
const DataTypePtr & argument_type = argument_types[0];
|
||||
WhichDataType which(argument_type);
|
||||
|
||||
#define DISPATCH(TYPE) \
|
||||
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
|
||||
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
|
||||
#undef DISPATCH
|
||||
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
|
||||
|
||||
if (which.idx == TypeIndex::Int128) return std::make_shared<Function<Int128, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::UInt128) return std::make_shared<Function<Int128, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Int256) return std::make_shared<Function<Int256, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::UInt256) return std::make_shared<Function<UInt256, true>>(argument_types, params);
|
||||
|
||||
throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Registers the quantileDeterministic / quantilesDeterministic aggregate functions
/// and the medianDeterministic alias.
void registerAggregateFunctionsQuantileDeterministic(AggregateFunctionFactory & factory)
{
    /// An aggregate function that returns an array cannot return NULL on an empty set,
    /// so the array-returning variant is registered with this property.
    const AggregateFunctionProperties array_result_properties{.returns_default_when_only_null = true};

    factory.registerFunction(
        NameQuantileDeterministic::name,
        createAggregateFunctionQuantile<FuncQuantileDeterministic>);
    factory.registerFunction(
        NameQuantilesDeterministic::name,
        {createAggregateFunctionQuantile<FuncQuantilesDeterministic>, array_result_properties});

    /// 'median' is an alias for 'quantile'
    factory.registerAlias("medianDeterministic", NameQuantileDeterministic::name);
}
|
||||
|
||||
}
|
72
src/AggregateFunctions/AggregateFunctionQuantileExact.cpp
Normal file
72
src/AggregateFunctions/AggregateFunctionQuantileExact.cpp
Normal file
@ -0,0 +1,72 @@
|
||||
#include <AggregateFunctions/AggregateFunctionQuantile.h>
|
||||
#include <AggregateFunctions/QuantileExact.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename Value, bool _> using FuncQuantileExact = AggregateFunctionQuantile<Value, QuantileExact<Value>, NameQuantileExact, false, void, false>;
|
||||
template <typename Value, bool _> using FuncQuantilesExact = AggregateFunctionQuantile<Value, QuantileExact<Value>, NameQuantilesExact, false, void, true>;
|
||||
|
||||
|
||||
template <template <typename, bool> class Function>
|
||||
AggregateFunctionPtr createAggregateFunctionQuantile(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||
{
|
||||
/// Second argument type check doesn't depend on the type of the first one.
|
||||
Function<void, true>::assertSecondArg(argument_types);
|
||||
|
||||
const DataTypePtr & argument_type = argument_types[0];
|
||||
WhichDataType which(argument_type);
|
||||
|
||||
#define DISPATCH(TYPE) \
|
||||
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
|
||||
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
|
||||
#undef DISPATCH
|
||||
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
|
||||
|
||||
if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime64) return std::make_shared<Function<DateTime64, false>>(argument_types, params);
|
||||
|
||||
if (which.idx == TypeIndex::Int128) return std::make_shared<Function<Int128, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::UInt128) return std::make_shared<Function<Int128, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Int256) return std::make_shared<Function<Int256, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::UInt256) return std::make_shared<Function<UInt256, true>>(argument_types, params);
|
||||
|
||||
throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionsQuantileExact(AggregateFunctionFactory & factory)
{
    /// The multi-level function returns an array, so NULL cannot be returned on an empty set;
    /// it is therefore registered with returns_default_when_only_null set.
    AggregateFunctionProperties array_result_properties{.returns_default_when_only_null = true};

    factory.registerFunction(NameQuantileExact::name, createAggregateFunctionQuantile<FuncQuantileExact>);
    factory.registerFunction(
        NameQuantilesExact::name,
        {createAggregateFunctionQuantile<FuncQuantilesExact>, array_result_properties});

    /// The 'median' name is an alias of the single-level 'quantile' function.
    factory.registerAlias("medianExact", NameQuantileExact::name);
}
|
||||
|
||||
}
|
@ -0,0 +1,57 @@
|
||||
#include <AggregateFunctions/AggregateFunctionQuantile.h>
|
||||
#include <AggregateFunctions/QuantileExact.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename Value, bool _> using FuncQuantileExactExclusive = AggregateFunctionQuantile<Value, QuantileExactExclusive<Value>, NameQuantileExactExclusive, false, Float64, false>;
|
||||
template <typename Value, bool _> using FuncQuantilesExactExclusive = AggregateFunctionQuantile<Value, QuantileExactExclusive<Value>, NameQuantilesExactExclusive, false, Float64, true>;
|
||||
|
||||
template <template <typename, bool> class Function>
|
||||
AggregateFunctionPtr createAggregateFunctionQuantile(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||
{
|
||||
/// Second argument type check doesn't depend on the type of the first one.
|
||||
Function<void, true>::assertSecondArg(argument_types);
|
||||
|
||||
const DataTypePtr & argument_type = argument_types[0];
|
||||
WhichDataType which(argument_type);
|
||||
|
||||
#define DISPATCH(TYPE) \
|
||||
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
|
||||
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
|
||||
#undef DISPATCH
|
||||
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
|
||||
|
||||
throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionsQuantileExactExclusive(AggregateFunctionFactory & factory)
{
    /// The multi-level function returns an array, so NULL cannot be returned on an empty set;
    /// it is therefore registered with returns_default_when_only_null set.
    AggregateFunctionProperties array_result_properties{.returns_default_when_only_null = true};

    factory.registerFunction(
        NameQuantileExactExclusive::name,
        createAggregateFunctionQuantile<FuncQuantileExactExclusive>);
    factory.registerFunction(
        NameQuantilesExactExclusive::name,
        {createAggregateFunctionQuantile<FuncQuantilesExactExclusive>, array_result_properties});
}
|
||||
|
||||
}
|
@ -0,0 +1,67 @@
|
||||
#include <AggregateFunctions/AggregateFunctionQuantile.h>
|
||||
#include <AggregateFunctions/QuantileExact.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename Value, bool _> using FuncQuantileExactHigh = AggregateFunctionQuantile<Value, QuantileExactHigh<Value>, NameQuantileExactHigh, false, void, false>;
|
||||
template <typename Value, bool _> using FuncQuantilesExactHigh = AggregateFunctionQuantile<Value, QuantileExactHigh<Value>, NameQuantilesExactHigh, false, void, true>;
|
||||
|
||||
|
||||
template <template <typename, bool> class Function>
|
||||
AggregateFunctionPtr createAggregateFunctionQuantile(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||
{
|
||||
/// Second argument type check doesn't depend on the type of the first one.
|
||||
Function<void, true>::assertSecondArg(argument_types);
|
||||
|
||||
const DataTypePtr & argument_type = argument_types[0];
|
||||
WhichDataType which(argument_type);
|
||||
|
||||
#define DISPATCH(TYPE) \
|
||||
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
|
||||
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
|
||||
#undef DISPATCH
|
||||
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
|
||||
|
||||
if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime64) return std::make_shared<Function<DateTime64, false>>(argument_types, params);
|
||||
|
||||
throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionsQuantileExactHigh(AggregateFunctionFactory & factory)
{
    /// The multi-level function returns an array, so NULL cannot be returned on an empty set;
    /// it is therefore registered with returns_default_when_only_null set.
    AggregateFunctionProperties array_result_properties{.returns_default_when_only_null = true};

    factory.registerFunction(NameQuantileExactHigh::name, createAggregateFunctionQuantile<FuncQuantileExactHigh>);
    factory.registerFunction(
        NameQuantilesExactHigh::name,
        {createAggregateFunctionQuantile<FuncQuantilesExactHigh>, array_result_properties});

    /// The 'median' name is an alias of the single-level 'quantile' function.
    factory.registerAlias("medianExactHigh", NameQuantileExactHigh::name);
}
|
||||
|
||||
}
|
@ -0,0 +1,57 @@
|
||||
#include <AggregateFunctions/AggregateFunctionQuantile.h>
|
||||
#include <AggregateFunctions/QuantileExact.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename Value, bool _> using FuncQuantileExactInclusive = AggregateFunctionQuantile<Value, QuantileExactInclusive<Value>, NameQuantileExactInclusive, false, Float64, false>;
|
||||
template <typename Value, bool _> using FuncQuantilesExactInclusive = AggregateFunctionQuantile<Value, QuantileExactInclusive<Value>, NameQuantilesExactInclusive, false, Float64, true>;
|
||||
|
||||
template <template <typename, bool> class Function>
|
||||
AggregateFunctionPtr createAggregateFunctionQuantile(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||
{
|
||||
/// Second argument type check doesn't depend on the type of the first one.
|
||||
Function<void, true>::assertSecondArg(argument_types);
|
||||
|
||||
const DataTypePtr & argument_type = argument_types[0];
|
||||
WhichDataType which(argument_type);
|
||||
|
||||
#define DISPATCH(TYPE) \
|
||||
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
|
||||
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
|
||||
#undef DISPATCH
|
||||
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
|
||||
|
||||
throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionsQuantileExactInclusive(AggregateFunctionFactory & factory)
{
    /// The multi-level function returns an array, so NULL cannot be returned on an empty set;
    /// it is therefore registered with returns_default_when_only_null set.
    AggregateFunctionProperties array_result_properties{.returns_default_when_only_null = true};

    factory.registerFunction(
        NameQuantileExactInclusive::name,
        createAggregateFunctionQuantile<FuncQuantileExactInclusive>);
    factory.registerFunction(
        NameQuantilesExactInclusive::name,
        {createAggregateFunctionQuantile<FuncQuantilesExactInclusive>, array_result_properties});
}
|
||||
|
||||
}
|
66
src/AggregateFunctions/AggregateFunctionQuantileExactLow.cpp
Normal file
66
src/AggregateFunctions/AggregateFunctionQuantileExactLow.cpp
Normal file
@ -0,0 +1,66 @@
|
||||
#include <AggregateFunctions/AggregateFunctionQuantile.h>
|
||||
#include <AggregateFunctions/QuantileExact.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename Value, bool _> using FuncQuantileExactLow = AggregateFunctionQuantile<Value, QuantileExactLow<Value>, NameQuantileExactLow, false, void, false>;
|
||||
template <typename Value, bool _> using FuncQuantilesExactLow = AggregateFunctionQuantile<Value, QuantileExactLow<Value>, NameQuantilesExactLow, false, void, true>;
|
||||
|
||||
template <template <typename, bool> class Function>
|
||||
AggregateFunctionPtr createAggregateFunctionQuantile(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||
{
|
||||
/// Second argument type check doesn't depend on the type of the first one.
|
||||
Function<void, true>::assertSecondArg(argument_types);
|
||||
|
||||
const DataTypePtr & argument_type = argument_types[0];
|
||||
WhichDataType which(argument_type);
|
||||
|
||||
#define DISPATCH(TYPE) \
|
||||
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
|
||||
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
|
||||
#undef DISPATCH
|
||||
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
|
||||
|
||||
if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime64) return std::make_shared<Function<DateTime64, false>>(argument_types, params);
|
||||
|
||||
throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionsQuantileExactLow(AggregateFunctionFactory & factory)
{
    /// The multi-level function returns an array, so NULL cannot be returned on an empty set;
    /// it is therefore registered with returns_default_when_only_null set.
    AggregateFunctionProperties array_result_properties{.returns_default_when_only_null = true};

    factory.registerFunction(NameQuantileExactLow::name, createAggregateFunctionQuantile<FuncQuantileExactLow>);
    factory.registerFunction(
        NameQuantilesExactLow::name,
        {createAggregateFunctionQuantile<FuncQuantilesExactLow>, array_result_properties});

    /// The 'median' name is an alias of the single-level 'quantile' function.
    factory.registerAlias("medianExactLow", NameQuantileExactLow::name);
}
|
||||
|
||||
}
|
@ -0,0 +1,71 @@
|
||||
#include <AggregateFunctions/AggregateFunctionQuantile.h>
|
||||
#include <AggregateFunctions/QuantileExactWeighted.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename Value, bool _> using FuncQuantileExactWeighted = AggregateFunctionQuantile<Value, QuantileExactWeighted<Value>, NameQuantileExactWeighted, true, void, false>;
|
||||
template <typename Value, bool _> using FuncQuantilesExactWeighted = AggregateFunctionQuantile<Value, QuantileExactWeighted<Value>, NameQuantilesExactWeighted, true, void, true>;
|
||||
|
||||
template <template <typename, bool> class Function>
|
||||
AggregateFunctionPtr createAggregateFunctionQuantile(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||
{
|
||||
/// Second argument type check doesn't depend on the type of the first one.
|
||||
Function<void, true>::assertSecondArg(argument_types);
|
||||
|
||||
const DataTypePtr & argument_type = argument_types[0];
|
||||
WhichDataType which(argument_type);
|
||||
|
||||
#define DISPATCH(TYPE) \
|
||||
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
|
||||
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
|
||||
#undef DISPATCH
|
||||
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
|
||||
|
||||
if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime64) return std::make_shared<Function<DateTime64, false>>(argument_types, params);
|
||||
|
||||
if (which.idx == TypeIndex::Int128) return std::make_shared<Function<Int128, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::UInt128) return std::make_shared<Function<Int128, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Int256) return std::make_shared<Function<Int256, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::UInt256) return std::make_shared<Function<UInt256, true>>(argument_types, params);
|
||||
|
||||
throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionsQuantileExactWeighted(AggregateFunctionFactory & factory)
{
    /// The multi-level function returns an array, so NULL cannot be returned on an empty set;
    /// it is therefore registered with returns_default_when_only_null set.
    AggregateFunctionProperties array_result_properties{.returns_default_when_only_null = true};

    factory.registerFunction(
        NameQuantileExactWeighted::name,
        createAggregateFunctionQuantile<FuncQuantileExactWeighted>);
    factory.registerFunction(
        NameQuantilesExactWeighted::name,
        {createAggregateFunctionQuantile<FuncQuantilesExactWeighted>, array_result_properties});

    /// The 'median' name is an alias of the single-level 'quantile' function.
    factory.registerAlias("medianExactWeighted", NameQuantileExactWeighted::name);
}
|
||||
|
||||
}
|
60
src/AggregateFunctions/AggregateFunctionQuantileTDigest.cpp
Normal file
60
src/AggregateFunctions/AggregateFunctionQuantileTDigest.cpp
Normal file
@ -0,0 +1,60 @@
|
||||
#include <AggregateFunctions/AggregateFunctionQuantile.h>
|
||||
#include <AggregateFunctions/QuantileTDigest.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename Value, bool float_return> using FuncQuantileTDigest = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantileTDigest, false, std::conditional_t<float_return, Float32, void>, false>;
|
||||
template <typename Value, bool float_return> using FuncQuantilesTDigest = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantilesTDigest, false, std::conditional_t<float_return, Float32, void>, true>;
|
||||
|
||||
template <template <typename, bool> class Function>
|
||||
AggregateFunctionPtr createAggregateFunctionQuantile(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||
{
|
||||
/// Second argument type check doesn't depend on the type of the first one.
|
||||
Function<void, true>::assertSecondArg(argument_types);
|
||||
|
||||
const DataTypePtr & argument_type = argument_types[0];
|
||||
WhichDataType which(argument_type);
|
||||
|
||||
#define DISPATCH(TYPE) \
|
||||
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
|
||||
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
|
||||
#undef DISPATCH
|
||||
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
|
||||
|
||||
throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionsQuantileTDigest(AggregateFunctionFactory & factory)
{
    /// The multi-level function returns an array, so NULL cannot be returned on an empty set;
    /// it is therefore registered with returns_default_when_only_null set.
    AggregateFunctionProperties array_result_properties{.returns_default_when_only_null = true};

    factory.registerFunction(NameQuantileTDigest::name, createAggregateFunctionQuantile<FuncQuantileTDigest>);
    factory.registerFunction(
        NameQuantilesTDigest::name,
        {createAggregateFunctionQuantile<FuncQuantilesTDigest>, array_result_properties});

    /// The 'median' name is an alias of the single-level 'quantile' function.
    factory.registerAlias("medianTDigest", NameQuantileTDigest::name);
}
|
||||
|
||||
}
|
@ -0,0 +1,60 @@
|
||||
#include <AggregateFunctions/AggregateFunctionQuantile.h>
|
||||
#include <AggregateFunctions/QuantileTDigest.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename Value, bool float_return> using FuncQuantileTDigestWeighted = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantileTDigestWeighted, true, std::conditional_t<float_return, Float32, void>, false>;
|
||||
template <typename Value, bool float_return> using FuncQuantilesTDigestWeighted = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantilesTDigestWeighted, true, std::conditional_t<float_return, Float32, void>, true>;
|
||||
|
||||
template <template <typename, bool> class Function>
|
||||
AggregateFunctionPtr createAggregateFunctionQuantile(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||
{
|
||||
/// Second argument type check doesn't depend on the type of the first one.
|
||||
Function<void, true>::assertSecondArg(argument_types);
|
||||
|
||||
const DataTypePtr & argument_type = argument_types[0];
|
||||
WhichDataType which(argument_type);
|
||||
|
||||
#define DISPATCH(TYPE) \
|
||||
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
|
||||
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
|
||||
#undef DISPATCH
|
||||
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
|
||||
|
||||
throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionsQuantileTDigestWeighted(AggregateFunctionFactory & factory)
{
    /// The multi-level function returns an array, so NULL cannot be returned on an empty set;
    /// it is therefore registered with returns_default_when_only_null set.
    AggregateFunctionProperties array_result_properties{.returns_default_when_only_null = true};

    factory.registerFunction(
        NameQuantileTDigestWeighted::name,
        createAggregateFunctionQuantile<FuncQuantileTDigestWeighted>);
    factory.registerFunction(
        NameQuantilesTDigestWeighted::name,
        {createAggregateFunctionQuantile<FuncQuantilesTDigestWeighted>, array_result_properties});

    /// The 'median' name is an alias of the single-level 'quantile' function.
    factory.registerAlias("medianTDigestWeighted", NameQuantileTDigestWeighted::name);
}
|
||||
|
||||
}
|
60
src/AggregateFunctions/AggregateFunctionQuantileTiming.cpp
Normal file
60
src/AggregateFunctions/AggregateFunctionQuantileTiming.cpp
Normal file
@ -0,0 +1,60 @@
|
||||
#include <AggregateFunctions/AggregateFunctionQuantile.h>
|
||||
#include <AggregateFunctions/QuantileTiming.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename Value, bool _> using FuncQuantileTiming = AggregateFunctionQuantile<Value, QuantileTiming<Value>, NameQuantileTiming, false, Float32, false>;
|
||||
template <typename Value, bool _> using FuncQuantilesTiming = AggregateFunctionQuantile<Value, QuantileTiming<Value>, NameQuantilesTiming, false, Float32, true>;
|
||||
|
||||
template <template <typename, bool> class Function>
|
||||
AggregateFunctionPtr createAggregateFunctionQuantile(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||
{
|
||||
/// Second argument type check doesn't depend on the type of the first one.
|
||||
Function<void, true>::assertSecondArg(argument_types);
|
||||
|
||||
const DataTypePtr & argument_type = argument_types[0];
|
||||
WhichDataType which(argument_type);
|
||||
|
||||
#define DISPATCH(TYPE) \
|
||||
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
|
||||
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
|
||||
#undef DISPATCH
|
||||
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
|
||||
|
||||
throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionsQuantileTiming(AggregateFunctionFactory & factory)
{
    /// The multi-level function returns an array, so NULL cannot be returned on an empty set;
    /// it is therefore registered with returns_default_when_only_null set.
    AggregateFunctionProperties array_result_properties{.returns_default_when_only_null = true};

    factory.registerFunction(NameQuantileTiming::name, createAggregateFunctionQuantile<FuncQuantileTiming>);
    factory.registerFunction(
        NameQuantilesTiming::name,
        {createAggregateFunctionQuantile<FuncQuantilesTiming>, array_result_properties});

    /// The 'median' name is an alias of the single-level 'quantile' function.
    factory.registerAlias("medianTiming", NameQuantileTiming::name);
}
|
||||
|
||||
}
|
@ -0,0 +1,60 @@
|
||||
#include <AggregateFunctions/AggregateFunctionQuantile.h>
|
||||
#include <AggregateFunctions/QuantileTiming.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename Value, bool _> using FuncQuantileTimingWeighted = AggregateFunctionQuantile<Value, QuantileTiming<Value>, NameQuantileTimingWeighted, true, Float32, false>;
|
||||
template <typename Value, bool _> using FuncQuantilesTimingWeighted = AggregateFunctionQuantile<Value, QuantileTiming<Value>, NameQuantilesTimingWeighted, true, Float32, true>;
|
||||
|
||||
template <template <typename, bool> class Function>
|
||||
AggregateFunctionPtr createAggregateFunctionQuantile(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||
{
|
||||
/// Second argument type check doesn't depend on the type of the first one.
|
||||
Function<void, true>::assertSecondArg(argument_types);
|
||||
|
||||
const DataTypePtr & argument_type = argument_types[0];
|
||||
WhichDataType which(argument_type);
|
||||
|
||||
#define DISPATCH(TYPE) \
|
||||
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
|
||||
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
|
||||
#undef DISPATCH
|
||||
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
|
||||
|
||||
throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionsQuantileTimingWeighted(AggregateFunctionFactory & factory)
{
    /// The multi-level function returns an array, so NULL cannot be returned on an empty set;
    /// it is therefore registered with returns_default_when_only_null set.
    AggregateFunctionProperties array_result_properties{.returns_default_when_only_null = true};

    factory.registerFunction(
        NameQuantileTimingWeighted::name,
        createAggregateFunctionQuantile<FuncQuantileTimingWeighted>);
    factory.registerFunction(
        NameQuantilesTimingWeighted::name,
        {createAggregateFunctionQuantile<FuncQuantilesTimingWeighted>, array_result_properties});

    /// The 'median' name is an alias of the single-level 'quantile' function.
    factory.registerAlias("medianTimingWeighted", NameQuantileTimingWeighted::name);
}
|
||||
|
||||
}
|
@ -519,7 +519,7 @@ private:
|
||||
template <typename FieldType>
|
||||
bool compareImpl(FieldType & x) const
|
||||
{
|
||||
auto val = get<FieldType>(rhs);
|
||||
auto val = rhs.get<FieldType>();
|
||||
if (val > x)
|
||||
{
|
||||
x = val;
|
||||
@ -554,7 +554,7 @@ private:
|
||||
template <typename FieldType>
|
||||
bool compareImpl(FieldType & x) const
|
||||
{
|
||||
auto val = get<FieldType>(rhs);
|
||||
auto val = rhs.get<FieldType>();
|
||||
if (val < x)
|
||||
{
|
||||
x = val;
|
||||
|
@ -81,7 +81,7 @@ class QuantileTDigest
|
||||
*/
|
||||
struct Params
|
||||
{
|
||||
Value epsilon = 0.01;
|
||||
Value epsilon = 0.01f;
|
||||
size_t max_centroids = 2048;
|
||||
size_t max_unmerged = 2048;
|
||||
};
|
||||
@ -99,13 +99,12 @@ class QuantileTDigest
|
||||
BetterFloat count = 0;
|
||||
size_t unmerged = 0;
|
||||
|
||||
/** Linear interpolation at the point x on the line (x1, y1)..(x2, y2)
|
||||
*/
|
||||
/// Linear interpolation at the point x on the line (x1, y1)..(x2, y2)
|
||||
static Value interpolate(Value x, Value x1, Value y1, Value x2, Value y2)
|
||||
{
|
||||
/// Symmetric interpolation for better results with infinities.
|
||||
double k = (x - x1) / (x2 - x1);
|
||||
return (1 - k) * y1 + k * y2;
|
||||
return static_cast<Value>((1 - k) * y1 + k * y2);
|
||||
}
|
||||
|
||||
struct RadixSortTraits
|
||||
|
@ -178,7 +178,7 @@ namespace detail
|
||||
if (!elems.empty())
|
||||
{
|
||||
size_t n = level < 1
|
||||
? level * elems.size()
|
||||
? static_cast<size_t>(level * elems.size())
|
||||
: (elems.size() - 1);
|
||||
|
||||
/// Sorting an array will not be considered a violation of constancy.
|
||||
@ -201,7 +201,7 @@ namespace detail
|
||||
auto level = levels[level_index];
|
||||
|
||||
size_t n = level < 1
|
||||
? level * elems.size()
|
||||
? static_cast<size_t>(level * elems.size())
|
||||
: (elems.size() - 1);
|
||||
|
||||
::nth_element(array.begin() + prev_n, array.begin() + n, array.end());
|
||||
|
@ -18,6 +18,19 @@ void registerAggregateFunctionGroupArray(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantile(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileDeterministic(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileExact(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileExactWeighted(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileExactLow(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileExactHigh(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileExactInclusive(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileExactExclusive(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileTiming(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileTimingWeighted(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileTDigest(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileTDigestWeighted(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileBFloat16(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileBFloat16Weighted(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionWindowFunnel(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionRate(AggregateFunctionFactory &);
|
||||
@ -89,6 +102,19 @@ void registerAggregateFunctions()
|
||||
registerAggregateFunctionGroupUniqArray(factory);
|
||||
registerAggregateFunctionGroupArrayInsertAt(factory);
|
||||
registerAggregateFunctionsQuantile(factory);
|
||||
registerAggregateFunctionsQuantileDeterministic(factory);
|
||||
registerAggregateFunctionsQuantileExact(factory);
|
||||
registerAggregateFunctionsQuantileExactWeighted(factory);
|
||||
registerAggregateFunctionsQuantileExactLow(factory);
|
||||
registerAggregateFunctionsQuantileExactHigh(factory);
|
||||
registerAggregateFunctionsQuantileExactInclusive(factory);
|
||||
registerAggregateFunctionsQuantileExactExclusive(factory);
|
||||
registerAggregateFunctionsQuantileTiming(factory);
|
||||
registerAggregateFunctionsQuantileTimingWeighted(factory);
|
||||
registerAggregateFunctionsQuantileTDigest(factory);
|
||||
registerAggregateFunctionsQuantileTDigestWeighted(factory);
|
||||
registerAggregateFunctionsQuantileBFloat16(factory);
|
||||
registerAggregateFunctionsQuantileBFloat16Weighted(factory);
|
||||
registerAggregateFunctionsSequenceMatch(factory);
|
||||
registerAggregateFunctionWindowFunnel(factory);
|
||||
registerAggregateFunctionRate(factory);
|
||||
|
37
src/Backups/BackupEntryWrappedWith.h
Normal file
37
src/Backups/BackupEntryWrappedWith.h
Normal file
@ -0,0 +1,37 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/IBackupEntry.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Wraps another backup entry and a value of any type.
|
||||
template <typename T>
|
||||
class BackupEntryWrappedWith : public IBackupEntry
|
||||
{
|
||||
public:
|
||||
BackupEntryWrappedWith(BackupEntryPtr entry_, const T & custom_value_) : entry(entry_), custom_value(custom_value_) { }
|
||||
BackupEntryWrappedWith(BackupEntryPtr entry_, T && custom_value_) : entry(entry_), custom_value(std::move(custom_value_)) { }
|
||||
~BackupEntryWrappedWith() override = default;
|
||||
|
||||
UInt64 getSize() const override { return entry->getSize(); }
|
||||
std::optional<UInt128> getChecksum() const override { return entry->getChecksum(); }
|
||||
std::unique_ptr<SeekableReadBuffer> getReadBuffer() const override { return entry->getReadBuffer(); }
|
||||
String getFilePath() const override { return entry->getFilePath(); }
|
||||
DiskPtr tryGetDiskIfExists() const override { return entry->tryGetDiskIfExists(); }
|
||||
DataSourceDescription getDataSourceDescription() const override { return entry->getDataSourceDescription(); }
|
||||
|
||||
private:
|
||||
BackupEntryPtr entry;
|
||||
T custom_value;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
void wrapBackupEntriesWith(std::vector<std::pair<String, BackupEntryPtr>> & backup_entries, const T & custom_value)
|
||||
{
|
||||
for (auto & [_, backup_entry] : backup_entries)
|
||||
backup_entry = std::make_shared<BackupEntryWrappedWith<T>>(std::move(backup_entry), custom_value);
|
||||
}
|
||||
|
||||
}
|
@ -42,6 +42,14 @@ endif ()
|
||||
# See `src/Common/TargetSpecific.h`
|
||||
option(ENABLE_MULTITARGET_CODE "Enable platform-dependent code" ON)
|
||||
|
||||
if (NO_SSE3_OR_HIGHER)
|
||||
# Optimized x86 code in DECLARE_*_SPECIFIC_CODE blocks (see `src/Common/TargetSpecific.h`) is sometimes marked FORCE_INLINE. As a
|
||||
# result, its instruction set requirements (e.g. SSE4.2) leak into generic code. This is normally not a problem for standard x86 builds
|
||||
# because generic code is compiled with SSE 4.2 anyways. But it breaks SSE2-only builds. Therefore, disable the multitarget code
|
||||
# machinery and always use generic code. (The cleaner alternative is removing FORCE_INLINE but that impacts performance too much.)
|
||||
set(ENABLE_MULTITARGET_CODE OFF)
|
||||
endif()
|
||||
|
||||
if (ENABLE_MULTITARGET_CODE)
|
||||
add_definitions(-DENABLE_MULTITARGET_CODE=1)
|
||||
else()
|
||||
|
@ -16,6 +16,7 @@ namespace ErrorCodes
|
||||
extern const int ATTEMPT_TO_READ_AFTER_EOF;
|
||||
extern const int NETWORK_ERROR;
|
||||
extern const int SOCKET_TIMEOUT;
|
||||
extern const int DNS_ERROR;
|
||||
}
|
||||
|
||||
ConnectionEstablisher::ConnectionEstablisher(
|
||||
@ -90,6 +91,7 @@ void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std::
|
||||
catch (const Exception & e)
|
||||
{
|
||||
if (e.code() != ErrorCodes::NETWORK_ERROR && e.code() != ErrorCodes::SOCKET_TIMEOUT
|
||||
&& e.code() != ErrorCodes::DNS_ERROR
|
||||
&& e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF)
|
||||
throw;
|
||||
|
||||
|
@ -54,7 +54,7 @@ Field QueryFuzzer::getRandomField(int type)
|
||||
}
|
||||
case 1:
|
||||
{
|
||||
static constexpr float values[]
|
||||
static constexpr double values[]
|
||||
= {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999,
|
||||
1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20,
|
||||
FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % (sizeof(values) / sizeof(*values))];
|
||||
|
@ -607,7 +607,7 @@ MutableColumns ColumnAggregateFunction::scatter(IColumn::ColumnIndex num_columns
|
||||
size_t num_rows = size();
|
||||
|
||||
{
|
||||
size_t reserve_size = static_cast<double>(num_rows) / num_columns * 1.1; /// 1.1 is just a guess. Better to use n-sigma rule.
|
||||
size_t reserve_size = static_cast<size_t>(static_cast<double>(num_rows) / num_columns * 1.1); /// 1.1 is just a guess. Better to use n-sigma rule.
|
||||
|
||||
if (reserve_size > 1)
|
||||
for (auto & column : columns)
|
||||
|
@ -50,7 +50,7 @@ ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr &&
|
||||
if (!offsets_concrete)
|
||||
throw Exception("offsets_column must be a ColumnUInt64", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
if (!offsets_concrete->empty() && data)
|
||||
if (!offsets_concrete->empty() && data && !data->empty())
|
||||
{
|
||||
Offset last_offset = offsets_concrete->getData().back();
|
||||
|
||||
@ -141,7 +141,7 @@ void ColumnArray::get(size_t n, Field & res) const
|
||||
size, max_array_size_as_field);
|
||||
|
||||
res = Array();
|
||||
Array & res_arr = DB::get<Array &>(res);
|
||||
Array & res_arr = res.get<Array &>();
|
||||
res_arr.reserve(size);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
@ -296,7 +296,7 @@ void ColumnArray::updateHashFast(SipHash & hash) const
|
||||
|
||||
void ColumnArray::insert(const Field & x)
|
||||
{
|
||||
const Array & array = DB::get<const Array &>(x);
|
||||
const Array & array = x.get<const Array &>();
|
||||
size_t size = array.size();
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
getData().insert(array[i]);
|
||||
|
@ -63,7 +63,7 @@ public:
|
||||
{
|
||||
data.resize_fill(data.size() + length);
|
||||
}
|
||||
void insert(const Field & x) override { data.push_back(DB::get<T>(x)); }
|
||||
void insert(const Field & x) override { data.push_back(x.get<T>()); }
|
||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||
|
||||
void popBack(size_t n) override
|
||||
|
@ -59,7 +59,7 @@ bool ColumnFixedString::isDefaultAt(size_t index) const
|
||||
|
||||
void ColumnFixedString::insert(const Field & x)
|
||||
{
|
||||
const String & s = DB::get<const String &>(x);
|
||||
const String & s = x.get<const String &>();
|
||||
|
||||
if (s.size() > n)
|
||||
throw Exception("Too large string '" + s + "' for FixedString column", ErrorCodes::TOO_LARGE_STRING_SIZE);
|
||||
|
@ -68,9 +68,9 @@ public:
|
||||
UInt64 get64(size_t n) const override { return getDictionary().get64(getIndexes().getUInt(n)); }
|
||||
UInt64 getUInt(size_t n) const override { return getDictionary().getUInt(getIndexes().getUInt(n)); }
|
||||
Int64 getInt(size_t n) const override { return getDictionary().getInt(getIndexes().getUInt(n)); }
|
||||
Float64 getFloat64(size_t n) const override { return getDictionary().getInt(getIndexes().getFloat64(n)); }
|
||||
Float32 getFloat32(size_t n) const override { return getDictionary().getInt(getIndexes().getFloat32(n)); }
|
||||
bool getBool(size_t n) const override { return getDictionary().getInt(getIndexes().getBool(n)); }
|
||||
Float64 getFloat64(size_t n) const override { return getDictionary().getFloat64(getIndexes().getUInt(n)); }
|
||||
Float32 getFloat32(size_t n) const override { return getDictionary().getFloat32(getIndexes().getUInt(n)); }
|
||||
bool getBool(size_t n) const override { return getDictionary().getBool(getIndexes().getUInt(n)); }
|
||||
bool isNullAt(size_t n) const override { return getDictionary().isNullAt(getIndexes().getUInt(n)); }
|
||||
ColumnPtr cut(size_t start, size_t length) const override
|
||||
{
|
||||
|
@ -74,7 +74,7 @@ void ColumnMap::get(size_t n, Field & res) const
|
||||
size_t size = offsets[n] - offsets[n - 1];
|
||||
|
||||
res = Map();
|
||||
auto & map = DB::get<Map &>(res);
|
||||
auto & map = res.get<Map &>();
|
||||
map.reserve(size);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
@ -98,7 +98,7 @@ void ColumnMap::insertData(const char *, size_t)
|
||||
|
||||
void ColumnMap::insert(const Field & x)
|
||||
{
|
||||
const auto & map = DB::get<const Map &>(x);
|
||||
const auto & map = x.get<const Map &>();
|
||||
nested->insert(Array(map.begin(), map.end()));
|
||||
}
|
||||
|
||||
|
@ -128,7 +128,7 @@ public:
|
||||
|
||||
void insert(const Field & x) override
|
||||
{
|
||||
const String & s = DB::get<const String &>(x);
|
||||
const String & s = x.get<const String &>();
|
||||
const size_t old_size = chars.size();
|
||||
const size_t size_to_append = s.size() + 1;
|
||||
const size_t new_size = old_size + size_to_append;
|
||||
|
@ -109,7 +109,7 @@ void ColumnTuple::get(size_t n, Field & res) const
|
||||
const size_t tuple_size = columns.size();
|
||||
|
||||
res = Tuple();
|
||||
Tuple & res_tuple = DB::get<Tuple &>(res);
|
||||
Tuple & res_tuple = res.get<Tuple &>();
|
||||
res_tuple.reserve(tuple_size);
|
||||
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
@ -137,7 +137,7 @@ void ColumnTuple::insertData(const char *, size_t)
|
||||
|
||||
void ColumnTuple::insert(const Field & x)
|
||||
{
|
||||
const auto & tuple = DB::get<const Tuple &>(x);
|
||||
const auto & tuple = x.get<const Tuple &>();
|
||||
|
||||
const size_t tuple_size = columns.size();
|
||||
if (tuple.size() != tuple_size)
|
||||
|
@ -90,7 +90,7 @@ void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const
|
||||
|
||||
while (begin < end)
|
||||
{
|
||||
*hash_data = intHashCRC32(*begin, *hash_data);
|
||||
*hash_data = hashCRC32(*begin, *hash_data);
|
||||
++begin;
|
||||
++hash_data;
|
||||
}
|
||||
@ -918,7 +918,7 @@ ColumnPtr ColumnVector<T>::createWithOffsets(const IColumn::Offsets & offsets, c
|
||||
auto res = this->create();
|
||||
auto & res_data = res->getData();
|
||||
|
||||
T default_value = safeGet<T>(default_field);
|
||||
T default_value = default_field.safeGet<T>();
|
||||
res_data.resize_fill(total_rows, default_value);
|
||||
for (size_t i = 0; i < offsets.size(); ++i)
|
||||
res_data[offsets[i]] = data[i + shift];
|
||||
|
@ -301,7 +301,7 @@ public:
|
||||
|
||||
void insert(const Field & x) override
|
||||
{
|
||||
data.push_back(DB::get<T>(x));
|
||||
data.push_back(x.get<T>());
|
||||
}
|
||||
|
||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||
|
@ -54,7 +54,7 @@ std::vector<IColumn::MutablePtr> IColumn::scatterImpl(ColumnIndex num_columns,
|
||||
column = cloneEmpty();
|
||||
|
||||
{
|
||||
size_t reserve_size = num_rows * 1.1 / num_columns; /// 1.1 is just a guess. Better to use n-sigma rule.
|
||||
size_t reserve_size = static_cast<size_t>(num_rows * 1.1 / num_columns); /// 1.1 is just a guess. Better to use n-sigma rule.
|
||||
|
||||
if (reserve_size > 1)
|
||||
for (auto & column : columns)
|
||||
|
@ -119,7 +119,7 @@ size_t extractMaskNumericImpl(
|
||||
(*nulls)[i] = 1;
|
||||
}
|
||||
else
|
||||
value = !!data[index];
|
||||
value = static_cast<bool>(data[index]);
|
||||
|
||||
if constexpr (inverted)
|
||||
value = !value;
|
||||
@ -335,4 +335,3 @@ void copyMask(const PaddedPODArray<UInt8> & from, PaddedPODArray<UInt8> & to)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -31,7 +31,7 @@ inline size_t roundUpToPowerOfTwoOrZero(size_t n)
|
||||
|
||||
|
||||
template <typename T>
|
||||
inline size_t getLeadingZeroBitsUnsafe(T x)
|
||||
inline uint32_t getLeadingZeroBitsUnsafe(T x)
|
||||
{
|
||||
assert(x != 0);
|
||||
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <Poco/DOM/Text.h>
|
||||
#include <Poco/DOM/Attr.h>
|
||||
#include <Poco/DOM/Comment.h>
|
||||
#include <Poco/XML/XMLWriter.h>
|
||||
#include <Poco/Util/XMLConfiguration.h>
|
||||
#include <Common/ZooKeeper/ZooKeeperNodeCache.h>
|
||||
#include <Common/ZooKeeper/KeeperException.h>
|
||||
@ -729,7 +730,11 @@ void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config,
|
||||
if (!preprocessed_path_parent.empty())
|
||||
fs::create_directories(preprocessed_path_parent);
|
||||
}
|
||||
DOMWriter().writeNode(preprocessed_path, loaded_config.preprocessed_xml);
|
||||
DOMWriter writer;
|
||||
writer.setNewLine("\n");
|
||||
writer.setIndent(" ");
|
||||
writer.setOptions(Poco::XML::XMLWriter::PRETTY_PRINT);
|
||||
writer.writeNode(preprocessed_path, loaded_config.preprocessed_xml);
|
||||
LOG_DEBUG(log, "Saved preprocessed configuration to '{}'.", preprocessed_path);
|
||||
}
|
||||
catch (Poco::Exception & e)
|
||||
|
@ -26,114 +26,107 @@ namespace ErrorCodes
|
||||
extern const int CANNOT_PARSE_YAML;
|
||||
}
|
||||
|
||||
/// A prefix symbol in yaml key
|
||||
/// We add attributes to nodes by using a prefix symbol in the key part.
|
||||
/// Currently we use @ as a prefix symbol. Note, that @ is reserved
|
||||
/// by YAML standard, so we need to write a key-value pair like this: "@attribute": attr_value
|
||||
const char YAML_ATTRIBUTE_PREFIX = '@';
|
||||
|
||||
namespace
|
||||
{
|
||||
/// A prefix symbol in yaml key
|
||||
/// We add attributes to nodes by using a prefix symbol in the key part.
|
||||
/// Currently we use @ as a prefix symbol. Note, that @ is reserved
|
||||
/// by YAML standard, so we need to write a key-value pair like this: "@attribute": attr_value
|
||||
const char YAML_ATTRIBUTE_PREFIX = '@';
|
||||
|
||||
Poco::AutoPtr<Poco::XML::Element> createCloneNode(Poco::XML::Element & original_node)
|
||||
{
|
||||
Poco::AutoPtr<Poco::XML::Element> clone_node = original_node.ownerDocument()->createElement(original_node.nodeName());
|
||||
original_node.parentNode()->appendChild(clone_node);
|
||||
return clone_node;
|
||||
}
|
||||
|
||||
void processNode(const YAML::Node & node, Poco::XML::Element & parent_xml_element)
|
||||
{
|
||||
auto * xml_document = parent_xml_element.ownerDocument();
|
||||
switch (node.Type())
|
||||
Poco::AutoPtr<Poco::XML::Element> cloneXMLNode(const Poco::XML::Element & original_node)
|
||||
{
|
||||
case YAML::NodeType::Scalar:
|
||||
{
|
||||
std::string value = node.as<std::string>();
|
||||
Poco::AutoPtr<Poco::XML::Text> xml_value = xml_document->createTextNode(value);
|
||||
parent_xml_element.appendChild(xml_value);
|
||||
break;
|
||||
}
|
||||
Poco::AutoPtr<Poco::XML::Element> clone_node = original_node.ownerDocument()->createElement(original_node.nodeName());
|
||||
original_node.parentNode()->appendChild(clone_node);
|
||||
return clone_node;
|
||||
}
|
||||
|
||||
/// We process YAML Sequences as a
|
||||
/// list of <key>value</key> tags with same key and different values.
|
||||
/// For example, we translate this sequence
|
||||
/// seq:
|
||||
/// - val1
|
||||
/// - val2
|
||||
///
|
||||
/// into this:
|
||||
/// <seq>val1</seq>
|
||||
/// <seq>val2</seq>
|
||||
case YAML::NodeType::Sequence:
|
||||
void processNode(const YAML::Node & node, Poco::XML::Element & parent_xml_node)
|
||||
{
|
||||
auto * xml_document = parent_xml_node.ownerDocument();
|
||||
switch (node.Type())
|
||||
{
|
||||
for (const auto & child_node : node)
|
||||
/// For sequences it depends how we want to process them.
|
||||
/// Sequences of key-value pairs such as:
|
||||
/// seq:
|
||||
/// - k1: val1
|
||||
/// - k2: val2
|
||||
/// into xml like this:
|
||||
/// <seq>
|
||||
/// <k1>val1</k1>
|
||||
/// <k2>val2</k2>
|
||||
/// </seq>
|
||||
///
|
||||
/// But, if the sequence is just a list, the root-node needs to be repeated, such as:
|
||||
/// seq:
|
||||
/// - val1
|
||||
/// - val2
|
||||
/// into xml like this:
|
||||
/// <seq>val1</seq>
|
||||
/// <seq>val2</seq>
|
||||
///
|
||||
/// Therefore check what type the child is, for further processing.
|
||||
/// Mixing types (values list or map) will lead to strange results but should not happen.
|
||||
if (parent_xml_element.hasChildNodes() && !child_node.IsMap())
|
||||
{
|
||||
/// Create a new parent node with same tag for each child node
|
||||
processNode(child_node, *createCloneNode(parent_xml_element));
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Map, so don't recreate the parent node but add directly
|
||||
processNode(child_node, parent_xml_element);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case YAML::NodeType::Map:
|
||||
{
|
||||
for (const auto & key_value_pair : node)
|
||||
case YAML::NodeType::Scalar:
|
||||
{
|
||||
const auto & key_node = key_value_pair.first;
|
||||
const auto & value_node = key_value_pair.second;
|
||||
std::string key = key_node.as<std::string>();
|
||||
bool is_attribute = (key.starts_with(YAML_ATTRIBUTE_PREFIX) && value_node.IsScalar());
|
||||
if (is_attribute)
|
||||
{
|
||||
/// we use substr(1) here to remove YAML_ATTRIBUTE_PREFIX from key
|
||||
auto attribute_name = key.substr(1);
|
||||
std::string value = value_node.as<std::string>();
|
||||
parent_xml_element.setAttribute(attribute_name, value);
|
||||
}
|
||||
else
|
||||
{
|
||||
Poco::AutoPtr<Poco::XML::Element> xml_key = xml_document->createElement(key);
|
||||
parent_xml_element.appendChild(xml_key);
|
||||
processNode(value_node, *xml_key);
|
||||
}
|
||||
std::string value = node.as<std::string>();
|
||||
Poco::AutoPtr<Poco::XML::Text> xml_value = xml_document->createTextNode(value);
|
||||
parent_xml_node.appendChild(xml_value);
|
||||
break;
|
||||
}
|
||||
|
||||
/// For sequences we repeat the parent xml node. For example,
|
||||
/// seq:
|
||||
/// - val1
|
||||
/// - val2
|
||||
/// is converted into the following xml:
|
||||
/// <seq>val1</seq>
|
||||
/// <seq>val2</seq>
|
||||
///
|
||||
/// A sequence of mappings is converted in the same way:
|
||||
/// seq:
|
||||
/// - k1: val1
|
||||
/// k2: val2
|
||||
/// - k3: val3
|
||||
/// is converted into the following xml:
|
||||
/// <seq><k1>val1</k1><k2>val2</k2></seq>
|
||||
/// <seq><k3>val3</k3></seq>
|
||||
case YAML::NodeType::Sequence:
|
||||
{
|
||||
size_t i = 0;
|
||||
for (auto it = node.begin(); it != node.end(); ++it, ++i)
|
||||
{
|
||||
const auto & child_node = *it;
|
||||
|
||||
bool need_clone_parent_xml_node = (i > 0);
|
||||
|
||||
if (need_clone_parent_xml_node)
|
||||
{
|
||||
/// Create a new parent node with same tag for each child node
|
||||
processNode(child_node, *cloneXMLNode(parent_xml_node));
|
||||
}
|
||||
else
|
||||
{
|
||||
/// First element of the sequence: reuse the existing parent node instead of cloning it
|
||||
processNode(child_node, parent_xml_node);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case YAML::NodeType::Map:
|
||||
{
|
||||
for (const auto & key_value_pair : node)
|
||||
{
|
||||
const auto & key_node = key_value_pair.first;
|
||||
const auto & value_node = key_value_pair.second;
|
||||
std::string key = key_node.as<std::string>();
|
||||
bool is_attribute = (key.starts_with(YAML_ATTRIBUTE_PREFIX) && value_node.IsScalar());
|
||||
if (is_attribute)
|
||||
{
|
||||
/// we use substr(1) here to remove YAML_ATTRIBUTE_PREFIX from key
|
||||
auto attribute_name = key.substr(1);
|
||||
std::string value = value_node.as<std::string>();
|
||||
parent_xml_node.setAttribute(attribute_name, value);
|
||||
}
|
||||
else
|
||||
{
|
||||
Poco::AutoPtr<Poco::XML::Element> xml_key = xml_document->createElement(key);
|
||||
parent_xml_node.appendChild(xml_key);
|
||||
processNode(value_node, *xml_key);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case YAML::NodeType::Null: break;
|
||||
case YAML::NodeType::Undefined:
|
||||
{
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_YAML, "YAMLParser has encountered node with undefined type and cannot continue parsing of the file");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case YAML::NodeType::Null: break;
|
||||
case YAML::NodeType::Undefined:
|
||||
{
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_YAML, "YAMLParser has encountered node with undefined type and cannot continue parsing of the file");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Poco::AutoPtr<Poco::XML::Document> YAMLParser::parse(const String& path)
|
||||
{
|
||||
|
@ -577,7 +577,7 @@ public:
|
||||
/// also make the special timezones with no whole hour offset such as 'Australia/Lord_Howe' been taken into account.
|
||||
|
||||
LUTIndex index = findIndex(t);
|
||||
UInt32 time = t - lut[index].date;
|
||||
UInt32 time = static_cast<UInt32>(t - lut[index].date);
|
||||
|
||||
if (time >= lut[index].time_at_offset_change())
|
||||
time += lut[index].amount_of_offset_change();
|
||||
@ -618,33 +618,33 @@ public:
|
||||
}
|
||||
|
||||
template <typename DateOrTime>
|
||||
inline unsigned toMonth(DateOrTime v) const { return lut[toLUTIndex(v)].month; }
|
||||
inline UInt8 toMonth(DateOrTime v) const { return lut[toLUTIndex(v)].month; }
|
||||
|
||||
template <typename DateOrTime>
|
||||
inline unsigned toQuarter(DateOrTime v) const { return (lut[toLUTIndex(v)].month - 1) / 3 + 1; }
|
||||
inline UInt8 toQuarter(DateOrTime v) const { return (lut[toLUTIndex(v)].month - 1) / 3 + 1; }
|
||||
|
||||
template <typename DateOrTime>
|
||||
inline Int16 toYear(DateOrTime v) const { return lut[toLUTIndex(v)].year; }
|
||||
|
||||
template <typename DateOrTime>
|
||||
inline unsigned toDayOfWeek(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_week; }
|
||||
inline UInt8 toDayOfWeek(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_week; }
|
||||
|
||||
template <typename DateOrTime>
|
||||
inline unsigned toDayOfMonth(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_month; }
|
||||
inline UInt8 toDayOfMonth(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_month; }
|
||||
|
||||
template <typename DateOrTime>
|
||||
inline unsigned toDayOfYear(DateOrTime v) const
|
||||
inline UInt16 toDayOfYear(DateOrTime v) const
|
||||
{
|
||||
// TODO: different overload for ExtendedDayNum
|
||||
const LUTIndex i = toLUTIndex(v);
|
||||
return i + 1 - toFirstDayNumOfYearIndex(i);
|
||||
return static_cast<UInt16>(i + 1 - toFirstDayNumOfYearIndex(i));
|
||||
}
|
||||
|
||||
/// Number of week from some fixed moment in the past. Week begins at monday.
|
||||
/// (round down to monday and divide DayNum by 7; we made an assumption,
|
||||
/// that in domain of the function there was no weeks with any other number of days than 7)
|
||||
template <typename DateOrTime>
|
||||
inline unsigned toRelativeWeekNum(DateOrTime v) const
|
||||
inline Int32 toRelativeWeekNum(DateOrTime v) const
|
||||
{
|
||||
const LUTIndex i = toLUTIndex(v);
|
||||
/// We add 8 to avoid underflow at beginning of unix epoch.
|
||||
@ -653,7 +653,7 @@ public:
|
||||
|
||||
/// Get year that contains most of the current week. Week begins at monday.
|
||||
template <typename DateOrTime>
|
||||
inline unsigned toISOYear(DateOrTime v) const
|
||||
inline Int16 toISOYear(DateOrTime v) const
|
||||
{
|
||||
const LUTIndex i = toLUTIndex(v);
|
||||
/// That's effectively the year of thursday of current week.
|
||||
@ -694,7 +694,7 @@ public:
|
||||
/// ISO 8601 week number. Week begins at monday.
|
||||
/// The week number 1 is the first week in year that contains 4 or more days (that's more than half).
|
||||
template <typename DateOrTime>
|
||||
inline unsigned toISOWeek(DateOrTime v) const
|
||||
inline UInt8 toISOWeek(DateOrTime v) const
|
||||
{
|
||||
return 1 + (toFirstDayNumOfWeek(v) - toDayNum(toFirstDayNumOfISOYearIndex(v))) / 7;
|
||||
}
|
||||
@ -751,38 +751,40 @@ public:
|
||||
|
||||
YearWeek yw(toYear(i), 0);
|
||||
UInt16 days = 0;
|
||||
const auto daynr = makeDayNum(yw.first, toMonth(i), toDayOfMonth(i));
|
||||
auto first_daynr = makeDayNum(yw.first, 1, 1);
|
||||
const auto day_number = makeDayNum(yw.first, toMonth(i), toDayOfMonth(i));
|
||||
auto first_day_number = makeDayNum(yw.first, 1, 1);
|
||||
|
||||
// 0 for monday, 1 for tuesday ...
|
||||
// get weekday from first day in year.
|
||||
UInt16 weekday = calc_weekday(first_daynr, !monday_first_mode);
|
||||
UInt8 weekday = calc_weekday(first_day_number, !monday_first_mode);
|
||||
|
||||
if (toMonth(i) == 1 && toDayOfMonth(i) <= static_cast<UInt32>(7 - weekday))
|
||||
{
|
||||
if (!week_year_mode && ((first_weekday_mode && weekday != 0) || (!first_weekday_mode && weekday >= 4)))
|
||||
return yw;
|
||||
week_year_mode = true;
|
||||
(yw.first)--;
|
||||
first_daynr -= (days = calc_days_in_year(yw.first));
|
||||
--yw.first;
|
||||
days = calc_days_in_year(yw.first);
|
||||
first_day_number -= days;
|
||||
weekday = (weekday + 53 * 7 - days) % 7;
|
||||
}
|
||||
|
||||
if ((first_weekday_mode && weekday != 0) || (!first_weekday_mode && weekday >= 4))
|
||||
days = daynr - (first_daynr + (7 - weekday));
|
||||
days = day_number - (first_day_number + (7 - weekday));
|
||||
else
|
||||
days = daynr - (first_daynr - weekday);
|
||||
days = day_number - (first_day_number - weekday);
|
||||
|
||||
if (week_year_mode && days >= 52 * 7)
|
||||
{
|
||||
weekday = (weekday + calc_days_in_year(yw.first)) % 7;
|
||||
if ((!first_weekday_mode && weekday < 4) || (first_weekday_mode && weekday == 0))
|
||||
{
|
||||
(yw.first)++;
|
||||
++yw.first;
|
||||
yw.second = 1;
|
||||
return yw;
|
||||
}
|
||||
}
|
||||
|
||||
yw.second = days / 7 + 1;
|
||||
return yw;
|
||||
}
|
||||
@ -853,7 +855,7 @@ public:
|
||||
* Returns 0 for monday, 1 for tuesday...
|
||||
*/
|
||||
template <typename DateOrTime>
|
||||
inline unsigned calc_weekday(DateOrTime v, bool sunday_first_day_of_week) const /// NOLINT
|
||||
inline UInt8 calc_weekday(DateOrTime v, bool sunday_first_day_of_week) const /// NOLINT
|
||||
{
|
||||
const LUTIndex i = toLUTIndex(v);
|
||||
if (!sunday_first_day_of_week)
|
||||
@ -863,21 +865,21 @@ public:
|
||||
}
|
||||
|
||||
/// Calculate days in one year.
|
||||
inline unsigned calc_days_in_year(Int32 year) const /// NOLINT
|
||||
inline UInt16 calc_days_in_year(Int32 year) const /// NOLINT
|
||||
{
|
||||
return ((year & 3) == 0 && (year % 100 || (year % 400 == 0 && year)) ? 366 : 365);
|
||||
}
|
||||
|
||||
/// Number of month from some fixed moment in the past (year * 12 + month)
|
||||
template <typename DateOrTime>
|
||||
inline unsigned toRelativeMonthNum(DateOrTime v) const
|
||||
inline Int32 toRelativeMonthNum(DateOrTime v) const
|
||||
{
|
||||
const LUTIndex i = toLUTIndex(v);
|
||||
return lut[i].year * 12 + lut[i].month;
|
||||
}
|
||||
|
||||
template <typename DateOrTime>
|
||||
inline unsigned toRelativeQuarterNum(DateOrTime v) const
|
||||
inline Int32 toRelativeQuarterNum(DateOrTime v) const
|
||||
{
|
||||
const LUTIndex i = toLUTIndex(v);
|
||||
return lut[i].year * 4 + (lut[i].month - 1) / 3;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user