Merge branch 'master' into revert-37534-revert-37036-keeper-preprocess-operations

Antonio Andelic 2022-06-14 12:51:35 +00:00
commit b7bd5a8eb1
89 changed files with 1375 additions and 735 deletions

View File

@ -172,7 +172,7 @@
#### Backward Incompatible Change
* Do not allow SETTINGS after FORMAT for INSERT queries (there is compatibility setting `parser_settings_after_format_compact` to accept such queries, but it is turned OFF by default). [#35883](https://github.com/ClickHouse/ClickHouse/pull/35883) ([Azat Khuzhin](https://github.com/azat)).
* Do not allow SETTINGS after FORMAT for INSERT queries (there is compatibility setting `allow_settings_after_format_in_insert` to accept such queries, but it is turned OFF by default). [#35883](https://github.com/ClickHouse/ClickHouse/pull/35883) ([Azat Khuzhin](https://github.com/azat)).
* Function `yandexConsistentHash` (consistent hashing algorithm by Konstantin "kostik" Oblakov) is renamed to `kostikConsistentHash`. The old name is left as an alias for compatibility. Although this change is backward compatible, we may remove the alias in subsequent releases, that's why it's recommended to update the usages of this function in your apps. [#35553](https://github.com/ClickHouse/ClickHouse/pull/35553) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
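To make the two backward-incompatible changes above concrete, here is a hedged sketch; the table name and argument values are illustrative, not taken from this commit:
``` sql
-- SETTINGS after FORMAT in INSERT is now rejected by default; the
-- compatibility setting below (OFF by default) restores the old parsing.
SET allow_settings_after_format_in_insert = 1;
INSERT INTO t FORMAT Values SETTINGS max_threads = 1 (1);

-- The renamed consistent-hashing function; the old name remains an alias.
SELECT kostikConsistentHash(16045690984833335023, 2);
```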
#### New Feature

View File

@ -13,7 +13,3 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.
* [Contacts](https://clickhouse.com/company/#contact) can help to get your questions answered if there are any.
## Upcoming Events
* [ClickHouse Meetup Amsterdam (in-person and online)](https://www.meetup.com/clickhouse-netherlands-user-group/events/286017044/) on June 8th, 2022

View File

@ -77,6 +77,7 @@ if (OS_LINUX AND NOT LINKER_NAME)
if (NOT LINKER_NAME)
if (GOLD_PATH)
message (WARNING "Linking with gold is not recommended. Please use lld.")
if (COMPILER_GCC)
set (LINKER_NAME "gold")
else ()

View File

@ -76,9 +76,7 @@ message (STATUS "LLVM library Directory: ${LLVM_LIBRARY_DIRS}")
message (STATUS "LLVM C++ compiler flags: ${LLVM_CXXFLAGS}")
# ld: unknown option: --color-diagnostics
if (APPLE)
set (LINKER_SUPPORTS_COLOR_DIAGNOSTICS 0 CACHE INTERNAL "")
endif ()
set (LINKER_SUPPORTS_COLOR_DIAGNOSTICS 0 CACHE INTERNAL "")
# Do not adjust RPATH in llvm, since then it will not be able to find libcxx/libcxxabi/libunwind
set (CMAKE_INSTALL_RPATH "ON")

View File

@ -21,7 +21,9 @@ By default, starting above server instance will be run as default user without p
### connect to it from a native client
```bash
$ docker run -it --rm --link some-clickhouse-server:clickhouse-server clickhouse/clickhouse-client --host clickhouse-server
$ docker run -it --rm --link some-clickhouse-server:clickhouse-server --entrypoint clickhouse-client clickhouse/clickhouse-server --host clickhouse-server
# OR
$ docker exec -it some-clickhouse-server clickhouse-client
```
More information about [ClickHouse client](https://clickhouse.com/docs/en/interfaces/cli/).

View File

@ -7,22 +7,12 @@ RUN apt-get update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get install --yes --no-install-recommends \
python3-requests \
llvm-9
&& apt-get clean
COPY s3downloader /s3downloader
ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com"
ENV DATASETS="hits visits"
ENV EXPORT_S3_STORAGE_POLICIES=1
# Download Minio-related binaries
RUN arch=${TARGETARCH:-amd64} \
&& if [ "$arch" = "amd64" ] ; then wget "https://dl.min.io/server/minio/release/linux-${arch}/archive/minio-20220103182258.0.0.x86_64.rpm"; else wget "https://dl.min.io/server/minio/release/linux-${arch}/archive/minio-20220103182258.0.0.aarch64.rpm" ; fi \
&& wget "https://dl.min.io/client/mc/release/linux-${arch}/mc" \
&& chmod +x ./mc
ENV MINIO_ROOT_USER="clickhouse"
ENV MINIO_ROOT_PASSWORD="clickhouse"
COPY setup_minio.sh /
COPY run.sh /
CMD ["/bin/bash", "/run.sh"]

View File

@ -17,7 +17,7 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
# install test configs
/usr/share/clickhouse-test/config/install.sh
./setup_minio.sh
./setup_minio.sh stateful
function start()
{

View File

@ -1,77 +0,0 @@
#!/bin/bash
# TODO: Make this file shared with stateless tests
#
# Usage for local run:
#
# ./docker/test/stateful/setup_minio.sh ./tests/
#
set -e -x -a -u
rpm2cpio ./minio-20220103182258.0.0.*.rpm | cpio -i --make-directories
find / -name minio
cp ./usr/local/bin/minio ./
ls -lha
mkdir -p ./minio_data
if [ ! -f ./minio ]; then
echo 'MinIO binary not found, downloading...'
BINARY_TYPE=$(uname -s | tr '[:upper:]' '[:lower:]')
wget "https://dl.min.io/server/minio/release/${BINARY_TYPE}-amd64/minio" \
&& chmod +x ./minio \
&& wget "https://dl.min.io/client/mc/release/${BINARY_TYPE}-amd64/mc" \
&& chmod +x ./mc
fi
MINIO_ROOT_USER=${MINIO_ROOT_USER:-clickhouse}
MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-clickhouse}
./minio --version
./minio server --address ":11111" ./minio_data &
i=0
while ! curl -v --silent http://localhost:11111 2>&1 | grep AccessDenied
do
if [[ $i == 60 ]]; then
echo "Failed to setup minio"
exit 0
fi
echo "Trying to connect to minio"
sleep 1
i=$((i + 1))
done
lsof -i :11111
sleep 5
./mc alias set clickminio http://localhost:11111 clickhouse clickhouse
./mc admin user add clickminio test testtest
./mc admin policy set clickminio readwrite user=test
./mc mb clickminio/test
# Upload data to Minio. By default, after unpacking, all tests will be in
# /usr/share/clickhouse-test/queries
TEST_PATH=${1:-/usr/share/clickhouse-test}
MINIO_DATA_PATH=${TEST_PATH}/queries/1_stateful/data_minio
# Iterating over globs will cause the redundant FILE variable to be a path to a file, not a filename
# shellcheck disable=SC2045
for FILE in $(ls "${MINIO_DATA_PATH}"); do
echo "$FILE";
./mc cp "${MINIO_DATA_PATH}"/"$FILE" clickminio/test/"$FILE";
done
mkdir -p ~/.aws
cat <<EOT >> ~/.aws/credentials
[default]
aws_access_key_id=clickhouse
aws_secret_access_key=clickhouse
EOT

View File

@ -0,0 +1 @@
../stateless/setup_minio.sh

View File

@ -5,37 +5,36 @@ FROM clickhouse/test-base:$FROM_TAG
ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz"
# golang version 1.13 on Ubuntu 20 is enough for tests
RUN apt-get update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get install --yes --no-install-recommends \
awscli \
brotli \
expect \
zstd \
golang \
lsof \
mysql-client=8.0* \
ncdu \
netcat-openbsd \
openjdk-11-jre-headless \
openssl \
postgresql-client \
protobuf-compiler \
python3 \
python3-lxml \
python3-pip \
python3-requests \
python3-termcolor \
python3-pip \
qemu-user-static \
sqlite3 \
sudo \
# golang version 1.13 on Ubuntu 20 is enough for tests
golang \
telnet \
tree \
unixodbc \
wget \
mysql-client=8.0* \
postgresql-client \
sqlite3 \
awscli \
openjdk-11-jre-headless \
rpm2cpio \
cpio
zstd \
&& apt-get clean
RUN pip3 install numpy scipy pandas Jinja2
@ -53,13 +52,17 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
ENV NUM_TRIES=1
ENV MAX_RUN_TIME=0
# Unrelated to the vars in setup_minio.sh, but they should match the values there
# so that local runs use the same binaries
ARG MINIO_SERVER_VERSION=2022-01-03T18-22-58Z
ARG MINIO_CLIENT_VERSION=2022-01-05T23-52-51Z
ARG TARGETARCH
# Download Minio-related binaries
RUN arch=${TARGETARCH:-amd64} \
&& if [ "$arch" = "amd64" ] ; then wget "https://dl.min.io/server/minio/release/linux-${arch}/archive/minio-20220103182258.0.0.x86_64.rpm"; else wget "https://dl.min.io/server/minio/release/linux-${arch}/archive/minio-20220103182258.0.0.aarch64.rpm" ; fi \
&& wget "https://dl.min.io/client/mc/release/linux-${arch}/mc" \
&& chmod +x ./mc
&& wget "https://dl.min.io/server/minio/release/linux-${arch}/archive/minio.RELEASE.${MINIO_SERVER_VERSION}" -O ./minio \
&& wget "https://dl.min.io/client/mc/release/linux-${arch}/archive/mc.RELEASE.${MINIO_CLIENT_VERSION}" -O ./mc \
&& chmod +x ./mc ./minio
RUN wget 'https://dlcdn.apache.org/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \

View File

@ -18,7 +18,7 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
# install test configs
/usr/share/clickhouse-test/config/install.sh
./setup_minio.sh
./setup_minio.sh stateless
./setup_hdfs_minicluster.sh
# For flaky check we also enable thread fuzzer

View File

@ -1,29 +1,41 @@
#!/bin/bash
# Usage for local run:
#
# ./docker/test/stateless/setup_minio.sh ./tests/
#
USAGE='Usage for local run:
./docker/test/stateless/setup_minio.sh { stateful | stateless } ./tests/
'
set -e -x -a -u
rpm2cpio ./minio-20220103182258.0.0.*.rpm | cpio -i --make-directories
find / -name minio
cp ./usr/local/bin/minio ./
TEST_TYPE="$1"
shift
case $TEST_TYPE in
stateless) QUERY_DIR=0_stateless ;;
stateful) QUERY_DIR=1_stateful ;;
*) echo "unknown test type $TEST_TYPE"; echo "${USAGE}"; exit 1 ;;
esac
ls -lha
mkdir -p ./minio_data
if [ ! -f ./minio ]; then
MINIO_SERVER_VERSION=${MINIO_SERVER_VERSION:-2022-01-03T18-22-58Z}
MINIO_CLIENT_VERSION=${MINIO_CLIENT_VERSION:-2022-01-05T23-52-51Z}
case $(uname -m) in
x86_64) BIN_ARCH=amd64 ;;
aarch64) BIN_ARCH=arm64 ;;
*) echo "unknown architecture $(uname -m)"; exit 1 ;;
esac
echo 'MinIO binary not found, downloading...'
BINARY_TYPE=$(uname -s | tr '[:upper:]' '[:lower:]')
wget "https://dl.min.io/server/minio/release/${BINARY_TYPE}-amd64/minio" \
&& chmod +x ./minio \
&& wget "https://dl.min.io/client/mc/release/${BINARY_TYPE}-amd64/mc" \
&& chmod +x ./mc
wget "https://dl.min.io/server/minio/release/${BINARY_TYPE}-${BIN_ARCH}/archive/minio.RELEASE.${MINIO_SERVER_VERSION}" -O ./minio \
&& wget "https://dl.min.io/client/mc/release/${BINARY_TYPE}-${BIN_ARCH}/archive/mc.RELEASE.${MINIO_CLIENT_VERSION}" -O ./mc \
&& chmod +x ./mc ./minio
fi
MINIO_ROOT_USER=${MINIO_ROOT_USER:-clickhouse}
@ -52,14 +64,16 @@ sleep 5
./mc admin user add clickminio test testtest
./mc admin policy set clickminio readwrite user=test
./mc mb clickminio/test
./mc policy set public clickminio/test
if [ "$TEST_TYPE" = "stateless" ]; then
./mc policy set public clickminio/test
fi
# Upload data to Minio. By default, after unpacking, all tests will be in
# /usr/share/clickhouse-test/queries
TEST_PATH=${1:-/usr/share/clickhouse-test}
MINIO_DATA_PATH=${TEST_PATH}/queries/0_stateless/data_minio
MINIO_DATA_PATH=${TEST_PATH}/queries/${QUERY_DIR}/data_minio
# Iterating over globs will cause the redundant FILE variable to be a path to a file, not a filename
# shellcheck disable=SC2045
@ -71,6 +85,6 @@ done
mkdir -p ~/.aws
cat <<EOT >> ~/.aws/credentials
[default]
aws_access_key_id=clickhouse
aws_secret_access_key=clickhouse
aws_access_key_id=${MINIO_ROOT_USER}
aws_secret_access_key=${MINIO_ROOT_PASSWORD}
EOT

View File

@ -174,7 +174,7 @@ install_packages package_folder
configure
./setup_minio.sh
./setup_minio.sh stateful # to have a proper environment
start

View File

@ -32,6 +32,7 @@ The list of available `SYSTEM` statements:
- [START TTL MERGES](#query_language-start-ttl-merges)
- [STOP MOVES](#query_language-stop-moves)
- [START MOVES](#query_language-start-moves)
- [SYSTEM UNFREEZE](#query_language-system-unfreeze)
- [STOP FETCHES](#query_language-system-stop-fetches)
- [START FETCHES](#query_language-system-start-fetches)
- [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends)
@ -239,6 +240,14 @@ Returns `Ok.` even if table does not exist. Returns error when database does not
SYSTEM START MOVES [[db.]merge_tree_family_table_name]
```
### SYSTEM UNFREEZE {#query_language-system-unfreeze}
Clears a frozen backup with the specified name from all disks. See more about unfreezing separate parts in [ALTER TABLE table_name UNFREEZE WITH NAME](alter/partition.md#alter_unfreeze-partition).
``` sql
SYSTEM UNFREEZE WITH NAME <backup_name>
```
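This commit also adds a dedicated `SYSTEM_UNFREEZE` access type (see the `AccessType` hunk further below), so a hedged usage sketch, with a hypothetical user and backup name, might look like:
``` sql
-- Grant the new privilege, then clear the named frozen backup from all disks.
GRANT SYSTEM UNFREEZE ON *.* TO backup_admin;
SYSTEM UNFREEZE WITH NAME 'backup_name';
```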
## Managing ReplicatedMergeTree Tables
ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) tables.

View File

@ -30,6 +30,7 @@ sidebar_label: SYSTEM
- [START TTL MERGES](#query_language-start-ttl-merges)
- [STOP MOVES](#query_language-stop-moves)
- [START MOVES](#query_language-start-moves)
- [SYSTEM UNFREEZE](#query_language-system-unfreeze)
- [STOP FETCHES](#query_language-system-stop-fetches)
- [START FETCHES](#query_language-system-start-fetches)
- [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends)
@ -235,6 +236,14 @@ SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
SYSTEM START MOVES [[db.]merge_tree_family_table_name]
```
### SYSTEM UNFREEZE {#query_language-system-unfreeze}
Removes all "frozen" partitions of the given backup from the disks. For removing partitions individually, see the [ALTER TABLE table_name UNFREEZE WITH NAME](alter/partition.md#alter_unfreeze-partition) query.
``` sql
SYSTEM UNFREEZE WITH NAME <backup_name>
```
## Managing ReplicatedMergeTree Tables {#query-language-system-replicated}
ClickHouse can manage background replication-related processes in tables of the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) family.

View File

@ -5,4 +5,4 @@ sidebar_position: 82
# What's new in ClickHouse?
Development plans are briefly outlined [here](https://github.com/ClickHouse/ClickHouse/issues/17623), and news about previous releases is described in detail in the [changelog](./changelog/).
Development plans are briefly outlined [here](https://github.com/ClickHouse/ClickHouse/issues/32513), and news about previous releases is described in detail in the [changelog](./changelog/).

View File

@ -26,6 +26,7 @@ sidebar_label: SYSTEM
- [START TTL MERGES](#query_language-start-ttl-merges)
- [STOP MOVES](#query_language-stop-moves)
- [START MOVES](#query_language-start-moves)
- [SYSTEM UNFREEZE](#query_language-system-unfreeze)
- [STOP FETCHES](#query_language-system-stop-fetches)
- [START FETCHES](#query_language-system-start-fetches)
- [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends)
@ -203,6 +204,14 @@ SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
```
### SYSTEM UNFREEZE {#query_language-system-unfreeze}
Clears the frozen backup with the specified name from all disks. See more about unfreezing individual parts in [ALTER TABLE table_name UNFREEZE WITH NAME](alter/partition.md#alter_unfreeze-partition).
``` sql
SYSTEM UNFREEZE WITH NAME <backup_name>
```
## Managing ReplicatedMergeTree Tables {#query-language-system-replicated}
Manages background replication-related processes for [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables.

View File

@ -164,6 +164,7 @@ enum class AccessType
M(SYSTEM_FLUSH_LOGS, "FLUSH LOGS", GLOBAL, SYSTEM_FLUSH) \
M(SYSTEM_FLUSH, "", GROUP, SYSTEM) \
M(SYSTEM_THREAD_FUZZER, "SYSTEM START THREAD FUZZER, SYSTEM STOP THREAD FUZZER, START THREAD FUZZER, STOP THREAD FUZZER", GLOBAL, SYSTEM) \
M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \
M(SYSTEM, "", GROUP, ALL) /* allows to execute SYSTEM {SHUTDOWN|RELOAD CONFIG|...} */ \
\
M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\

View File

@ -120,6 +120,7 @@ namespace
AccessRights res = access;
res.modifyFlags(modifier);
res.modifyFlagsWithGrantOption(modifier);
/// Anyone has access to the "system" and "information_schema" databases.
res.grant(AccessType::SELECT, DatabaseCatalog::SYSTEM_DATABASE);

View File

@ -326,7 +326,7 @@ Strings BackupCoordinationDistributed::listFiles(const String & prefix, const St
elements.push_back(String{new_element});
}
std::sort(elements.begin(), elements.end());
::sort(elements.begin(), elements.end());
return elements;
}

View File

@ -84,7 +84,7 @@ namespace
return true;
});
std::sort(res.begin(), res.end());
::sort(res.begin(), res.end());
res.erase(std::unique(res.begin(), res.end()), res.end());
return res;
}
@ -113,7 +113,7 @@ namespace
return true;
});
std::sort(res.begin(), res.end());
::sort(res.begin(), res.end());
res.erase(std::unique(res.begin(), res.end()), res.end());
return res;
}

View File

@ -22,8 +22,8 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
extern const int DUPLICATE_COLUMN;
extern const int NUMBER_OF_DIMENSIONS_MISMATHED;
extern const int NOT_IMPLEMENTED;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
extern const int ARGUMENT_OUT_OF_BOUND;
}
namespace
@ -179,7 +179,7 @@ ColumnObject::Subcolumn::Subcolumn(
{
}
size_t ColumnObject::Subcolumn::Subcolumn::size() const
size_t ColumnObject::Subcolumn::size() const
{
size_t res = num_of_defaults_in_prefix;
for (const auto & part : data)
@ -187,7 +187,7 @@ size_t ColumnObject::Subcolumn::Subcolumn::size() const
return res;
}
size_t ColumnObject::Subcolumn::Subcolumn::byteSize() const
size_t ColumnObject::Subcolumn::byteSize() const
{
size_t res = 0;
for (const auto & part : data)
@ -195,7 +195,7 @@ size_t ColumnObject::Subcolumn::Subcolumn::byteSize() const
return res;
}
size_t ColumnObject::Subcolumn::Subcolumn::allocatedBytes() const
size_t ColumnObject::Subcolumn::allocatedBytes() const
{
size_t res = 0;
for (const auto & part : data)
@ -203,6 +203,37 @@ size_t ColumnObject::Subcolumn::Subcolumn::allocatedBytes() const
return res;
}
void ColumnObject::Subcolumn::get(size_t n, Field & res) const
{
if (isFinalized())
{
getFinalizedColumn().get(n, res);
return;
}
size_t ind = n;
if (ind < num_of_defaults_in_prefix)
{
res = least_common_type.get()->getDefault();
return;
}
ind -= num_of_defaults_in_prefix;
for (const auto & part : data)
{
if (ind < part->size())
{
part->get(ind, res);
res = convertFieldToTypeOrThrow(res, *least_common_type.get());
return;
}
ind -= part->size();
}
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index ({}) for getting field is out of range", n);
}
void ColumnObject::Subcolumn::checkTypes() const
{
DataTypes prefix_types;
@ -221,7 +252,7 @@ void ColumnObject::Subcolumn::checkTypes() const
void ColumnObject::Subcolumn::insert(Field field)
{
auto info = getFieldInfo(field);
auto info = DB::getFieldInfo(field);
insert(std::move(field), std::move(info));
}
@ -244,8 +275,8 @@ static bool isConversionRequiredBetweenIntegers(const IDataType & lhs, const IDa
bool is_native_int = which_lhs.isNativeInt() && which_rhs.isNativeInt();
bool is_native_uint = which_lhs.isNativeUInt() && which_rhs.isNativeUInt();
return (is_native_int || is_native_uint)
&& lhs.getSizeOfValueInMemory() <= rhs.getSizeOfValueInMemory();
return (!is_native_int && !is_native_uint)
|| lhs.getSizeOfValueInMemory() > rhs.getSizeOfValueInMemory();
}
void ColumnObject::Subcolumn::insert(Field field, FieldInfo info)
@ -288,7 +319,7 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info)
}
else if (!least_common_base_type->equals(*base_type) && !isNothing(base_type))
{
if (!isConversionRequiredBetweenIntegers(*base_type, *least_common_base_type))
if (isConversionRequiredBetweenIntegers(*base_type, *least_common_base_type))
{
base_type = getLeastSupertype(DataTypes{std::move(base_type), least_common_base_type}, true);
type_changed = true;
@ -305,35 +336,96 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info)
void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn & src, size_t start, size_t length)
{
assert(src.isFinalized());
const auto & src_column = src.data.back();
const auto & src_type = src.least_common_type.get();
assert(start + length <= src.size());
size_t end = start + length;
if (data.empty())
{
addNewColumnPart(src.least_common_type.get());
data.back()->insertRangeFrom(*src_column, start, length);
addNewColumnPart(src.getLeastCommonType());
}
else if (least_common_type.get()->equals(*src_type))
else if (!least_common_type.get()->equals(*src.getLeastCommonType()))
{
data.back()->insertRangeFrom(*src_column, start, length);
}
else
{
auto new_least_common_type = getLeastSupertype(DataTypes{least_common_type.get(), src_type}, true);
auto casted_column = castColumn({src_column, src_type, ""}, new_least_common_type);
if (!least_common_type.get()->equals(*new_least_common_type))
auto new_least_common_type = getLeastSupertype(DataTypes{least_common_type.get(), src.getLeastCommonType()}, true);
if (!new_least_common_type->equals(*least_common_type.get()))
addNewColumnPart(std::move(new_least_common_type));
}
data.back()->insertRangeFrom(*casted_column, start, length);
if (end <= src.num_of_defaults_in_prefix)
{
data.back()->insertManyDefaults(length);
return;
}
if (start < src.num_of_defaults_in_prefix)
data.back()->insertManyDefaults(src.num_of_defaults_in_prefix - start);
auto insert_from_part = [&](const auto & column, size_t from, size_t n)
{
assert(from + n <= column->size());
auto column_type = getDataTypeByColumn(*column);
if (column_type->equals(*least_common_type.get()))
{
data.back()->insertRangeFrom(*column, from, n);
return;
}
/// If we need to insert a large range, there is no sense in cutting off part of the column and casting it.
/// Casting the whole column and inserting from it can be faster.
/// The threshold is just a guess.
if (n * 3 >= column->size())
{
auto casted_column = castColumn({column, column_type, ""}, least_common_type.get());
data.back()->insertRangeFrom(*casted_column, from, n);
return;
}
auto casted_column = column->cut(from, n);
casted_column = castColumn({casted_column, column_type, ""}, least_common_type.get());
data.back()->insertRangeFrom(*casted_column, 0, n);
};
size_t pos = 0;
size_t processed_rows = src.num_of_defaults_in_prefix;
/// Find the first part of the column that intersects the range.
while (pos < src.data.size() && processed_rows + src.data[pos]->size() < start)
{
processed_rows += src.data[pos]->size();
++pos;
}
/// Insert from the first part of column.
if (pos < src.data.size() && processed_rows < start)
{
size_t part_start = start - processed_rows;
size_t part_length = std::min(src.data[pos]->size() - part_start, end - start);
insert_from_part(src.data[pos], part_start, part_length);
processed_rows += src.data[pos]->size();
++pos;
}
/// Insert from the parts of column in the middle of range.
while (pos < src.data.size() && processed_rows + src.data[pos]->size() < end)
{
insert_from_part(src.data[pos], 0, src.data[pos]->size());
processed_rows += src.data[pos]->size();
++pos;
}
/// Insert from the last part of column if needed.
if (pos < src.data.size() && processed_rows < end)
{
size_t part_end = end - processed_rows;
insert_from_part(src.data[pos], 0, part_end);
}
}
bool ColumnObject::Subcolumn::isFinalized() const
{
return data.empty() ||
(data.size() == 1 && !data[0]->isSparse() && num_of_defaults_in_prefix == 0);
return num_of_defaults_in_prefix == 0 &&
(data.empty() || (data.size() == 1 && !data[0]->isSparse()));
}
void ColumnObject::Subcolumn::finalize()
@ -432,6 +524,13 @@ void ColumnObject::Subcolumn::popBack(size_t n)
num_of_defaults_in_prefix -= n;
}
ColumnObject::Subcolumn ColumnObject::Subcolumn::cut(size_t start, size_t length) const
{
Subcolumn new_subcolumn(0, is_nullable);
new_subcolumn.insertRangeFrom(*this, start, length);
return new_subcolumn;
}
Field ColumnObject::Subcolumn::getLastField() const
{
if (data.empty())
@ -442,6 +541,18 @@ Field ColumnObject::Subcolumn::getLastField() const
return (*last_part)[last_part->size() - 1];
}
FieldInfo ColumnObject::Subcolumn::getFieldInfo() const
{
const auto & base_type = least_common_type.getBase();
return FieldInfo
{
.scalar_type = base_type,
.have_nulls = base_type->isNullable(),
.need_convert = false,
.num_dimensions = least_common_type.getNumberOfDimensions(),
};
}
ColumnObject::Subcolumn ColumnObject::Subcolumn::recreateWithDefaultValues(const FieldInfo & field_info) const
{
auto scalar_type = field_info.scalar_type;
@ -479,6 +590,13 @@ const ColumnPtr & ColumnObject::Subcolumn::getFinalizedColumnPtr() const
return data[0];
}
ColumnObject::Subcolumn::LeastCommonType::LeastCommonType()
: type(std::make_shared<DataTypeNothing>())
, base_type(type)
, num_dimensions(0)
{
}
ColumnObject::Subcolumn::LeastCommonType::LeastCommonType(DataTypePtr type_)
: type(std::move(type_))
, base_type(getBaseTypeOfArray(type))
@ -525,16 +643,6 @@ size_t ColumnObject::size() const
return num_rows;
}
MutableColumnPtr ColumnObject::cloneResized(size_t new_size) const
{
/// cloneResized with new_size == 0 is used for cloneEmpty().
if (new_size != 0)
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"ColumnObject doesn't support resize to non-zero length");
return ColumnObject::create(is_nullable);
}
size_t ColumnObject::byteSize() const
{
size_t res = 0;
@ -553,23 +661,21 @@ size_t ColumnObject::allocatedBytes() const
void ColumnObject::forEachSubcolumn(ColumnCallback callback)
{
if (!isFinalized())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot iterate over non-finalized ColumnObject");
for (auto & entry : subcolumns)
callback(entry->data.data.back());
for (auto & part : entry->data.data)
callback(part);
}
void ColumnObject::insert(const Field & field)
{
const auto & object = field.get<const Object &>();
HashSet<StringRef, StringRefHash> inserted;
HashSet<StringRef, StringRefHash> inserted_paths;
size_t old_size = size();
for (const auto & [key_str, value] : object)
{
PathInData key(key_str);
inserted.insert(key_str);
inserted_paths.insert(key_str);
if (!hasSubcolumn(key))
addSubcolumn(key, old_size);
@ -578,8 +684,14 @@ void ColumnObject::insert(const Field & field)
}
for (auto & entry : subcolumns)
if (!inserted.has(entry->path.getPath()))
entry->data.insertDefault();
{
if (!inserted_paths.has(entry->path.getPath()))
{
bool inserted = tryInsertDefaultFromNested(entry);
if (!inserted)
entry->data.insertDefault();
}
}
++num_rows;
}
@ -594,26 +706,21 @@ void ColumnObject::insertDefault()
Field ColumnObject::operator[](size_t n) const
{
if (!isFinalized())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get Field from non-finalized ColumnObject");
Object object;
for (const auto & entry : subcolumns)
object[entry->path.getPath()] = (*entry->data.data.back())[n];
Field object;
get(n, object);
return object;
}
void ColumnObject::get(size_t n, Field & res) const
{
if (!isFinalized())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get Field from non-finalized ColumnObject");
assert(n < size());
res = Object();
auto & object = res.get<Object &>();
for (const auto & entry : subcolumns)
{
auto it = object.try_emplace(entry->path.getPath()).first;
entry->data.data.back()->get(n, it->second);
entry->data.get(n, it->second);
}
}
@ -626,41 +733,28 @@ void ColumnObject::insertFrom(const IColumn & src, size_t n)
void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
const auto & src_object = assert_cast<const ColumnObject &>(src);
if (!src_object.isFinalized())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot insertRangeFrom non-finalized ColumnObject");
for (auto & entry : subcolumns)
{
if (src_object.hasSubcolumn(entry->path))
entry->data.insertRangeFrom(src_object.getSubcolumn(entry->path), start, length);
else
entry->data.insertManyDefaults(length);
}
for (const auto & entry : src_object.subcolumns)
{
if (!hasSubcolumn(entry->path))
{
if (entry->path.hasNested())
{
const auto & base_type = entry->data.getLeastCommonTypeBase();
FieldInfo field_info
{
.scalar_type = base_type,
.have_nulls = base_type->isNullable(),
.need_convert = false,
.num_dimensions = entry->data.getNumberOfDimensions(),
};
addNestedSubcolumn(entry->path, field_info, num_rows);
}
addNestedSubcolumn(entry->path, entry->data.getFieldInfo(), num_rows);
else
{
addSubcolumn(entry->path, num_rows);
}
}
auto & subcolumn = getSubcolumn(entry->path);
subcolumn.insertRangeFrom(entry->data, start, length);
auto & subcolumn = getSubcolumn(entry->path);
subcolumn.insertRangeFrom(entry->data, start, length);
}
for (auto & entry : subcolumns)
{
if (!src_object.hasSubcolumn(entry->path))
{
bool inserted = tryInsertManyDefaultsFromNested(entry);
if (!inserted)
entry->data.insertManyDefaults(length);
}
}
@ -668,21 +762,6 @@ void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t len
finalize();
}
ColumnPtr ColumnObject::replicate(const Offsets & offsets) const
{
if (!isFinalized())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot replicate non-finalized ColumnObject");
auto res_column = ColumnObject::create(is_nullable);
for (const auto & entry : subcolumns)
{
auto replicated_data = entry->data.data.back()->replicate(offsets)->assumeMutable();
res_column->addSubcolumn(entry->path, std::move(replicated_data));
}
return res_column;
}
void ColumnObject::popBack(size_t length)
{
for (auto & entry : subcolumns)
@ -692,10 +771,15 @@ void ColumnObject::popBack(size_t length)
}
template <typename Func>
ColumnPtr ColumnObject::applyForSubcolumns(Func && func, std::string_view func_name) const
MutableColumnPtr ColumnObject::applyForSubcolumns(Func && func) const
{
if (!isFinalized())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot {} non-finalized ColumnObject", func_name);
{
auto finalized = IColumn::mutate(getPtr());
auto & finalized_object = assert_cast<ColumnObject &>(*finalized);
finalized_object.finalize();
return finalized_object.applyForSubcolumns(std::forward<Func>(func));
}
auto res = ColumnObject::create(is_nullable);
for (const auto & subcolumn : subcolumns)
@ -703,22 +787,36 @@ ColumnPtr ColumnObject::applyForSubcolumns(Func && func, std::string_view func_n
auto new_subcolumn = func(subcolumn->data.getFinalizedColumn());
res->addSubcolumn(subcolumn->path, new_subcolumn->assumeMutable());
}
return res;
}
ColumnPtr ColumnObject::permute(const Permutation & perm, size_t limit) const
{
return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.permute(perm, limit); }, "permute");
return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.permute(perm, limit); });
}
ColumnPtr ColumnObject::filter(const Filter & filter, ssize_t result_size_hint) const
{
return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.filter(filter, result_size_hint); }, "filter");
return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.filter(filter, result_size_hint); });
}
ColumnPtr ColumnObject::index(const IColumn & indexes, size_t limit) const
{
return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.index(indexes, limit); }, "index");
return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.index(indexes, limit); });
}
ColumnPtr ColumnObject::replicate(const Offsets & offsets) const
{
return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.replicate(offsets); });
}
MutableColumnPtr ColumnObject::cloneResized(size_t new_size) const
{
if (new_size == 0)
return ColumnObject::create(is_nullable);
return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.cloneResized(new_size); });
}
const ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & key) const
@ -810,6 +908,92 @@ void ColumnObject::addNestedSubcolumn(const PathInData & key, const FieldInfo &
if (num_rows == 0)
num_rows = new_size;
else if (new_size != num_rows)
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH,
"Required size of subcolumn {} ({}) is inconsistent with column size ({})",
key.getPath(), new_size, num_rows);
}
const ColumnObject::Subcolumns::Node * ColumnObject::getLeafOfTheSameNested(const Subcolumns::NodePtr & entry) const
{
if (!entry->path.hasNested())
return nullptr;
size_t old_size = entry->data.size();
const auto * current_node = subcolumns.findLeaf(entry->path);
const Subcolumns::Node * leaf = nullptr;
while (current_node)
{
/// Try to find the first Nested up to the current node.
const auto * node_nested = subcolumns.findParent(current_node,
[](const auto & candidate) { return candidate.isNested(); });
if (!node_nested)
break;
/// Find the leaf with a subcolumn that contains values
/// for the last rows.
/// If there are no such leaves, skip the current node and find
/// the next node up to the current one.
leaf = subcolumns.findLeaf(node_nested,
[&](const auto & candidate)
{
return candidate.data.size() > old_size;
});
if (leaf)
break;
current_node = node_nested->parent;
}
if (leaf && isNothing(leaf->data.getLeastCommonTypeBase()))
return nullptr;
return leaf;
}
bool ColumnObject::tryInsertManyDefaultsFromNested(const Subcolumns::NodePtr & entry) const
{
const auto * leaf = getLeafOfTheSameNested(entry);
if (!leaf)
return false;
size_t old_size = entry->data.size();
auto field_info = entry->data.getFieldInfo();
/// Cut the needed range from the found leaf
/// and replace the scalar values with the correct
/// default values for the given entry.
auto new_subcolumn = leaf->data
.cut(old_size, leaf->data.size() - old_size)
.recreateWithDefaultValues(field_info);
entry->data.insertRangeFrom(new_subcolumn, 0, new_subcolumn.size());
return true;
}
bool ColumnObject::tryInsertDefaultFromNested(const Subcolumns::NodePtr & entry) const
{
const auto * leaf = getLeafOfTheSameNested(entry);
if (!leaf)
return false;
auto last_field = leaf->data.getLastField();
if (last_field.isNull())
return false;
size_t leaf_num_dimensions = leaf->data.getNumberOfDimensions();
size_t entry_num_dimensions = entry->data.getNumberOfDimensions();
auto default_scalar = entry_num_dimensions > leaf_num_dimensions
? createEmptyArrayField(entry_num_dimensions - leaf_num_dimensions)
: entry->data.getLeastCommonTypeBase()->getDefault();
auto default_field = applyVisitor(FieldVisitorReplaceScalars(default_scalar, leaf_num_dimensions), last_field);
entry->data.insert(std::move(default_field));
return true;
}
PathsInData ColumnObject::getKeys() const
@ -835,7 +1019,7 @@ void ColumnObject::finalize()
{
const auto & least_common_type = entry->data.getLeastCommonType();
/// Do not add subcolumns, which consists only from NULLs.
/// Do not add subcolumns, which consist only from NULLs.
if (isNothing(getBaseTypeOfArray(least_common_type)))
continue;

View File

@ -65,6 +65,7 @@ public:
size_t size() const;
size_t byteSize() const;
size_t allocatedBytes() const;
void get(size_t n, Field & res) const;
bool isFinalized() const;
const DataTypePtr & getLeastCommonType() const { return least_common_type.get(); }
@ -84,6 +85,8 @@ public:
void insertRangeFrom(const Subcolumn & src, size_t start, size_t length);
void popBack(size_t n);
Subcolumn cut(size_t start, size_t length) const;
/// Converts all column's parts to the common type and
/// creates a single column that stores all values.
void finalize();
@ -91,6 +94,8 @@ public:
/// Returns last inserted field.
Field getLastField() const;
FieldInfo getFieldInfo() const;
/// Recreates subcolumn with default scalar values and keeps sizes of arrays.
/// Used to create columns of type Nested with consistent array sizes.
Subcolumn recreateWithDefaultValues(const FieldInfo & field_info) const;
@ -101,13 +106,16 @@ public:
const IColumn & getFinalizedColumn() const;
const ColumnPtr & getFinalizedColumnPtr() const;
const std::vector<WrappedPtr> & getData() const { return data; }
size_t getNumberOfDefaultsInPrefix() const { return num_of_defaults_in_prefix; }
friend class ColumnObject;
private:
class LeastCommonType
{
public:
LeastCommonType() = default;
LeastCommonType();
explicit LeastCommonType(DataTypePtr type_);
const DataTypePtr & get() const { return type; }
@ -175,6 +183,11 @@ public:
/// It cares about consistency of sizes of Nested arrays.
void addNestedSubcolumn(const PathInData & key, const FieldInfo & field_info, size_t new_size);
/// Finds a subcolumn from the same Nested type as @entry and inserts
/// an array with default values with consistent sizes as in Nested type.
bool tryInsertDefaultFromNested(const Subcolumns::NodePtr & entry) const;
bool tryInsertManyDefaultsFromNested(const Subcolumns::NodePtr & entry) const;
const Subcolumns & getSubcolumns() const { return subcolumns; }
Subcolumns & getSubcolumns() { return subcolumns; }
PathsInData getKeys() const;
@ -189,7 +202,6 @@ public:
TypeIndex getDataType() const override { return TypeIndex::Object; }
size_t size() const override;
MutableColumnPtr cloneResized(size_t new_size) const override;
size_t byteSize() const override;
size_t allocatedBytes() const override;
void forEachSubcolumn(ColumnCallback callback) override;
@ -197,13 +209,14 @@ public:
void insertDefault() override;
void insertFrom(const IColumn & src, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr replicate(const Offsets & offsets) const override;
void popBack(size_t length) override;
Field operator[](size_t n) const override;
void get(size_t n, Field & res) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumnPtr cloneResized(size_t new_size) const override;
/// All other methods throw exception.
@ -236,7 +249,11 @@ private:
}
template <typename Func>
ColumnPtr applyForSubcolumns(Func && func, std::string_view func_name) const;
MutableColumnPtr applyForSubcolumns(Func && func) const;
/// For a given subcolumn, return a subcolumn from the same Nested type.
/// It's used to get the shared sizes of Nested to insert correct default values.
const Subcolumns::Node * getLeafOfTheSameNested(const Subcolumns::NodePtr & entry) const;
};
}

View File

@ -0,0 +1,120 @@
#include <Common/FieldVisitorsAccurateComparison.h>
#include <DataTypes/getLeastSupertype.h>
#include <Interpreters/castColumn.h>
#include <Interpreters/convertFieldToType.h>
#include <Columns/ColumnObject.h>
#include <Common/FieldVisitorToString.h>
#include <Common/randomSeed.h>
#include <fmt/core.h>
#include <pcg_random.hpp>
#include <gtest/gtest.h>
#include <random>
using namespace DB;
static pcg64 rng(randomSeed());
Field getRandomField(size_t type)
{
switch (type)
{
case 0:
return rng();
case 1:
return std::uniform_real_distribution<>(0.0, 1.0)(rng);
case 2:
return std::string(rng() % 10, 'a' + rng() % 26);
default:
return Field();
}
}
std::pair<ColumnObject::Subcolumn, std::vector<Field>> generate(size_t size)
{
bool has_defaults = rng() % 3 == 0;
size_t num_defaults = has_defaults ? rng() % size : 0;
ColumnObject::Subcolumn subcolumn(num_defaults, false);
std::vector<Field> fields;
while (subcolumn.size() < size)
{
size_t part_size = rng() % (size - subcolumn.size()) + 1;
size_t field_type = rng() % 3;
for (size_t i = 0; i < part_size; ++i)
{
fields.push_back(getRandomField(field_type));
subcolumn.insert(fields.back());
}
}
std::vector<Field> result_fields;
for (size_t i = 0; i < num_defaults; ++i)
result_fields.emplace_back();
result_fields.insert(result_fields.end(), fields.begin(), fields.end());
return {std::move(subcolumn), std::move(result_fields)};
}
void checkFieldsAreEqual(ColumnObject::Subcolumn subcolumn, const std::vector<Field> & fields)
{
ASSERT_EQ(subcolumn.size(), fields.size());
for (size_t i = 0; i < subcolumn.size(); ++i)
{
Field field;
subcolumn.get(i, field); // Also check 'get' method.
if (!applyVisitor(FieldVisitorAccurateEquals(), field, fields[i]))
{
std::cerr << fmt::format("Wrong value at position {}, expected {}, got {}",
i, applyVisitor(FieldVisitorToString(), fields[i]), applyVisitor(FieldVisitorToString(), field));
ASSERT_TRUE(false);
}
}
}
constexpr size_t T = 1000;
constexpr size_t N = 1000;
TEST(ColumnObject, InsertRangeFrom)
{
for (size_t t = 0; t < T; ++t)
{
auto [subcolumn_dst, fields_dst] = generate(N);
auto [subcolumn_src, fields_src] = generate(N);
ASSERT_EQ(subcolumn_dst.size(), fields_dst.size());
ASSERT_EQ(subcolumn_src.size(), fields_src.size());
const auto & type_dst = subcolumn_dst.getLeastCommonType();
const auto & type_src = subcolumn_src.getLeastCommonType();
auto type_res = getLeastSupertype(DataTypes{type_dst, type_src}, true);
size_t from = rng() % subcolumn_src.size();
size_t to = rng() % subcolumn_src.size();
if (from > to)
std::swap(from, to);
++to;
for (auto & field : fields_dst)
{
if (field.isNull())
field = type_res->getDefault();
else
field = convertFieldToTypeOrThrow(field, *type_res);
}
for (size_t i = from; i < to; ++i)
{
if (fields_src[i].isNull())
fields_dst.push_back(type_res->getDefault());
else
fields_dst.push_back(convertFieldToTypeOrThrow(fields_src[i], *type_res));
}
subcolumn_dst.insertRangeFrom(subcolumn_src, from, to - from);
checkFieldsAreEqual(subcolumn_dst, fields_dst);
}
}

View File

@ -11,7 +11,7 @@
#include <Common/FieldVisitors.h>
using namespace DB;
pcg64 rng(randomSeed());
static pcg64 rng(randomSeed());
std::pair<MutableColumnPtr, MutableColumnPtr> createColumns(size_t n, size_t k)
{

View File

@ -554,54 +554,19 @@ FileSegmentsHolder LRUFileCache::setDownloading(const Key & key, size_t offset,
bool LRUFileCache::tryReserve(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock)
{
auto query_context = enable_filesystem_query_cache_limit ? getCurrentQueryContext(cache_lock) : nullptr;
if (!query_context)
return tryReserveForMainList(key, offset, size, nullptr, cache_lock);
/// If the context can be found, subsequent cache replacements are made through the Query context.
if (query_context)
{
auto res = tryReserveForQuery(key, offset, size, query_context, cache_lock);
switch (res)
{
case ReserveResult::FITS_IN_QUERY_LIMIT_AND_RESERVATION_COMPLETED :
{
/// When the maximum cache size of the query is reached, the cache will be
/// evicted from the history cache accessed by the current query.
return true;
}
case ReserveResult::EXCEEDS_QUERY_LIMIT :
{
/// The query currently does not have enough space to reserve.
/// It returns false and reads data directly from the remote fs.
return false;
}
case ReserveResult::FITS_IN_QUERY_LIMIT_NEED_RESERVE_FROM_MAIN_LIST :
{
/// When the maximum cache capacity of the request is not reached, the cache
/// block is evicted from the main LRU queue.
return tryReserveForMainList(key, offset, size, query_context, cache_lock);
}
}
__builtin_unreachable();
}
else
{
return tryReserveForMainList(key, offset, size, query_context, cache_lock);
}
}
LRUFileCache::ReserveResult LRUFileCache::tryReserveForQuery(const Key & key, size_t offset, size_t size, QueryContextPtr query_context, std::lock_guard<std::mutex> & cache_lock)
{
/// The maximum cache capacity of the request is not reached, thus the
//// cache block is evicted from the main LRU queue by tryReserveForMainList().
if (query_context->getCacheSize() + size <= query_context->getMaxCacheSize())
{
return ReserveResult::FITS_IN_QUERY_LIMIT_NEED_RESERVE_FROM_MAIN_LIST;
}
else if (query_context->getCacheSize() + size <= query_context->getMaxCacheSize())
return tryReserveForMainList(key, offset, size, query_context, cache_lock);
/// When skip_download_if_exceeds_query_cache is true, there is no need
/// to evict old data; skip the cache and read directly from the remote fs.
else if (query_context->isSkipDownloadIfExceed())
{
return ReserveResult::EXCEEDS_QUERY_LIMIT;
}
return false;
/// The maximum cache size of the query is reached, the cache will be
/// evicted from the history cache accessed by the current query.
else
@ -617,7 +582,7 @@ LRUFileCache::ReserveResult LRUFileCache::tryReserveForQuery(const Key & key, si
auto is_overflow = [&]
{
return (max_size != 0 && queue.getTotalWeight(cache_lock) + size - removed_size > max_size)
return (max_size != 0 && queue.getTotalCacheSize(cache_lock) + size - removed_size > max_size)
|| (max_element_size != 0 && queue_size > max_element_size)
|| (query_context->getCacheSize() + size - removed_size > query_context->getMaxCacheSize());
};
@ -666,26 +631,26 @@ LRUFileCache::ReserveResult LRUFileCache::tryReserveForQuery(const Key & key, si
}
}
auto remove_file_segment = [&](FileSegmentPtr file_segment, size_t file_segment_size)
{
query_context->remove(file_segment->key(), file_segment->offset(), file_segment_size, cache_lock);
std::lock_guard segment_lock(file_segment->mutex);
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
};
assert(trash.empty());
for (auto & cell : trash)
{
auto file_segment = cell->file_segment;
if (file_segment)
{
query_context->remove(file_segment->key(), file_segment->offset(), cell->size(), cache_lock);
std::lock_guard segment_lock(file_segment->mutex);
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
}
if (auto file_segment = cell->file_segment)
remove_file_segment(file_segment, cell->size());
}
for (auto & iter : ghost)
query_context->remove(iter->key, iter->offset, iter->size, cache_lock);
if (is_overflow())
{
return ReserveResult::EXCEEDS_QUERY_LIMIT;
}
return false;
if (cell_for_reserve)
{
@ -698,18 +663,12 @@ LRUFileCache::ReserveResult LRUFileCache::tryReserveForQuery(const Key & key, si
for (auto & cell : to_evict)
{
auto file_segment = cell->file_segment;
if (file_segment)
{
query_context->remove(file_segment->key(), file_segment->offset(), cell->size(), cache_lock);
std::lock_guard<std::mutex> segment_lock(file_segment->mutex);
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
}
if (auto file_segment = cell->file_segment)
remove_file_segment(file_segment, cell->size());
}
query_context->reserve(key, offset, size, cache_lock);
return ReserveResult::FITS_IN_QUERY_LIMIT_NEED_RESERVE_FROM_MAIN_LIST;
return true;
}
}
@ -732,7 +691,7 @@ bool LRUFileCache::tryReserveForMainList(
auto is_overflow = [&]
{
/// max_size == 0 means unlimited cache size, max_element_size means unlimited number of cache elements.
return (max_size != 0 && queue.getTotalWeight(cache_lock) + size - removed_size > max_size)
return (max_size != 0 && queue.getTotalCacheSize(cache_lock) + size - removed_size > max_size)
|| (max_element_size != 0 && queue_size > max_element_size);
};
@ -785,18 +744,19 @@ bool LRUFileCache::tryReserveForMainList(
}
}
auto remove_file_segment = [&](FileSegmentPtr file_segment)
{
std::lock_guard segment_lock(file_segment->mutex);
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
};
/// This case is very unlikely, can happen in case of exception from
/// file_segment->complete(), which would be a logical error.
assert(trash.empty());
for (auto & cell : trash)
{
auto file_segment = cell->file_segment;
if (file_segment)
{
std::lock_guard segment_lock(file_segment->mutex);
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
}
if (auto file_segment = cell->file_segment)
remove_file_segment(file_segment);
}
if (is_overflow())
@ -817,15 +777,11 @@ bool LRUFileCache::tryReserveForMainList(
for (auto & cell : to_evict)
{
auto file_segment = cell->file_segment;
if (file_segment)
{
std::lock_guard<std::mutex> segment_lock(file_segment->mutex);
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
}
if (auto file_segment = cell->file_segment)
remove_file_segment(file_segment);
}
if (queue.getTotalWeight(cache_lock) > (1ull << 63))
if (queue.getTotalCacheSize(cache_lock) > (1ull << 63))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache became inconsistent. There must be a bug");
if (query_context)
@ -1116,7 +1072,7 @@ size_t LRUFileCache::getUsedCacheSize() const
size_t LRUFileCache::getUsedCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const
{
return queue.getTotalWeight(cache_lock);
return queue.getTotalCacheSize(cache_lock);
}
size_t LRUFileCache::getAvailableCacheSize() const
@ -1305,8 +1261,8 @@ void LRUFileCache::assertQueueCorrectness(std::lock_guard<std::mutex> & cache_lo
total_size += size;
}
assert(total_size == queue.getTotalWeight(cache_lock));
assert(queue.getTotalWeight(cache_lock) <= max_size);
assert(total_size == queue.getTotalCacheSize(cache_lock));
assert(queue.getTotalCacheSize(cache_lock) <= max_size);
assert(queue.getElementsNum(cache_lock) <= max_element_size);
}

View File

@ -130,7 +130,7 @@ protected:
using Iterator = typename std::list<FileKeyAndOffset>::iterator;
size_t getTotalWeight(std::lock_guard<std::mutex> & /* cache_lock */) const { return cache_size; }
size_t getTotalCacheSize(std::lock_guard<std::mutex> & /* cache_lock */) const { return cache_size; }
size_t getElementsNum(std::lock_guard<std::mutex> & /* cache_lock */) const { return queue.size(); }
@ -356,13 +356,6 @@ private:
size_t max_stash_element_size;
size_t enable_cache_hits_threshold;
enum class ReserveResult
{
FITS_IN_QUERY_LIMIT_AND_RESERVATION_COMPLETED,
EXCEEDS_QUERY_LIMIT,
FITS_IN_QUERY_LIMIT_NEED_RESERVE_FROM_MAIN_LIST,
};
Poco::Logger * log;
FileSegments getImpl(
@ -387,12 +380,6 @@ private:
QueryContextPtr query_context,
std::lock_guard<std::mutex> & cache_lock);
/// Limit the maximum cache size for current query.
LRUFileCache::ReserveResult tryReserveForQuery(
const Key & key, size_t offset, size_t size,
QueryContextPtr query_context,
std::lock_guard<std::mutex> & cache_lock);
void remove(
Key key, size_t offset,
std::lock_guard<std::mutex> & cache_lock,

View File

@ -7,6 +7,7 @@
#include <filesystem>
#include <base/find_symbols.h>
#include <base/sort.h>
#include <base/getFQDNOrHostName.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>
@ -169,7 +170,7 @@ std::vector<ShuffleHost> ZooKeeper::shuffleHosts() const
shuffle_hosts.emplace_back(shuffle_host);
}
std::sort(
::sort(
shuffle_hosts.begin(), shuffle_hosts.end(),
[](const ShuffleHost & lhs, const ShuffleHost & rhs)
{

View File

@ -36,21 +36,20 @@ std::string formatChangelogPath(const std::string & prefix, const ChangelogFileD
return path;
}
ChangelogFileDescription getChangelogFileDescription(const std::string & path_str)
ChangelogFileDescription getChangelogFileDescription(const std::filesystem::path & path)
{
std::filesystem::path path(path_str);
std::string filename = path.stem();
Strings filename_parts;
boost::split(filename_parts, filename, boost::is_any_of("_"));
if (filename_parts.size() < 3)
throw Exception(ErrorCodes::CORRUPTED_DATA, "Invalid changelog {}", path_str);
throw Exception(ErrorCodes::CORRUPTED_DATA, "Invalid changelog {}", path.generic_string());
ChangelogFileDescription result;
result.prefix = filename_parts[0];
result.from_log_index = parse<uint64_t>(filename_parts[1]);
result.to_log_index = parse<uint64_t>(filename_parts[2]);
result.extension = path.extension();
result.path = path_str;
result.path = path.generic_string();
return result;
}
@ -276,6 +275,7 @@ Changelog::Changelog(
Poco::Logger * log_,
bool compress_logs_)
: changelogs_dir(changelogs_dir_)
, changelogs_detached_dir(changelogs_dir / "detached")
, rotate_interval(rotate_interval_)
, force_sync(force_sync_)
, log(log_)
@ -288,12 +288,15 @@ Changelog::Changelog(
for (const auto & p : fs::directory_iterator(changelogs_dir))
{
if (p == changelogs_detached_dir)
continue;
auto file_description = getChangelogFileDescription(p.path());
existing_changelogs[file_description.from_log_index] = file_description;
}
if (existing_changelogs.empty())
LOG_WARNING(log, "No logs exists in {}. It's Ok if it's the first run of clickhouse-keeper.", changelogs_dir);
LOG_WARNING(log, "No logs exists in {}. It's Ok if it's the first run of clickhouse-keeper.", changelogs_dir.generic_string());
clean_log_thread = ThreadFromGlobalPool([this] { cleanLogThread(); });
}
@ -328,7 +331,7 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
/// entries from leader.
if (changelog_description.from_log_index > last_commited_log_index && (changelog_description.from_log_index - last_commited_log_index) > 1)
{
LOG_ERROR(log, "Some records was lost, last committed log index {}, smallest available log index on disk {}. Hopefully will receive missing records from leader.", last_commited_log_index, changelog_description.from_log_index);
LOG_ERROR(log, "Some records were lost, last committed log index {}, smallest available log index on disk {}. Hopefully will receive missing records from leader.", last_commited_log_index, changelog_description.from_log_index);
/// Nothing to do with our fresher logs, the leader will overwrite them, so remove everything and just start from last_commited_index
removeAllLogs();
min_log_id = last_commited_log_index;
@ -342,6 +345,12 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
LOG_WARNING(log, "Don't have required amount of reserved log records. Need to read from {}, smallest available log index on disk {}.", start_to_read_from, changelog_description.from_log_index);
}
}
else if ((changelog_description.from_log_index - last_log_read_result->last_read_index) > 1)
{
LOG_ERROR(log, "Some records were lost, last found log index {}, while the next log index on disk is {}. Hopefully will receive missing records from leader.", last_log_read_result->last_read_index, changelog_description.from_log_index);
removeAllLogsAfter(last_log_read_result->log_start_index);
break;
}
ChangelogReader reader(changelog_description.path);
last_log_read_result = reader.readChangelog(logs, start_to_read_from, log);
@ -431,6 +440,44 @@ void Changelog::initWriter(const ChangelogFileDescription & description)
current_writer = std::make_unique<ChangelogWriter>(description.path, WriteMode::Append, description.from_log_index);
}
namespace
{
std::string getCurrentTimestampFolder()
{
const auto timestamp = LocalDateTime{std::time(nullptr)};
return fmt::format(
"{:02}{:02}{:02}T{:02}{:02}{:02}",
timestamp.year(),
timestamp.month(),
timestamp.day(),
timestamp.hour(),
timestamp.minute(),
timestamp.second());
}
}
void Changelog::removeExistingLogs(ChangelogIter begin, ChangelogIter end)
{
const auto timestamp_folder = changelogs_detached_dir / getCurrentTimestampFolder();
for (auto itr = begin; itr != end;)
{
if (!std::filesystem::exists(timestamp_folder))
{
LOG_WARNING(log, "Moving broken logs to {}", timestamp_folder.generic_string());
std::filesystem::create_directories(timestamp_folder);
}
LOG_WARNING(log, "Removing changelog {}", itr->second.path);
const std::filesystem::path path = itr->second.path;
const auto new_path = timestamp_folder / path.filename();
std::filesystem::rename(path, new_path);
itr = existing_changelogs.erase(itr);
}
}
void Changelog::removeAllLogsAfter(uint64_t remove_after_log_start_index)
{
auto start_to_remove_from_itr = existing_changelogs.upper_bound(remove_after_log_start_index);
@ -440,12 +487,8 @@ void Changelog::removeAllLogsAfter(uint64_t remove_after_log_start_index)
size_t start_to_remove_from_log_id = start_to_remove_from_itr->first;
/// All subsequent logs shouldn't exist. But they may exist if we crashed after writeAt started. Remove them.
for (auto itr = start_to_remove_from_itr; itr != existing_changelogs.end();)
{
LOG_WARNING(log, "Removing changelog {}, because it's goes after broken changelog entry", itr->second.path);
std::filesystem::remove(itr->second.path);
itr = existing_changelogs.erase(itr);
}
LOG_WARNING(log, "Removing changelogs that go after broken changelog entry");
removeExistingLogs(start_to_remove_from_itr, existing_changelogs.end());
std::erase_if(logs, [start_to_remove_from_log_id] (const auto & item) { return item.first >= start_to_remove_from_log_id; });
}
@ -453,12 +496,7 @@ void Changelog::removeAllLogsAfter(uint64_t remove_after_log_start_index)
void Changelog::removeAllLogs()
{
LOG_WARNING(log, "Removing all changelogs");
for (auto itr = existing_changelogs.begin(); itr != existing_changelogs.end();)
{
LOG_WARNING(log, "Removing changelog {}, because it's goes after broken changelog entry", itr->second.path);
std::filesystem::remove(itr->second.path);
itr = existing_changelogs.erase(itr);
}
removeExistingLogs(existing_changelogs.begin(), existing_changelogs.end());
logs.clear();
}

View File

@ -138,6 +138,13 @@ private:
/// Starts new file [new_start_log_index, new_start_log_index + rotate_interval]
void rotate(uint64_t new_start_log_index);
/// Currently existing changelogs
std::map<uint64_t, ChangelogFileDescription> existing_changelogs;
using ChangelogIter = decltype(existing_changelogs)::iterator;
void removeExistingLogs(ChangelogIter begin, ChangelogIter end);
static void removeLog(const std::filesystem::path & path, const std::filesystem::path & detached_folder);
/// Remove all changelogs from disk with start_index bigger than start_to_remove_from_id
void removeAllLogsAfter(uint64_t remove_after_log_start_index);
/// Remove all logs from disk
@ -148,14 +155,13 @@ private:
/// Clean useless log files in a background thread
void cleanLogThread();
const std::string changelogs_dir;
const std::filesystem::path changelogs_dir;
const std::filesystem::path changelogs_detached_dir;
const uint64_t rotate_interval;
const bool force_sync;
Poco::Logger * log;
bool compress_logs;
/// Currently existing changelogs
std::map<uint64_t, ChangelogFileDescription> existing_changelogs;
/// Current writer for changelog file
std::unique_ptr<ChangelogWriter> current_writer;

View File

@ -166,7 +166,7 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr
/// Better to sort before serialization, otherwise snapshots can be different on different replicas
std::vector<std::pair<int64_t, Coordination::ACLs>> sorted_acl_map(snapshot.acl_map.begin(), snapshot.acl_map.end());
std::sort(sorted_acl_map.begin(), sorted_acl_map.end());
::sort(sorted_acl_map.begin(), sorted_acl_map.end());
/// Serialize ACLs map
writeBinary(sorted_acl_map.size(), out);
for (const auto & [acl_id, acls] : sorted_acl_map)
@ -209,7 +209,7 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr
/// otherwise snapshots will be different
std::vector<std::pair<int64_t, int64_t>> sorted_session_and_timeout(
snapshot.session_and_timeout.begin(), snapshot.session_and_timeout.end());
std::sort(sorted_session_and_timeout.begin(), sorted_session_and_timeout.end());
::sort(sorted_session_and_timeout.begin(), sorted_session_and_timeout.end());
/// Serialize sessions
size_t size = sorted_session_and_timeout.size();
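
The std::sort → ::sort swaps throughout this commit presumably pick up the project-wide sort declared in base/sort.h (the ExternalDataSourceCache hunk below adds exactly that include alongside the same swap). A minimal sketch of what such a global wrapper can look like, with std::sort standing in for the real implementation:

#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>

template <typename RandomIt, typename... Args>
void sort(RandomIt first, RandomIt last, Args &&... args)
{
    // The real wrapper may dispatch to a faster algorithm such as pdqsort.
    std::sort(first, last, std::forward<Args>(args)...);
}

int main()
{
    std::vector<int> values{3, 1, 2};
    ::sort(values.begin(), values.end());
    for (int v : values)
        std::cout << v << ' '; // prints 1 2 3
}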

View File

@ -700,13 +700,32 @@ TEST_P(CoordinationTest, ChangelogTestStartNewLogAfterRead)
EXPECT_TRUE(fs::exists("./logs/changelog_36_40.bin" + params.extension));
}
namespace
{
void assertBrokenLogRemoved(const fs::path & log_folder, const fs::path & filename)
{
EXPECT_FALSE(fs::exists(log_folder / filename));
// Broken logs are moved to the detached/{timestamp} folder.
// We don't know the timestamp, so we iterate over all of them.
for (const auto & dir_entry : fs::recursive_directory_iterator(log_folder / "detached"))
{
if (dir_entry.path().filename() == filename)
return;
}
FAIL() << "Broken log " << filename << " was not moved to the detached folder";
}
}
TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate)
{
auto params = GetParam();
ChangelogDirTest test("./logs");
static const fs::path log_folder{"./logs"};
DB::KeeperLogStore changelog("./logs", 5, true, params.enable_compression);
auto params = GetParam();
ChangelogDirTest test(log_folder);
DB::KeeperLogStore changelog(log_folder, 5, true, params.enable_compression);
changelog.init(1, 0);
for (size_t i = 0; i < 35; ++i)
@ -738,10 +757,10 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate)
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension));
EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension));
EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin" + params.extension));
EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin" + params.extension));
EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + params.extension));
EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + params.extension));
assertBrokenLogRemoved(log_folder, "changelog_16_20.bin" + params.extension);
assertBrokenLogRemoved(log_folder, "changelog_21_25.bin" + params.extension);
assertBrokenLogRemoved(log_folder, "changelog_26_30.bin" + params.extension);
assertBrokenLogRemoved(log_folder, "changelog_31_35.bin" + params.extension);
auto entry = getLogEntry("h", 7777);
changelog_reader.append(entry);
@ -753,10 +772,10 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate)
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension));
EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension));
EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin" + params.extension));
EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin" + params.extension));
EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + params.extension));
EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + params.extension));
assertBrokenLogRemoved(log_folder, "changelog_16_20.bin" + params.extension);
assertBrokenLogRemoved(log_folder, "changelog_21_25.bin" + params.extension);
assertBrokenLogRemoved(log_folder, "changelog_26_30.bin" + params.extension);
assertBrokenLogRemoved(log_folder, "changelog_31_35.bin" + params.extension);
DB::KeeperLogStore changelog_reader2("./logs", 5, true, params.enable_compression);
changelog_reader2.init(1, 0);
@ -790,14 +809,13 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2)
EXPECT_EQ(changelog_reader.size(), 0);
EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + params.extension));
EXPECT_FALSE(fs::exists("./logs/changelog_21_40.bin" + params.extension));
assertBrokenLogRemoved("./logs", "changelog_21_40.bin" + params.extension);
auto entry = getLogEntry("hello_world", 7777);
changelog_reader.append(entry);
changelog_reader.end_of_append_batch(0, 0);
EXPECT_EQ(changelog_reader.size(), 1);
EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777);
DB::KeeperLogStore changelog_reader2("./logs", 1, true, params.enable_compression);
changelog_reader2.init(1, 0);
EXPECT_EQ(changelog_reader2.size(), 1);
@ -827,10 +845,40 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles)
DB::KeeperLogStore changelog_reader("./logs", 20, true, params.enable_compression);
/// It should print an error message, but still be able to start
changelog_reader.init(5, 0);
EXPECT_FALSE(fs::exists("./logs/changelog_1_20.bin" + params.extension));
EXPECT_FALSE(fs::exists("./logs/changelog_21_40.bin" + params.extension));
assertBrokenLogRemoved("./logs", "changelog_21_40.bin" + params.extension);
}
TEST_P(CoordinationTest, ChangelogTestLostFiles2)
{
auto params = GetParam();
ChangelogDirTest test("./logs");
DB::KeeperLogStore changelog("./logs", 10, true, params.enable_compression);
changelog.init(1, 0);
for (size_t i = 0; i < 35; ++i)
{
auto entry = getLogEntry(std::to_string(i) + "_hello_world", (i + 44) * 10);
changelog.append(entry);
}
changelog.end_of_append_batch(0, 0);
EXPECT_TRUE(fs::exists("./logs/changelog_1_10.bin" + params.extension));
EXPECT_TRUE(fs::exists("./logs/changelog_11_20.bin" + params.extension));
EXPECT_TRUE(fs::exists("./logs/changelog_21_30.bin" + params.extension));
EXPECT_TRUE(fs::exists("./logs/changelog_31_40.bin" + params.extension));
// We have a gap in the logs, so we need to remove all the logs after the gap.
fs::remove("./logs/changelog_21_30.bin" + params.extension);
DB::KeeperLogStore changelog_reader("./logs", 10, true, params.enable_compression);
/// It should print an error message, but still be able to start
changelog_reader.init(5, 0);
EXPECT_TRUE(fs::exists("./logs/changelog_1_10.bin" + params.extension));
EXPECT_TRUE(fs::exists("./logs/changelog_11_20.bin" + params.extension));
assertBrokenLogRemoved("./logs", "changelog_31_40.bin" + params.extension);
}
struct IntNode
{
int value;

View File

@ -17,6 +17,11 @@ namespace std
using namespace experimental::coroutines_v1;
}
#if __has_warning("-Wdeprecated-experimental-coroutine")
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-experimental-coroutine"
#endif
#else
#include <coroutine>
#pragma GCC diagnostic push

View File

@ -455,7 +455,7 @@ ColumnWithTypeAndDimensions createTypeFromNode(const Node * node)
}
/// Sort to always create the same type for the same set of subcolumns.
std::sort(tuple_elements.begin(), tuple_elements.end(),
::sort(tuple_elements.begin(), tuple_elements.end(),
[](const auto & lhs, const auto & rhs) { return std::get<0>(lhs) < std::get<0>(rhs); });
auto tuple_names = extractVector<0>(tuple_elements);
@ -692,7 +692,7 @@ void replaceMissedSubcolumnsByConstants(
res.emplace_back(full_name, types[i]);
}
std::sort(res.begin(), res.end());
::sort(res.begin(), res.end());
return res;
};
@ -718,9 +718,9 @@ void replaceMissedSubcolumnsByConstants(
addConstantToWithClause(query, name, type);
}
void finalizeObjectColumns(MutableColumns & columns)
void finalizeObjectColumns(const MutableColumns & columns)
{
for (auto & column : columns)
for (const auto & column : columns)
if (auto * column_object = typeid_cast<ColumnObject *>(column.get()))
column_object->finalize();
}

View File

@ -51,7 +51,7 @@ void extendObjectColumns(NamesAndTypesList & columns_list, const ColumnsDescript
NameSet getNamesOfObjectColumns(const NamesAndTypesList & columns_list);
bool hasObjectColumns(const ColumnsDescription & columns);
void finalizeObjectColumns(MutableColumns & columns);
void finalizeObjectColumns(const MutableColumns & columns);
/// Updates types of objects in @object_columns inplace
/// according to types in new_columns.

View File

@ -33,71 +33,6 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
namespace
{
using Node = typename ColumnObject::Subcolumns::Node;
/// Finds a subcolumn from the same Nested type as @entry and inserts
/// an array of default values with sizes consistent with the Nested type.
bool tryInsertDefaultFromNested(
const std::shared_ptr<Node> & entry, const ColumnObject::Subcolumns & subcolumns)
{
if (!entry->path.hasNested())
return false;
const Node * current_node = subcolumns.findLeaf(entry->path);
const Node * leaf = nullptr;
size_t num_skipped_nested = 0;
while (current_node)
{
/// Try to find the first Nested up to the current node.
const auto * node_nested = subcolumns.findParent(current_node,
[](const auto & candidate) { return candidate.isNested(); });
if (!node_nested)
break;
/// If there are no leaves, skip current node and find
/// the next node up to the current.
leaf = subcolumns.findLeaf(node_nested,
[&](const auto & candidate)
{
return candidate.data.size() == entry->data.size() + 1;
});
if (leaf)
break;
current_node = node_nested->parent;
++num_skipped_nested;
}
if (!leaf)
return false;
auto last_field = leaf->data.getLastField();
if (last_field.isNull())
return false;
const auto & least_common_type = entry->data.getLeastCommonType();
size_t num_dimensions = getNumberOfDimensions(*least_common_type);
assert(num_skipped_nested < num_dimensions);
/// Replace scalars to default values with consistent array sizes.
size_t num_dimensions_to_keep = num_dimensions - num_skipped_nested;
auto default_scalar = num_skipped_nested
? createEmptyArrayField(num_skipped_nested)
: getBaseTypeOfArray(least_common_type)->getDefault();
auto default_field = applyVisitor(FieldVisitorReplaceScalars(default_scalar, num_dimensions_to_keep), last_field);
entry->data.insert(std::move(default_field));
return true;
}
}
template <typename Parser>
template <typename Reader>
void SerializationObject<Parser>::deserializeTextImpl(IColumn & column, Reader && reader) const
@ -159,7 +94,7 @@ void SerializationObject<Parser>::deserializeTextImpl(IColumn & column, Reader &
{
if (!paths_set.has(entry->path.getPath()))
{
bool inserted = tryInsertDefaultFromNested(entry, subcolumns);
bool inserted = column_object.tryInsertDefaultFromNested(entry);
if (!inserted)
entry->data.insertDefault();
}

View File

@ -83,7 +83,7 @@ void DiskDecorator::moveDirectory(const String & from_path, const String & to_pa
delegate->moveDirectory(from_path, to_path);
}
DirectoryIteratorPtr DiskDecorator::iterateDirectory(const String & path)
DirectoryIteratorPtr DiskDecorator::iterateDirectory(const String & path) const
{
return delegate->iterateDirectory(path);
}
@ -113,7 +113,7 @@ void DiskDecorator::copyDirectoryContent(const String & from_dir, const std::sha
delegate->copyDirectoryContent(from_dir, to_disk, to_dir);
}
void DiskDecorator::listFiles(const String & path, std::vector<String> & file_names)
void DiskDecorator::listFiles(const String & path, std::vector<String> & file_names) const
{
delegate->listFiles(path, file_names);
}
@ -171,7 +171,7 @@ void DiskDecorator::setLastModified(const String & path, const Poco::Timestamp &
delegate->setLastModified(path, timestamp);
}
Poco::Timestamp DiskDecorator::getLastModified(const String & path)
Poco::Timestamp DiskDecorator::getLastModified(const String & path) const
{
return delegate->getLastModified(path);
}

View File

@ -28,13 +28,13 @@ public:
void createDirectories(const String & path) override;
void clearDirectory(const String & path) override;
void moveDirectory(const String & from_path, const String & to_path) override;
DirectoryIteratorPtr iterateDirectory(const String & path) override;
DirectoryIteratorPtr iterateDirectory(const String & path) const override;
void createFile(const String & path) override;
void moveFile(const String & from_path, const String & to_path) override;
void replaceFile(const String & from_path, const String & to_path) override;
void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override;
void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir) override;
void listFiles(const String & path, std::vector<String> & file_names) override;
void listFiles(const String & path, std::vector<String> & file_names) const override;
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
@ -56,7 +56,7 @@ public:
void removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override;
void removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override;
void setLastModified(const String & path, const Poco::Timestamp & timestamp) override;
Poco::Timestamp getLastModified(const String & path) override;
Poco::Timestamp getLastModified(const String & path) const override;
void setReadOnly(const String & path) override;
void createHardLink(const String & src_path, const String & dst_path) override;
void truncateFile(const String & path, size_t size) override;

View File

@ -83,7 +83,7 @@ public:
delegate->moveDirectory(wrapped_from_path, wrapped_to_path);
}
DirectoryIteratorPtr iterateDirectory(const String & path) override
DirectoryIteratorPtr iterateDirectory(const String & path) const override
{
auto wrapped_path = wrappedPath(path);
return delegate->iterateDirectory(wrapped_path);
@ -109,7 +109,7 @@ public:
delegate->replaceFile(wrapped_from_path, wrapped_to_path);
}
void listFiles(const String & path, std::vector<String> & file_names) override
void listFiles(const String & path, std::vector<String> & file_names) const override
{
auto wrapped_path = wrappedPath(path);
delegate->listFiles(wrapped_path, file_names);
@ -192,7 +192,7 @@ public:
delegate->setLastModified(wrapped_path, timestamp);
}
Poco::Timestamp getLastModified(const String & path) override
Poco::Timestamp getLastModified(const String & path) const override
{
auto wrapped_path = wrappedPath(path);
return delegate->getLastModified(wrapped_path);

View File

@ -325,7 +325,7 @@ void DiskLocal::moveDirectory(const String & from_path, const String & to_path)
fs::rename(fs::path(disk_path) / from_path, fs::path(disk_path) / to_path);
}
DirectoryIteratorPtr DiskLocal::iterateDirectory(const String & path)
DirectoryIteratorPtr DiskLocal::iterateDirectory(const String & path) const
{
fs::path meta_path = fs::path(disk_path) / path;
if (!broken && fs::exists(meta_path) && fs::is_directory(meta_path))
@ -387,7 +387,7 @@ void DiskLocal::removeRecursive(const String & path)
fs::remove_all(fs::path(disk_path) / path);
}
void DiskLocal::listFiles(const String & path, std::vector<String> & file_names)
void DiskLocal::listFiles(const String & path, std::vector<String> & file_names) const
{
file_names.clear();
for (const auto & entry : fs::directory_iterator(fs::path(disk_path) / path))
@ -399,7 +399,7 @@ void DiskLocal::setLastModified(const String & path, const Poco::Timestamp & tim
FS::setModificationTime(fs::path(disk_path) / path, timestamp.epochTime());
}
Poco::Timestamp DiskLocal::getLastModified(const String & path)
Poco::Timestamp DiskLocal::getLastModified(const String & path) const
{
return FS::getModificationTimestamp(fs::path(disk_path) / path);
}

View File

@ -58,7 +58,7 @@ public:
void moveDirectory(const String & from_path, const String & to_path) override;
DirectoryIteratorPtr iterateDirectory(const String & path) override;
DirectoryIteratorPtr iterateDirectory(const String & path) const override;
void createFile(const String & path) override;
@ -70,7 +70,7 @@ public:
void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir) override;
void listFiles(const String & path, std::vector<String> & file_names) override;
void listFiles(const String & path, std::vector<String> & file_names) const override;
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
@ -91,7 +91,7 @@ public:
void setLastModified(const String & path, const Poco::Timestamp & timestamp) override;
Poco::Timestamp getLastModified(const String & path) override;
Poco::Timestamp getLastModified(const String & path) const override;
void setReadOnly(const String & path) override;

View File

@ -262,7 +262,7 @@ void DiskMemory::moveDirectory(const String & /*from_path*/, const String & /*to
throw Exception("Method moveDirectory is not implemented for memory disks", ErrorCodes::NOT_IMPLEMENTED);
}
DirectoryIteratorPtr DiskMemory::iterateDirectory(const String & path)
DirectoryIteratorPtr DiskMemory::iterateDirectory(const String & path) const
{
std::lock_guard lock(mutex);
@ -409,7 +409,7 @@ void DiskMemory::removeRecursive(const String & path)
}
}
void DiskMemory::listFiles(const String & path, std::vector<String> & file_names)
void DiskMemory::listFiles(const String & path, std::vector<String> & file_names) const
{
std::lock_guard lock(mutex);

View File

@ -52,7 +52,7 @@ public:
void moveDirectory(const String & from_path, const String & to_path) override;
DirectoryIteratorPtr iterateDirectory(const String & path) override;
DirectoryIteratorPtr iterateDirectory(const String & path) const override;
void createFile(const String & path) override;
@ -60,7 +60,7 @@ public:
void replaceFile(const String & from_path, const String & to_path) override;
void listFiles(const String & path, std::vector<String> & file_names) override;
void listFiles(const String & path, std::vector<String> & file_names) const override;
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
@ -81,7 +81,7 @@ public:
void setLastModified(const String &, const Poco::Timestamp &) override {}
Poco::Timestamp getLastModified(const String &) override { return Poco::Timestamp(); }
Poco::Timestamp getLastModified(const String &) const override { return Poco::Timestamp(); }
void setReadOnly(const String & path) override;

View File

@ -171,7 +171,7 @@ void DiskRestartProxy::moveDirectory(const String & from_path, const String & to
DiskDecorator::moveDirectory(from_path, to_path);
}
DirectoryIteratorPtr DiskRestartProxy::iterateDirectory(const String & path)
DirectoryIteratorPtr DiskRestartProxy::iterateDirectory(const String & path) const
{
ReadLock lock (mutex);
return DiskDecorator::iterateDirectory(path);
@ -207,7 +207,7 @@ void DiskRestartProxy::copyDirectoryContent(const String & from_dir, const std::
DiskDecorator::copyDirectoryContent(from_dir, to_disk, to_dir);
}
void DiskRestartProxy::listFiles(const String & path, std::vector<String> & file_names)
void DiskRestartProxy::listFiles(const String & path, std::vector<String> & file_names) const
{
ReadLock lock (mutex);
DiskDecorator::listFiles(path, file_names);
@ -276,7 +276,7 @@ void DiskRestartProxy::setLastModified(const String & path, const Poco::Timestam
DiskDecorator::setLastModified(path, timestamp);
}
Poco::Timestamp DiskRestartProxy::getLastModified(const String & path)
Poco::Timestamp DiskRestartProxy::getLastModified(const String & path) const
{
ReadLock lock (mutex);
return DiskDecorator::getLastModified(path);

View File

@ -37,13 +37,13 @@ public:
void createDirectories(const String & path) override;
void clearDirectory(const String & path) override;
void moveDirectory(const String & from_path, const String & to_path) override;
DirectoryIteratorPtr iterateDirectory(const String & path) override;
DirectoryIteratorPtr iterateDirectory(const String & path) const override;
void createFile(const String & path) override;
void moveFile(const String & from_path, const String & to_path) override;
void replaceFile(const String & from_path, const String & to_path) override;
void copy(const String & from_path, const DiskPtr & to_disk, const String & to_path) override;
void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir) override;
void listFiles(const String & path, std::vector<String> & file_names) override;
void listFiles(const String & path, std::vector<String> & file_names) const override;
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
const ReadSettings & settings,
@ -58,7 +58,7 @@ public:
void removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override;
void removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override;
void setLastModified(const String & path, const Poco::Timestamp & timestamp) override;
Poco::Timestamp getLastModified(const String & path) override;
Poco::Timestamp getLastModified(const String & path) const override;
void setReadOnly(const String & path) override;
void createHardLink(const String & src_path, const String & dst_path) override;
void truncateFile(const String & path, size_t size) override;

View File

@ -188,7 +188,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskWebServer::readFile(const String & p
}
DirectoryIteratorPtr DiskWebServer::iterateDirectory(const String & path)
DirectoryIteratorPtr DiskWebServer::iterateDirectory(const String & path) const
{
std::vector<fs::path> dir_file_paths;
if (files.find(path) == files.end())

View File

@ -90,15 +90,15 @@ public:
size_t getFileSize(const String & path) const override;
void listFiles(const String & /* path */, std::vector<String> & /* file_names */) override { }
void listFiles(const String & /* path */, std::vector<String> & /* file_names */) const override { }
void setReadOnly(const String & /* path */) override {}
bool isDirectory(const String & path) const override;
DirectoryIteratorPtr iterateDirectory(const String & /* path */) override;
DirectoryIteratorPtr iterateDirectory(const String & /* path */) const override;
Poco::Timestamp getLastModified(const String &) override { return Poco::Timestamp{}; }
Poco::Timestamp getLastModified(const String &) const override { return Poco::Timestamp{}; }
/// Write and modification part

View File

@ -16,7 +16,7 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
}
bool IDisk::isDirectoryEmpty(const String & path)
bool IDisk::isDirectoryEmpty(const String & path) const
{
return !iterateDirectory(path)->isValid();
}

View File

@ -138,10 +138,10 @@ public:
virtual void moveDirectory(const String & from_path, const String & to_path) = 0;
/// Return iterator to the contents of the specified directory.
virtual DirectoryIteratorPtr iterateDirectory(const String & path) = 0;
virtual DirectoryIteratorPtr iterateDirectory(const String & path) const = 0;
/// Return `true` if the specified directory is empty.
bool isDirectoryEmpty(const String & path);
bool isDirectoryEmpty(const String & path) const;
/// Create empty file at `path`.
virtual void createFile(const String & path) = 0;
@ -164,7 +164,7 @@ public:
virtual void copyFile(const String & from_file_path, IDisk & to_disk, const String & to_file_path);
/// List files at `path` and add their names to `file_names`
virtual void listFiles(const String & path, std::vector<String> & file_names) = 0;
virtual void listFiles(const String & path, std::vector<String> & file_names) const = 0;
/// Open the file for read and return ReadBufferFromFileBase object.
virtual std::unique_ptr<ReadBufferFromFileBase> readFile( /// NOLINT
@ -259,7 +259,7 @@ public:
virtual void setLastModified(const String & path, const Poco::Timestamp & timestamp) = 0;
/// Get last modified time of file or directory at `path`.
virtual Poco::Timestamp getLastModified(const String & path) = 0;
virtual Poco::Timestamp getLastModified(const String & path) const = 0;
/// Set file at `path` as read-only.
virtual void setReadOnly(const String & path) = 0;

View File

@ -350,13 +350,13 @@ void DiskObjectStorage::removeDirectory(const String & path)
}
DirectoryIteratorPtr DiskObjectStorage::iterateDirectory(const String & path)
DirectoryIteratorPtr DiskObjectStorage::iterateDirectory(const String & path) const
{
return metadata_storage->iterateDirectory(path);
}
void DiskObjectStorage::listFiles(const String & path, std::vector<String> & file_names)
void DiskObjectStorage::listFiles(const String & path, std::vector<String> & file_names) const
{
for (auto it = iterateDirectory(path); it->isValid(); it->next())
file_names.push_back(it->name());
@ -371,7 +371,7 @@ void DiskObjectStorage::setLastModified(const String & path, const Poco::Timesta
}
Poco::Timestamp DiskObjectStorage::getLastModified(const String & path)
Poco::Timestamp DiskObjectStorage::getLastModified(const String & path) const
{
return metadata_storage->getLastModified(path);
}

View File

@ -108,7 +108,7 @@ public:
void createHardLink(const String & src_path, const String & dst_path) override;
void createHardLink(const String & src_path, const String & dst_path, bool should_send_metadata);
void listFiles(const String & path, std::vector<String> & file_names) override;
void listFiles(const String & path, std::vector<String> & file_names) const override;
void setReadOnly(const String & path) override;
@ -124,11 +124,11 @@ public:
void removeDirectory(const String & path) override;
DirectoryIteratorPtr iterateDirectory(const String & path) override;
DirectoryIteratorPtr iterateDirectory(const String & path) const override;
void setLastModified(const String & path, const Poco::Timestamp & timestamp) override;
Poco::Timestamp getLastModified(const String & path) override;
Poco::Timestamp getLastModified(const String & path) const override;
bool isRemote() const override { return true; }

View File

@ -72,7 +72,7 @@ void DiskObjectStorageMetadata::deserializeFromString(const std::string & data)
void DiskObjectStorageMetadata::serialize(WriteBuffer & buf, bool sync) const
{
writeIntText(VERSION_RELATIVE_PATHS, buf);
writeIntText(VERSION_READ_ONLY_FLAG, buf);
writeChar('\n', buf);
writeIntText(remote_fs_objects.size(), buf);

View File

@ -15,16 +15,6 @@
namespace DB
{
struct IMetadataOperation
{
virtual void execute() = 0;
virtual void undo() = 0;
virtual void finalize() {}
virtual ~IMetadataOperation() = default;
};
using MetadataOperationPtr = std::unique_ptr<IMetadataOperation>;
class IMetadataStorage;
/// Tries to provide some "transactions" interface, which allow
@ -113,7 +103,7 @@ public:
virtual std::vector<std::string> listDirectory(const std::string & path) const = 0;
virtual DirectoryIteratorPtr iterateDirectory(const std::string & path) = 0;
virtual DirectoryIteratorPtr iterateDirectory(const std::string & path) const = 0;
virtual uint32_t getHardlinkCount(const std::string & path) const = 0;

View File

@ -484,7 +484,7 @@ std::vector<std::string> MetadataStorageFromDisk::listDirectory(const std::strin
return result_files;
}
DirectoryIteratorPtr MetadataStorageFromDisk::iterateDirectory(const std::string & path)
DirectoryIteratorPtr MetadataStorageFromDisk::iterateDirectory(const std::string & path) const
{
return disk->iterateDirectory(path);
}

View File

@ -8,6 +8,17 @@
namespace DB
{
struct IMetadataOperation
{
virtual void execute() = 0;
virtual void undo() = 0;
virtual void finalize() {}
virtual ~IMetadataOperation() = default;
};
using MetadataOperationPtr = std::unique_ptr<IMetadataOperation>;
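
A minimal sketch of the contract with a hypothetical operation (it assumes the IMetadataOperation declaration above plus <string> and <utility>): execute() applies the change, undo() reverts it when a later operation in the same transaction fails, and finalize() runs after a successful commit.

struct CreateEmptyFileOperation final : public IMetadataOperation /// hypothetical example
{
    explicit CreateEmptyFileOperation(std::string path_) : path(std::move(path_)) {}

    void execute() override { /* create an empty file at `path` */ }
    void undo() override { /* remove the file created by execute() */ }

private:
    std::string path;
};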
enum class MetadataFromDiskTransactionState
{
PREPARING,
@ -50,7 +61,7 @@ public:
std::vector<std::string> listDirectory(const std::string & path) const override;
DirectoryIteratorPtr iterateDirectory(const std::string & path) override;
DirectoryIteratorPtr iterateDirectory(const std::string & path) const override;
std::string readFileToString(const std::string & path) const override;

View File

@ -0,0 +1,15 @@
#include <Disks/ObjectStorages/S3/S3Capabilities.h>
namespace DB
{
S3Capabilities getCapabilitiesFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
{
return S3Capabilities
{
.support_batch_delete = config.getBool(config_prefix + ".support_batch_delete", true),
.support_proxy = config.getBool(config_prefix + ".support_proxy", config.has(config_prefix + ".proxy")),
};
}
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <string>
#include <Poco/Util/AbstractConfiguration.h>
namespace DB
{
/// Supported/unsupported features by different S3 implementations
/// Useful only for implementations that are almost compatible with AWS S3.
struct S3Capabilities
{
/// Google S3 implementation doesn't support batch delete
/// TODO: possibly we have to use Google SDK https://github.com/googleapis/google-cloud-cpp/tree/main/google/cloud/storage
/// because it looks like it misses a lot of features, such as:
/// 1) batch delete
/// 2) list_v2
/// 3) multipart upload works differently
bool support_batch_delete{true};
/// Y.Cloud S3 implementation supports proxies for connections
bool support_proxy{false};
};
S3Capabilities getCapabilitiesFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
}
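
A minimal, self-contained sketch of the defaulting rules in getCapabilitiesFromConfig, with a std::map standing in for Poco's AbstractConfiguration (the key names are the ones read above):

#include <iostream>
#include <map>
#include <string>

struct S3CapabilitiesSketch
{
    bool support_batch_delete{true};
    bool support_proxy{false};
};

S3CapabilitiesSketch capabilitiesFromConfig(const std::map<std::string, std::string> & config, const std::string & prefix)
{
    auto get_bool = [&](const std::string & key, bool def)
    {
        auto it = config.find(prefix + "." + key);
        return it == config.end() ? def : it->second == "true";
    };
    // Batch delete is assumed supported unless explicitly disabled (e.g. for GCS);
    // proxy support defaults to whether a proxy section exists under the prefix.
    return {get_bool("support_batch_delete", true),
            get_bool("support_proxy", config.count(prefix + ".proxy") > 0)};
}

int main()
{
    std::map<std::string, std::string> config{{"disks.gcs.support_batch_delete", "false"}};
    std::cout << capabilitiesFromConfig(config, "disks.gcs").support_batch_delete << '\n'; // prints 0
}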

View File

@ -17,6 +17,7 @@
#include <aws/s3/model/CopyObjectRequest.h>
#include <aws/s3/model/ListObjectsV2Request.h>
#include <aws/s3/model/HeadObjectRequest.h>
#include <aws/s3/model/DeleteObjectRequest.h>
#include <aws/s3/model/DeleteObjectsRequest.h>
#include <aws/s3/model/CreateMultipartUploadRequest.h>
#include <aws/s3/model/CompleteMultipartUploadRequest.h>
@ -213,18 +214,34 @@ void S3ObjectStorage::listPrefix(const std::string & path, BlobsPathToSize & chi
void S3ObjectStorage::removeObject(const std::string & path)
{
auto client_ptr = client.get();
Aws::S3::Model::ObjectIdentifier obj;
obj.SetKey(path);
auto settings_ptr = s3_settings.get();
Aws::S3::Model::Delete delkeys;
delkeys.SetObjects({obj});
// If batch delete is not supported, use a single DeleteObject request per object
// This allows us to work with GCS, which doesn't support DeleteObjects
if (!s3_capabilities.support_batch_delete)
{
Aws::S3::Model::DeleteObjectRequest request;
request.SetBucket(bucket);
request.SetKey(path);
auto outcome = client_ptr->DeleteObject(request);
Aws::S3::Model::DeleteObjectsRequest request;
request.SetBucket(bucket);
request.SetDelete(delkeys);
auto outcome = client_ptr->DeleteObjects(request);
throwIfError(outcome);
}
else
{
/// TODO: For AWS we prefer to use the multi-object operation even for a single object;
/// maybe we shouldn't?
Aws::S3::Model::ObjectIdentifier obj;
obj.SetKey(path);
Aws::S3::Model::Delete delkeys;
delkeys.SetObjects({obj});
Aws::S3::Model::DeleteObjectsRequest request;
request.SetBucket(bucket);
request.SetDelete(delkeys);
auto outcome = client_ptr->DeleteObjects(request);
throwIfError(outcome);
throwIfError(outcome);
}
}
void S3ObjectStorage::removeObjects(const std::vector<std::string> & paths)
@ -235,31 +252,39 @@ void S3ObjectStorage::removeObjects(const std::vector<std::string> & paths)
auto client_ptr = client.get();
auto settings_ptr = s3_settings.get();
size_t chunk_size_limit = settings_ptr->objects_chunk_size_to_delete;
size_t current_position = 0;
while (current_position < paths.size())
if (!s3_capabilities.support_batch_delete)
{
std::vector<Aws::S3::Model::ObjectIdentifier> current_chunk;
String keys;
for (; current_position < paths.size() && current_chunk.size() < chunk_size_limit; ++current_position)
for (const auto & path : paths)
removeObject(path);
}
else
{
size_t chunk_size_limit = settings_ptr->objects_chunk_size_to_delete;
size_t current_position = 0;
while (current_position < paths.size())
{
Aws::S3::Model::ObjectIdentifier obj;
obj.SetKey(paths[current_position]);
current_chunk.push_back(obj);
std::vector<Aws::S3::Model::ObjectIdentifier> current_chunk;
String keys;
for (; current_position < paths.size() && current_chunk.size() < chunk_size_limit; ++current_position)
{
Aws::S3::Model::ObjectIdentifier obj;
obj.SetKey(paths[current_position]);
current_chunk.push_back(obj);
if (!keys.empty())
keys += ", ";
keys += paths[current_position];
if (!keys.empty())
keys += ", ";
keys += paths[current_position];
}
Aws::S3::Model::Delete delkeys;
delkeys.SetObjects(current_chunk);
Aws::S3::Model::DeleteObjectsRequest request;
request.SetBucket(bucket);
request.SetDelete(delkeys);
auto outcome = client_ptr->DeleteObjects(request);
throwIfError(outcome);
}
Aws::S3::Model::Delete delkeys;
delkeys.SetObjects(current_chunk);
Aws::S3::Model::DeleteObjectsRequest request;
request.SetBucket(bucket);
request.SetDelete(delkeys);
auto outcome = client_ptr->DeleteObjects(request);
throwIfError(outcome);
}
}
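
A worked illustration of the chunking above (a minimal standalone sketch; the keys and the limit are hypothetical): with objects_chunk_size_to_delete = 2 and five keys, three DeleteObjects requests are issued.

#include <iostream>
#include <string>
#include <vector>

int main()
{
    const std::vector<std::string> paths{"a", "b", "c", "d", "e"};
    const size_t chunk_size_limit = 2; // stands in for objects_chunk_size_to_delete
    size_t current_position = 0;
    while (current_position < paths.size())
    {
        std::vector<std::string> current_chunk;
        for (; current_position < paths.size() && current_chunk.size() < chunk_size_limit; ++current_position)
            current_chunk.push_back(paths[current_position]);
        std::cout << "DeleteObjects request with " << current_chunk.size() << " keys\n";
    }
}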
@ -493,7 +518,7 @@ std::unique_ptr<IObjectStorage> S3ObjectStorage::cloneObjectStorage(const std::s
return std::make_unique<S3ObjectStorage>(
nullptr, getClient(config, config_prefix, context),
getSettings(config, config_prefix, context),
version_id, new_namespace);
version_id, s3_capabilities, new_namespace);
}
}

View File

@ -5,6 +5,7 @@
#if USE_AWS_S3
#include <Disks/ObjectStorages/IObjectStorage.h>
#include <Disks/ObjectStorages/S3/S3Capabilities.h>
#include <memory>
#include <aws/s3/S3Client.h>
#include <aws/s3/model/HeadObjectResult.h>
@ -46,11 +47,13 @@ public:
std::unique_ptr<Aws::S3::S3Client> && client_,
std::unique_ptr<S3ObjectStorageSettings> && s3_settings_,
String version_id_,
const S3Capabilities & s3_capabilities_,
String bucket_)
: IObjectStorage(std::move(cache_))
, bucket(bucket_)
, client(std::move(client_))
, s3_settings(std::move(s3_settings_))
, s3_capabilities(s3_capabilities_)
, version_id(std::move(version_id_))
{}
@ -129,6 +132,7 @@ private:
MultiVersion<Aws::S3::S3Client> client;
MultiVersion<S3ObjectStorageSettings> s3_settings;
const S3Capabilities s3_capabilities;
const String version_id;
};

View File

@ -89,11 +89,12 @@ void registerDiskS3(DiskFactory & factory)
auto metadata_storage = std::make_shared<MetadataStorageFromDisk>(metadata_disk, uri.key);
FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context);
S3Capabilities s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
ObjectStoragePtr s3_storage = std::make_unique<S3ObjectStorage>(
std::move(cache), getClient(config, config_prefix, context),
getSettings(config, config_prefix, context),
uri.version_id, uri.bucket);
uri.version_id, s3_capabilities, uri.bucket);
bool send_metadata = config.getBool(config_prefix + ".send_metadata", false);
uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16);

View File

@ -43,6 +43,7 @@
#include <Disks/DiskRestartProxy.h>
#include <Storages/StorageDistributed.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/Freeze.h>
#include <Storages/StorageFactory.h>
#include <Parsers/ASTSystemQuery.h>
#include <Parsers/ASTDropQuery.h>
@ -235,6 +236,8 @@ BlockIO InterpreterSystemQuery::execute()
}
BlockIO result;
volume_ptr = {};
if (!query.storage_policy.empty() && !query.volume.empty())
volume_ptr = getContext()->getStoragePolicy(query.storage_policy)->getVolumeByName(query.volume);
@ -493,11 +496,18 @@ BlockIO InterpreterSystemQuery::execute()
getContext()->checkAccess(AccessType::SYSTEM_THREAD_FUZZER);
ThreadFuzzer::start();
break;
case Type::UNFREEZE:
{
getContext()->checkAccess(AccessType::SYSTEM_UNFREEZE);
/// The result contains information about deleted parts as a table. It is for compatibility with the ALTER TABLE UNFREEZE query.
result = Unfreezer().unfreeze(query.backup_name, getContext());
break;
}
default:
throw Exception("Unknown type of SYSTEM query", ErrorCodes::BAD_ARGUMENTS);
}
return BlockIO();
return result;
}
void InterpreterSystemQuery::restoreReplica()
@ -968,6 +978,11 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
required_access.emplace_back(AccessType::SYSTEM_RESTART_DISK);
break;
}
case Type::UNFREEZE:
{
required_access.emplace_back(AccessType::SYSTEM_UNFREEZE);
break;
}
case Type::STOP_LISTEN_QUERIES:
case Type::START_LISTEN_QUERIES:
case Type::STOP_THREAD_FUZZER:

View File

@ -201,7 +201,7 @@ void TransactionLog::loadLogFromZooKeeper()
/// 3. support 64-bit CSNs on top of Apache ZooKeeper (it uses Int32 for sequential numbers)
Strings entries_list = zookeeper->getChildren(zookeeper_path_log, nullptr, log_updated_event);
chassert(!entries_list.empty());
std::sort(entries_list.begin(), entries_list.end());
::sort(entries_list.begin(), entries_list.end());
loadEntries(entries_list.begin(), entries_list.end());
chassert(!last_loaded_entry.empty());
chassert(latest_snapshot == deserializeCSN(last_loaded_entry));
@ -262,7 +262,7 @@ void TransactionLog::loadNewEntries()
{
Strings entries_list = zookeeper->getChildren(zookeeper_path_log, nullptr, log_updated_event);
chassert(!entries_list.empty());
std::sort(entries_list.begin(), entries_list.end());
::sort(entries_list.begin(), entries_list.end());
auto it = std::upper_bound(entries_list.begin(), entries_list.end(), last_loaded_entry);
loadEntries(it, entries_list.end());
chassert(last_loaded_entry == entries_list.back());
@ -602,7 +602,7 @@ void TransactionLog::sync() const
{
Strings entries_list = zookeeper->getChildren(zookeeper_path_log);
chassert(!entries_list.empty());
std::sort(entries_list.begin(), entries_list.end());
::sort(entries_list.begin(), entries_list.end());
CSN newest_csn = deserializeCSN(entries_list.back());
waitForCSNLoaded(newest_csn);
}

View File

@ -469,7 +469,7 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const
for (const auto & name : required_result_columns)
name_pos[name] = pos++;
}
std::sort(elements.begin(), elements.end(), [&](const auto & lhs, const auto & rhs)
::sort(elements.begin(), elements.end(), [&](const auto & lhs, const auto & rhs)
{
String lhs_name = lhs->getAliasOrColumnName();
String rhs_name = rhs->getAliasOrColumnName();

View File

@ -66,6 +66,7 @@ public:
START_DISTRIBUTED_SENDS,
START_THREAD_FUZZER,
STOP_THREAD_FUZZER,
UNFREEZE,
END
};
@ -93,6 +94,7 @@ public:
UInt64 seconds{};
String filesystem_cache_path;
String backup_name;
String getID(char) const override { return "SYSTEM query"; }

View File

@ -363,6 +363,20 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &
break;
}
case Type::UNFREEZE:
{
ASTPtr ast;
if (ParserKeyword{"WITH NAME"}.ignore(pos, expected) && ParserStringLiteral{}.parse(pos, ast, expected))
{
res->backup_name = ast->as<ASTLiteral &>().value.get<const String &>();
}
else
{
return false;
}
break;
}
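/// Usage sketch: the case above accepts queries of the form
///     SYSTEM UNFREEZE WITH NAME 'backup3'
/// The WITH NAME clause is mandatory here: without it, parseImpl returns false
/// and the whole SYSTEM query fails to parse.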
default:
{
parseQueryWithOnCluster(res, pos, expected);

View File

@ -7,6 +7,7 @@
#include <Storages/Cache/ExternalDataSourceCache.h>
#include <Storages/Cache/RemoteFileMetadataFactory.h>
#include <base/errnoToString.h>
#include <base/sort.h>
#include <Common/logger_useful.h>
#include <base/sleep.h>
#include <Poco/Logger.h>
@ -229,7 +230,7 @@ void ExternalDataSourceCache::initOnce(ContextPtr context, const String & root_d
LOG_INFO(
log, "Initializing local cache for remote data sources. Local cache root path: {}, cache size limit: {}", root_dir_, limit_size_);
splitInto<','>(root_dirs, root_dir_);
std::sort(root_dirs.begin(), root_dirs.end());
::sort(root_dirs.begin(), root_dirs.end());
local_cache_bytes_read_before_flush = bytes_read_before_flush_;
lru_caches = std::make_unique<RemoteFileCacheType>(limit_size_);

src/Storages/Freeze.cpp (new file, 201 lines)
View File

@ -0,0 +1,201 @@
#include <Storages/Freeze.h>
#include <Disks/ObjectStorages/IMetadataStorage.h>
#include <Storages/PartitionCommands.h>
#include <Common/escapeForFileName.h>
#include <Common/logger_useful.h>
namespace DB
{
void FreezeMetaData::fill(const StorageReplicatedMergeTree & storage)
{
is_replicated = storage.supportsReplication();
is_remote = storage.isRemote();
replica_name = storage.getReplicaName();
zookeeper_name = storage.getZooKeeperName();
table_shared_id = storage.getTableSharedID();
}
void FreezeMetaData::save(DiskPtr data_disk, const String & path) const
{
auto metadata_storage = data_disk->getMetadataStorage();
auto file_path = getFileName(path);
auto tx = metadata_storage->createTransaction();
WriteBufferFromOwnString buffer;
writeIntText(version, buffer);
buffer.write("\n", 1);
writeBoolText(is_replicated, buffer);
buffer.write("\n", 1);
writeBoolText(is_remote, buffer);
buffer.write("\n", 1);
writeString(replica_name, buffer);
buffer.write("\n", 1);
writeString(zookeeper_name, buffer);
buffer.write("\n", 1);
writeString(table_shared_id, buffer);
buffer.write("\n", 1);
tx->writeStringToFile(file_path, buffer.str());
tx->commit();
}
bool FreezeMetaData::load(DiskPtr data_disk, const String & path)
{
auto metadata_storage = data_disk->getMetadataStorage();
auto file_path = getFileName(path);
if (!metadata_storage->exists(file_path))
return false;
auto metadata_str = metadata_storage->readFileToString(file_path);
ReadBufferFromString buffer(metadata_str);
readIntText(version, buffer);
if (version != 1)
{
LOG_ERROR(&Poco::Logger::get("FreezeMetaData"), "Unknown freezed metadata version: {}", version);
return false;
}
DB::assertChar('\n', buffer);
readBoolText(is_replicated, buffer);
DB::assertChar('\n', buffer);
readBoolText(is_remote, buffer);
DB::assertChar('\n', buffer);
readString(replica_name, buffer);
DB::assertChar('\n', buffer);
readString(zookeeper_name, buffer);
DB::assertChar('\n', buffer);
readString(table_shared_id, buffer);
DB::assertChar('\n', buffer);
return true;
}
void FreezeMetaData::clean(DiskPtr data_disk, const String & path)
{
auto metadata_storage = data_disk->getMetadataStorage();
auto fname = getFileName(path);
if (metadata_storage->exists(fname))
{
auto tx = metadata_storage->createTransaction();
tx->unlinkFile(fname);
tx->commit();
}
}
String FreezeMetaData::getFileName(const String & path)
{
return fs::path(path) / "frozen_metadata.txt";
}
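
For reference, a minimal standalone sketch of the frozen_metadata.txt layout that save() produces and load() expects, one field per line (the field values are hypothetical):

#include <iostream>
#include <string>

int main()
{
    int version = 1;
    bool is_replicated = true; // assuming writeBoolText emits '1'/'0'
    bool is_remote = true;
    std::string replica_name = "replica_1";                               // hypothetical
    std::string zookeeper_name = "default";                               // hypothetical
    std::string table_shared_id = "6f2c9c2f-0000-4000-8000-000000000001"; // hypothetical
    std::cout << version << '\n' << is_replicated << '\n' << is_remote << '\n'
              << replica_name << '\n' << zookeeper_name << '\n' << table_shared_id << '\n';
}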
BlockIO Unfreezer::unfreeze(const String & backup_name, ContextPtr local_context)
{
LOG_DEBUG(log, "Unfreezing backup {}", backup_name);
auto disks_map = local_context->getDisksMap();
Disks disks;
for (auto & [name, disk]: disks_map)
{
disks.push_back(disk);
}
auto backup_path = fs::path(backup_directory_prefix) / escapeForFileName(backup_name);
auto store_path = backup_path / "store";
PartitionCommandsResultInfo result_info;
for (const auto & disk: disks)
{
if (!disk->exists(store_path))
continue;
for (auto prefix_it = disk->iterateDirectory(store_path); prefix_it->isValid(); prefix_it->next())
{
auto prefix_directory = store_path / prefix_it->name();
for (auto table_it = disk->iterateDirectory(prefix_directory); table_it->isValid(); table_it->next())
{
auto table_directory = prefix_directory / table_it->name();
auto current_result_info = unfreezePartitionsFromTableDirectory([] (const String &) { return true; }, backup_name, {disk}, table_directory, local_context);
for (auto & command_result : current_result_info)
{
command_result.command_type = "SYSTEM UNFREEZE";
}
result_info.insert(
result_info.end(),
std::make_move_iterator(current_result_info.begin()),
std::make_move_iterator(current_result_info.end()));
}
}
if (disk->exists(backup_path))
{
disk->removeRecursive(backup_path);
}
}
BlockIO result;
if (!result_info.empty())
{
result.pipeline = QueryPipeline(convertCommandsResultToSource(result_info));
}
return result;
}
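
A minimal sketch of the layout this traversal expects (the backup name and table UUID are hypothetical): each disk is scanned under shadow/<escaped backup name>/store/<prefix>/<table uuid>/, and every table directory found there is unfrozen.

#include <filesystem>
#include <iostream>

int main()
{
    namespace fs = std::filesystem;
    const fs::path backup_path = fs::path("shadow") / "backup3"; // escapeForFileName(backup_name)
    const fs::path table_directory = backup_path / "store" / "6f2" // prefix directory
        / "6f2c9c2f-0000-4000-8000-000000000001"; // hypothetical table UUID
    std::cout << table_directory.generic_string() << '\n';
}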
bool Unfreezer::removeFreezedPart(DiskPtr disk, const String & path, const String & part_name, ContextPtr local_context)
{
if (disk->supportZeroCopyReplication())
{
FreezeMetaData meta;
if (meta.load(disk, path))
{
if (meta.is_replicated)
{
FreezeMetaData::clean(disk, path);
return StorageReplicatedMergeTree::removeSharedDetachedPart(disk, path, part_name, meta.table_shared_id, meta.zookeeper_name, meta.replica_name, "", local_context);
}
}
}
disk->removeRecursive(path);
return false;
}
PartitionCommandsResultInfo Unfreezer::unfreezePartitionsFromTableDirectory(MergeTreeData::MatcherFn matcher, const String & backup_name, const Disks & disks, const fs::path & table_directory, ContextPtr local_context)
{
PartitionCommandsResultInfo result;
for (const auto & disk : disks)
{
if (!disk->exists(table_directory))
continue;
for (auto it = disk->iterateDirectory(table_directory); it->isValid(); it->next())
{
const auto & partition_directory = it->name();
/// Partition ID is prefix of part directory name: <partition id>_<rest of part directory name>
auto found = partition_directory.find('_');
if (found == std::string::npos)
continue;
auto partition_id = partition_directory.substr(0, found);
if (!matcher(partition_id))
continue;
const auto & path = it->path();
bool keep_shared = removeFreezedPart(disk, path, partition_directory, local_context);
result.push_back(PartitionCommandResultInfo{
.partition_id = partition_id,
.part_name = partition_directory,
.backup_path = disk->getPath() + table_directory.generic_string(),
.part_backup_path = disk->getPath() + path,
.backup_name = backup_name,
});
LOG_DEBUG(log, "Unfreezed part by path {}, keep shared data: {}", disk->getPath() + path, keep_shared);
}
}
LOG_DEBUG(log, "Unfreezed {} parts", result.size());
return result;
}
}

src/Storages/Freeze.h (new file, 45 lines)
View File

@ -0,0 +1,45 @@
#pragma once
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/MergeTreeData.h>
namespace DB
{
/// Special metadata used during freeze table. Required for zero-copy
/// replication.
struct FreezeMetaData
{
public:
void fill(const StorageReplicatedMergeTree & storage);
void save(DiskPtr data_disk, const String & path) const;
bool load(DiskPtr data_disk, const String & path);
static void clean(DiskPtr data_disk, const String & path);
private:
static String getFileName(const String & path);
public:
int version = 1;
bool is_replicated{false};
bool is_remote{false};
String replica_name;
String zookeeper_name;
String table_shared_id;
};
class Unfreezer
{
public:
PartitionCommandsResultInfo unfreezePartitionsFromTableDirectory(MergeTreeData::MatcherFn matcher, const String & backup_name, const Disks & disks, const fs::path & table_directory, ContextPtr local_context);
BlockIO unfreeze(const String & backup_name, ContextPtr local_context);
private:
Poco::Logger * log = &Poco::Logger::get("Unfreezer");
static constexpr std::string_view backup_directory_prefix = "shadow";
static bool removeFreezedPart(DiskPtr disk, const String & path, const String & part_name, ContextPtr local_context);
};
}

View File

@ -56,6 +56,7 @@
#include <Storages/StorageMergeTree.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/VirtualColumnUtils.h>
#include <Storages/Freeze.h>
#include <Common/Increment.h>
#include <Common/SimpleIncrement.h>
#include <Common/Stopwatch.h>
@ -6112,51 +6113,15 @@ bool MergeTreeData::removeDetachedPart(DiskPtr disk, const String & path, const
return false;
}
PartitionCommandsResultInfo MergeTreeData::unfreezePartitionsByMatcher(MatcherFn matcher, const String & backup_name, ContextPtr)
PartitionCommandsResultInfo MergeTreeData::unfreezePartitionsByMatcher(MatcherFn matcher, const String & backup_name, ContextPtr local_context)
{
auto backup_path = fs::path("shadow") / escapeForFileName(backup_name) / relative_data_path;
LOG_DEBUG(log, "Unfreezing parts by path {}", backup_path.generic_string());
PartitionCommandsResultInfo result;
auto disks = getStoragePolicy()->getDisks();
for (const auto & disk : getStoragePolicy()->getDisks())
{
if (!disk->exists(backup_path))
continue;
for (auto it = disk->iterateDirectory(backup_path); it->isValid(); it->next())
{
const auto & partition_directory = it->name();
/// Partition ID is prefix of part directory name: <partition id>_<rest of part directory name>
auto found = partition_directory.find('_');
if (found == std::string::npos)
continue;
auto partition_id = partition_directory.substr(0, found);
if (!matcher(partition_id))
continue;
const auto & path = it->path();
bool keep_shared = removeDetachedPart(disk, path, partition_directory, true);
result.push_back(PartitionCommandResultInfo{
.partition_id = partition_id,
.part_name = partition_directory,
.backup_path = disk->getPath() + backup_path.generic_string(),
.part_backup_path = disk->getPath() + path,
.backup_name = backup_name,
});
LOG_DEBUG(log, "Unfreezed part by path {}, keep shared data: {}", disk->getPath() + path, keep_shared);
}
}
LOG_DEBUG(log, "Unfreezed {} parts", result.size());
return result;
return Unfreezer().unfreezePartitionsFromTableDirectory(matcher, backup_name, disks, backup_path, local_context);
}
bool MergeTreeData::canReplacePartition(const DataPartPtr & src_part) const

View File

@ -988,6 +988,9 @@ public:
/// Mutex for currently_submerging_parts and currently_emerging_parts
mutable std::mutex currently_submerging_emerging_mutex;
/// Used for freezePartitionsByMatcher and unfreezePartitionsByMatcher
using MatcherFn = std::function<bool(const String &)>;
protected:
friend class IMergeTreeDataPart;
friend class MergeTreeDataMergerMutator;
@ -1178,7 +1181,6 @@ protected:
bool isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node, const StorageMetadataPtr & metadata_snapshot) const;
/// Common part for |freezePartition()| and |freezeAll()|.
using MatcherFn = std::function<bool(const String &)>;
PartitionCommandsResultInfo freezePartitionsByMatcher(MatcherFn matcher, const StorageMetadataPtr & metadata_snapshot, const String & with_name, ContextPtr context);
PartitionCommandsResultInfo unfreezePartitionsByMatcher(MatcherFn matcher, const String & backup_name, ContextPtr context);

View File

@ -1256,6 +1256,27 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry(
{
ignore_max_size = max_source_parts_size == data_settings->max_bytes_to_merge_at_max_space_in_pool;
if (data_settings->always_fetch_merged_part && entry.num_tries > 0)
{
static constexpr auto MAX_SECONDS_TO_WAIT = 300L;
static constexpr auto BACKOFF_SECONDS = 3;
auto time_to_wait_seconds = std::min<int64_t>(MAX_SECONDS_TO_WAIT, entry.num_tries * BACKOFF_SECONDS);
auto time_since_last_try_seconds = std::time(nullptr) - entry.last_attempt_time;
/// Otherwise we will constantly look for the part on other replicas
/// and put too much load on ZooKeeper.
if (time_to_wait_seconds > time_since_last_try_seconds)
{
out_postpone_reason = fmt::format(
"Not executing log entry ({}) to merge parts for part {} because `always_fetch_merged_part` enabled and "
" not enough time had been passed since last try, have to wait {} seconds",
entry.znode_name, entry.new_part_name, time_to_wait_seconds - time_since_last_try_seconds);
LOG_DEBUG(log, fmt::runtime(out_postpone_reason));
return false;
}
}
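
The backoff above grows linearly with the number of tries and saturates at MAX_SECONDS_TO_WAIT; a minimal standalone sketch of the resulting wait times:

#include <algorithm>
#include <cstdint>
#include <iostream>

int main()
{
    constexpr int64_t MAX_SECONDS_TO_WAIT = 300;
    constexpr int64_t BACKOFF_SECONDS = 3;
    for (int64_t num_tries : {1, 10, 100, 200})
        std::cout << num_tries << " tries -> "
                  << std::min(MAX_SECONDS_TO_WAIT, num_tries * BACKOFF_SECONDS) << "s\n";
    // prints 3s, 30s, 300s, 300s
}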
if (isTTLMergeType(entry.merge_type))
{
if (merger_mutator.ttl_merges_blocker.isCancelled())

View File

@ -36,6 +36,7 @@
#include <Storages/MergeTree/MergeTreeReaderCompact.h>
#include <Storages/MergeTree/LeaderElection.h>
#include <Storages/MergeTree/ZeroCopyLock.h>
#include <Storages/Freeze.h>
#include <Databases/DatabaseOnDisk.h>
@ -8155,107 +8156,6 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode(
}
}
namespace
{
/// Special metadata used during freeze table. Required for zero-copy
/// replication.
struct FreezeMetaData
{
public:
void fill(const StorageReplicatedMergeTree & storage)
{
is_replicated = storage.supportsReplication();
is_remote = storage.isRemote();
replica_name = storage.getReplicaName();
zookeeper_name = storage.getZooKeeperName();
table_shared_id = storage.getTableSharedID();
}
void save(DiskPtr data_disk, const String & path) const
{
auto metadata_storage = data_disk->getMetadataStorage();
auto file_path = getFileName(path);
auto tx = metadata_storage->createTransaction();
WriteBufferFromOwnString buffer;
writeIntText(version, buffer);
buffer.write("\n", 1);
writeBoolText(is_replicated, buffer);
buffer.write("\n", 1);
writeBoolText(is_remote, buffer);
buffer.write("\n", 1);
writeString(replica_name, buffer);
buffer.write("\n", 1);
writeString(zookeeper_name, buffer);
buffer.write("\n", 1);
writeString(table_shared_id, buffer);
buffer.write("\n", 1);
tx->writeStringToFile(file_path, buffer.str());
tx->commit();
}
bool load(DiskPtr data_disk, const String & path)
{
auto metadata_storage = data_disk->getMetadataStorage();
auto file_path = getFileName(path);
if (!metadata_storage->exists(file_path))
return false;
auto metadata_str = metadata_storage->readFileToString(file_path);
ReadBufferFromString buffer(metadata_str);
readIntText(version, buffer);
if (version != 1)
{
LOG_ERROR(&Poco::Logger::get("FreezeMetaData"), "Unknown freezed metadata version: {}", version);
return false;
}
DB::assertChar('\n', buffer);
readBoolText(is_replicated, buffer);
DB::assertChar('\n', buffer);
readBoolText(is_remote, buffer);
DB::assertChar('\n', buffer);
readString(replica_name, buffer);
DB::assertChar('\n', buffer);
readString(zookeeper_name, buffer);
DB::assertChar('\n', buffer);
readString(table_shared_id, buffer);
DB::assertChar('\n', buffer);
return true;
}
static void clean(DiskPtr data_disk, const String & path)
{
auto metadata_storage = data_disk->getMetadataStorage();
auto fname = getFileName(path);
if (metadata_storage->exists(fname))
{
auto tx = metadata_storage->createTransaction();
tx->unlinkFile(fname);
tx->commit();
}
}
private:
static String getFileName(const String & path)
{
return fs::path(path) / "frozen_metadata.txt";
}
public:
int version = 1;
bool is_replicated;
bool is_remote;
String replica_name;
String zookeeper_name;
String table_shared_id;
};
}
bool StorageReplicatedMergeTree::removeDetachedPart(DiskPtr disk, const String & path, const String & part_name, bool is_freezed)
{
if (disk->supportZeroCopyReplication())
@ -8266,14 +8166,14 @@ bool StorageReplicatedMergeTree::removeDetachedPart(DiskPtr disk, const String &
if (meta.load(disk, path))
{
FreezeMetaData::clean(disk, path);
return removeSharedDetachedPart(disk, path, part_name, meta.table_shared_id, meta.zookeeper_name, meta.replica_name, "");
return removeSharedDetachedPart(disk, path, part_name, meta.table_shared_id, meta.zookeeper_name, meta.replica_name, "", getContext());
}
}
else
{
String table_id = getTableSharedID();
return removeSharedDetachedPart(disk, path, part_name, table_id, zookeeper_name, replica_name, zookeeper_path);
return removeSharedDetachedPart(disk, path, part_name, table_id, zookeeper_name, replica_name, zookeeper_path, getContext());
}
}
@ -8284,11 +8184,11 @@ bool StorageReplicatedMergeTree::removeDetachedPart(DiskPtr disk, const String &
bool StorageReplicatedMergeTree::removeSharedDetachedPart(DiskPtr disk, const String & path, const String & part_name, const String & table_uuid,
const String &, const String & detached_replica_name, const String & detached_zookeeper_path)
const String &, const String & detached_replica_name, const String & detached_zookeeper_path, ContextPtr local_context)
{
bool keep_shared = false;
zkutil::ZooKeeperPtr zookeeper = getZooKeeper();
zkutil::ZooKeeperPtr zookeeper = local_context->getZooKeeper();
NameSet files_not_to_remove;
fs::path checksums = fs::path(path) / IMergeTreeDataPart::FILE_FOR_REFERENCES_CHECK;
@ -8299,7 +8199,7 @@ bool StorageReplicatedMergeTree::removeSharedDetachedPart(DiskPtr disk, const St
String id = disk->getUniqueId(checksums);
bool can_remove = false;
std::tie(can_remove, files_not_to_remove) = StorageReplicatedMergeTree::unlockSharedDataByID(id, table_uuid, part_name,
detached_replica_name, disk, zookeeper, getContext()->getReplicatedMergeTreeSettings(), log,
detached_replica_name, disk, zookeeper, local_context->getReplicatedMergeTreeSettings(), &Poco::Logger::get("StorageReplicatedMergeTree"),
detached_zookeeper_path);
keep_shared = !can_remove;

View File

@ -300,6 +300,9 @@ public:
/// Check if there are new broken disks and enqueue part recovery tasks.
void checkBrokenDisks();
static bool removeSharedDetachedPart(DiskPtr disk, const String & path, const String & part_name, const String & table_uuid,
const String & zookeeper_name, const String & replica_name, const String & zookeeper_path, ContextPtr local_context);
private:
std::atomic_bool are_restoring_replica {false};
@ -792,9 +795,6 @@ private:
bool removeDetachedPart(DiskPtr disk, const String & path, const String & part_name, bool is_freezed) override;
bool removeSharedDetachedPart(DiskPtr disk, const String & path, const String & part_name, const String & table_uuid,
const String & zookeeper_name, const String & replica_name, const String & zookeeper_path);
/// Create freeze metadata for table and save in zookeeper. Required only if zero-copy replication enabled.
void createAndStoreFreezeMetadata(DiskPtr disk, DataPartPtr part, String backup_part_path) const override;

View File

@ -187,7 +187,11 @@ def gen_tags(version: ClickHouseVersion, release_type: str) -> List[str]:
def buildx_args(bucket_prefix: str, arch: str) -> List[str]:
args = [f"--platform=linux/{arch}", f"--label=build-url={GITHUB_RUN_URL}"]
args = [
f"--platform=linux/{arch}",
f"--label=build-url={GITHUB_RUN_URL}",
f"--label=com.clickhouse.build.githash={git.sha}",
]
if bucket_prefix:
url = p.join(bucket_prefix, BUCKETS[arch]) # to prevent a double //
args.append(f"--build-arg=REPOSITORY='{url}'")

View File

@ -342,6 +342,18 @@ def test_implicit_create_view_grant():
"CREATE VIEW test.view_1 AS SELECT 1", user="A"
)
# check grant option
instance.query("CREATE USER B")
expected_error = "Not enough privileges"
assert expected_error in instance.query_and_get_error(
"GRANT CREATE VIEW ON test.* TO B", user="A"
)
instance.query("GRANT CREATE TABLE ON test.* TO A WITH GRANT OPTION")
instance.query("GRANT CREATE VIEW ON test.* TO B", user="A")
instance.query("CREATE VIEW test.view_2 AS SELECT 1", user="B")
assert instance.query("SELECT * FROM test.view_2") == "1\n"
def test_implicit_create_temporary_table_grant():
instance.query("CREATE USER A")

View File

@ -525,6 +525,38 @@ def test_freeze_unfreeze(cluster, node_name):
)
@pytest.mark.parametrize("node_name", ["node"])
def test_freeze_system_unfreeze(cluster, node_name):
node = cluster.instances[node_name]
create_table(node, "s3_test")
create_table(node, "s3_test_removed")
minio = cluster.minio_client
node.query(
"INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-04", 4096))
)
node.query(
"INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-04", 4096))
)
node.query("ALTER TABLE s3_test FREEZE WITH NAME 'backup3'")
node.query("ALTER TABLE s3_test_removed FREEZE WITH NAME 'backup3'")
node.query("TRUNCATE TABLE s3_test")
node.query("DROP TABLE s3_test_removed NO DELAY")
assert (
len(list(minio.list_objects(cluster.minio_bucket, "data/")))
== FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2
)
# Unfreeze all data from backup3.
node.query("SYSTEM UNFREEZE WITH NAME 'backup3'")
# Data should be removed from S3.
assert (
len(list(minio.list_objects(cluster.minio_bucket, "data/"))) == FILES_OVERHEAD
)
@pytest.mark.parametrize("node_name", ["node"])
def test_s3_disk_apply_new_settings(cluster, node_name):
node = cluster.instances[node_name]
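
The assertions in `test_freeze_system_unfreeze` rest on simple object-count arithmetic: while the backups exist, each frozen wide part keeps its full set of files in S3; after `SYSTEM UNFREEZE`, only the constant bucket overhead remains. A sketch of the expected counts (the `FILES_OVERHEAD*` constants are defined elsewhere in the test module; the values here are placeholders):

```python
# Placeholder values: the real constants live in the test module.
FILES_OVERHEAD = 1
FILES_OVERHEAD_PER_PART_WIDE = 14

def expected_objects(frozen_parts: int) -> int:
    # Each frozen wide part contributes its full set of files to the bucket.
    return FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * frozen_parts

assert expected_objects(2) == 29  # two parts frozen under 'backup3'
assert expected_objects(0) == 1   # after SYSTEM UNFREEZE, overhead only
```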

View File

@ -412,7 +412,7 @@ def wait_mutations(node, table, seconds):
assert mutations == "0\n"
def test_s3_zero_copy_unfreeze(cluster):
def s3_zero_copy_unfreeze_base(cluster, unfreeze_query_template):
node1 = cluster.instances["node1"]
node2 = cluster.instances["node2"]
@ -455,12 +455,12 @@ def test_s3_zero_copy_unfreeze(cluster):
check_objects_exisis(cluster, objects11)
node1.query("ALTER TABLE unfreeze_test UNFREEZE WITH NAME 'freeze_backup1'")
node1.query(f"{unfreeze_query_template} 'freeze_backup1'")
wait_mutations(node1, "unfreeze_test", 10)
check_objects_exisis(cluster, objects12)
node2.query("ALTER TABLE unfreeze_test UNFREEZE WITH NAME 'freeze_backup2'")
node2.query(f"{unfreeze_query_template} 'freeze_backup2'")
wait_mutations(node2, "unfreeze_test", 10)
check_objects_not_exisis(cluster, objects12)
@ -469,7 +469,15 @@ def test_s3_zero_copy_unfreeze(cluster):
node2.query("DROP TABLE IF EXISTS unfreeze_test NO DELAY")
def test_s3_zero_copy_drop_detached(cluster):
def test_s3_zero_copy_unfreeze_alter(cluster):
s3_zero_copy_unfreeze_base(cluster, "ALTER TABLE unfreeze_test UNFREEZE WITH NAME")
def test_s3_zero_copy_unfreeze_system(cluster):
s3_zero_copy_unfreeze_base(cluster, "SYSTEM UNFREEZE WITH NAME")
def s3_zero_copy_drop_detached(cluster, unfreeze_query_template):
node1 = cluster.instances["node1"]
node2 = cluster.instances["node2"]
@ -498,8 +506,8 @@ def test_s3_zero_copy_drop_detached(cluster):
objects_diff = list(set(objects2) - set(objects1))
node1.query("ALTER TABLE drop_detached_test UNFREEZE WITH NAME 'detach_backup2'")
node1.query("ALTER TABLE drop_detached_test UNFREEZE WITH NAME 'detach_backup1'")
node1.query(f"{unfreeze_query_template} 'detach_backup2'")
node1.query(f"{unfreeze_query_template} 'detach_backup1'")
node1.query("ALTER TABLE drop_detached_test DETACH PARTITION '0'")
node1.query("ALTER TABLE drop_detached_test DETACH PARTITION '1'")
@ -554,6 +562,16 @@ def test_s3_zero_copy_drop_detached(cluster):
check_objects_not_exisis(cluster, objects1)
def test_s3_zero_copy_drop_detached_alter(cluster):
s3_zero_copy_drop_detached(
cluster, "ALTER TABLE drop_detached_test UNFREEZE WITH NAME"
)
def test_s3_zero_copy_drop_detached_system(cluster):
s3_zero_copy_drop_detached(cluster, "SYSTEM UNFREEZE WITH NAME")
def test_s3_zero_copy_concurrent_merge(cluster):
node1 = cluster.instances["node1"]
node2 = cluster.instances["node2"]

View File

@ -3,7 +3,6 @@
<substitution>
<name>dictionary_layout</name>
<values>
<value>flat</value>
<value>hashed</value>
<value>hashed_array</value>
</values>
@ -23,7 +22,7 @@
(
id UInt64,
parent_id UInt64
) ENGINE = Memory;
) ENGINE = MergeTree ORDER BY id;
</create_query>
<create_query>
@ -34,22 +33,43 @@
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(DB 'default' TABLE 'hierarchical_dictionary_source_table'))
LAYOUT({dictionary_layout}())
LAYOUT({dictionary_layout})
LIFETIME(0);
</create_query>
<create_query>
CREATE DICTIONARY hierarchical_flat_dictionary
(
id UInt64,
parent_id UInt64 HIERARCHICAL
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(DB 'default' TABLE 'hierarchical_dictionary_source_table'))
LAYOUT(FLAT(max_array_size 1000001))
LIFETIME(0);
</create_query>
<fill_query>
INSERT INTO hierarchical_dictionary_source_table
SELECT number, rand64() % 250000
WITH 5000 AS first_level_start, 50000 as second_level_start
SELECT
(number + 1) as id,
multiIf(id > second_level_start, id % (second_level_start - first_level_start) + first_level_start + 1,
id > first_level_start, (id % first_level_start) + 1,
0) as parent_id
FROM system.numbers
LIMIT 500000;
LIMIT 1000000;
</fill_query>
<query>
SELECT {func}('hierarchical_{dictionary_layout}_dictionary', id) FROM hierarchical_{dictionary_layout}_dictionary FORMAT Null;
SELECT {func}('hierarchical_flat_dictionary', number + 1) FROM numbers(1000000) FORMAT Null;
</query>
<query>
SELECT {func}('hierarchical_{dictionary_layout}_dictionary', number + 1) FROM numbers(1000000) FORMAT Null;
</query>
<drop_query>DROP TABLE IF EXISTS hierarchical_dictionary_source_table;</drop_query>
<drop_query>DROP DICTIONARY IF EXISTS hierarchical_{dictionary_layout}_dictionary;</drop_query>
<drop_query>DROP DICTIONARY IF EXISTS hierarchical_flat_dictionary;</drop_query>
</test>

View File

@ -118,6 +118,7 @@ SYSTEM FLUSH DISTRIBUTED ['FLUSH DISTRIBUTED'] TABLE SYSTEM FLUSH
SYSTEM FLUSH LOGS ['FLUSH LOGS'] GLOBAL SYSTEM FLUSH
SYSTEM FLUSH [] \N SYSTEM
SYSTEM THREAD FUZZER ['SYSTEM START THREAD FUZZER','SYSTEM STOP THREAD FUZZER','START THREAD FUZZER','STOP THREAD FUZZER'] GLOBAL SYSTEM
SYSTEM UNFREEZE ['SYSTEM UNFREEZE'] GLOBAL SYSTEM
SYSTEM [] \N ALL
dictGet ['dictHas','dictGetHierarchy','dictIsIn'] DICTIONARY ALL
addressToLine [] GLOBAL INTROSPECTION

View File

@ -22,3 +22,4 @@ command_type partition_id part_name backup_name
FREEZE PARTITION 202103 20210301_20210301_1_1_0 test_01417_single_part_old_syntax
command_type partition_id part_name backup_name
UNFREEZE PARTITION 20210301 20210301_20210301_1_1_0 test_01417_single_part_old_syntax
SYSTEM UNFREEZE 7 7_8_8_0 test_01417_single_part_7_system

View File

@ -57,6 +57,19 @@ ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_old_syntax UNFREEZE P
| ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \
--query "SELECT command_type, partition_id, part_name, backup_name FROM table"
# Unfreeze the whole backup with SYSTEM query
${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze FREEZE PARTITION '7' WITH NAME 'test_01417_single_part_7_system'"
${CLICKHOUSE_CLIENT} --query "DROP TABLE table_for_freeze"
${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze UNFREEZE PARTITION '7' WITH NAME 'test_01417_single_part_7_system'" 2>/dev/null
rc=$?
if [ $rc -eq 0 ]; then
echo "ALTER query shouldn't unfreeze removed table. Code: $rc"
exit 1
fi
${CLICKHOUSE_CLIENT} --query "SYSTEM UNFREEZE WITH NAME 'test_01417_single_part_7_system'" \
| ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \
--query "SELECT command_type, partition_id, part_name, backup_name FROM table"
# teardown
${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table_for_freeze;"
${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table_for_freeze_old_syntax;"

View File

@ -13,3 +13,6 @@ Tuple(arr Nested(k11 Int8, k22 String, k33 Int8), k1 Int8, k2 String, k3 String)
{"data":{"k1":1,"k10":[{"a":"1","b":"2","c":{"k11":""}},{"a":"2","b":"3","c":{"k11":""}}]}}
{"data":{"k1":2,"k10":[{"a":"1","b":"2","c":{"k11":"haha"}}]}}
Tuple(k1 Int8, k10 Nested(a String, b String, c Tuple(k11 String)))
{"data":{"k1":1,"k10":[{"a":"1","b":"2","c":{"k11":""}},{"a":"2","b":"3","c":{"k11":""}}]}}
{"data":{"k1":2,"k10":[{"a":"1","b":"2","c":{"k11":"haha"}}]}}
Tuple(k1 Int8, k10 Nested(a String, b String, c Tuple(k11 String)))

View File

@ -38,18 +38,29 @@ DROP TABLE type_json_dst;
CREATE TABLE type_json_dst (data JSON) ENGINE = MergeTree ORDER BY tuple();
CREATE TABLE type_json_src (data String) ENGINE = MergeTree ORDER BY tuple();
SYSTEM STOP MERGES type_json_src;
SET max_threads = 1;
SET max_insert_threads = 1;
SET output_format_json_named_tuples_as_objects = 1;
INSERT INTO type_json_src FORMAT JSONAsString {"k1": 1, "k10": [{"a": "1", "b": "2"}, {"a": "2", "b": "3"}]};
INSERT INTO type_json_src FORMAT JSONAsString {"k1": 2, "k10": [{"a": "1", "b": "2", "c": {"k11": "haha"}}]};
-- Temporarily fix the test by optimizing the data into one part.
-- If the order of insertion of the two lines above changes,
-- which can happen when inserting with multiple threads,
-- the test will fail. TODO: fix this.
OPTIMIZE TABLE type_json_src FINAL;
INSERT INTO type_json_dst SELECT data FROM type_json_src;
SELECT * FROM type_json_dst ORDER BY data.k1 FORMAT JSONEachRow;
SELECT toTypeName(data) FROM type_json_dst LIMIT 1;
TRUNCATE TABLE type_json_src;
TRUNCATE TABLE type_json_dst;
-- Insert in the opposite order. The order is important, because the way defaults are filled differs.
INSERT INTO type_json_src FORMAT JSONAsString {"k1": 2, "k10": [{"a": "1", "b": "2", "c": {"k11": "haha"}}]};
INSERT INTO type_json_src FORMAT JSONAsString {"k1": 1, "k10": [{"a": "1", "b": "2"}, {"a": "2", "b": "3"}]};
INSERT INTO type_json_dst SELECT data FROM type_json_src;
SET output_format_json_named_tuples_as_objects = 1;
SELECT * FROM type_json_dst ORDER BY data.k1 FORMAT JSONEachRow;
SELECT toTypeName(data) FROM type_json_dst LIMIT 1;
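
The order-sensitivity this test guards against comes from how the `JSON` type unifies key paths across inserted rows: both insertion orders must converge on the same tuple type, with paths missing from a row filled with defaults. A rough Python sketch of the path-union idea (this is not ClickHouse's inference code, just the concept the test exercises):

```python
# Rough illustration of unifying JSON key paths from differently-shaped rows.
row_a = {"k1": 1, "k10": [{"a": "1", "b": "2"}, {"a": "2", "b": "3"}]}
row_b = {"k1": 2, "k10": [{"a": "1", "b": "2", "c": {"k11": "haha"}}]}

def paths(obj, prefix=""):
    out = set()
    if isinstance(obj, dict):
        for k, v in obj.items():
            out |= paths(v, f"{prefix}.{k}" if prefix else k)
    elif isinstance(obj, list):
        for v in obj:
            out |= paths(v, prefix)
    else:
        out.add(prefix)
    return out

combined = paths(row_a) | paths(row_b)
# row_a lacks k10.c.k11, so that path is filled with an empty default for it.
assert "k10.c.k11" in combined
print(sorted(combined))  # ['k1', 'k10.a', 'k10.b', 'k10.c.k11']
```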

View File

@ -277,7 +277,7 @@ CREATE TABLE system.grants
(
`user_name` Nullable(String),
`role_name` Nullable(String),
`access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SELECT' = 5, 'INSERT' = 6, 'ALTER UPDATE' = 7, 'ALTER DELETE' = 8, 'ALTER ADD COLUMN' = 9, 'ALTER MODIFY COLUMN' = 10, 'ALTER DROP COLUMN' = 11, 'ALTER COMMENT COLUMN' = 12, 'ALTER CLEAR COLUMN' = 13, 'ALTER RENAME COLUMN' = 14, 'ALTER MATERIALIZE COLUMN' = 15, 'ALTER COLUMN' = 16, 'ALTER MODIFY COMMENT' = 17, 'ALTER ORDER BY' = 18, 'ALTER SAMPLE BY' = 19, 'ALTER ADD INDEX' = 20, 'ALTER DROP INDEX' = 21, 'ALTER MATERIALIZE INDEX' = 22, 'ALTER CLEAR INDEX' = 23, 'ALTER INDEX' = 24, 'ALTER ADD PROJECTION' = 25, 'ALTER DROP PROJECTION' = 26, 'ALTER MATERIALIZE PROJECTION' = 27, 'ALTER CLEAR PROJECTION' = 28, 'ALTER PROJECTION' = 29, 'ALTER ADD CONSTRAINT' = 30, 'ALTER DROP CONSTRAINT' = 31, 'ALTER CONSTRAINT' = 32, 'ALTER TTL' = 33, 'ALTER MATERIALIZE TTL' = 34, 'ALTER SETTINGS' = 35, 'ALTER MOVE PARTITION' = 36, 'ALTER FETCH PARTITION' = 37, 'ALTER FREEZE PARTITION' = 38, 'ALTER DATABASE SETTINGS' = 39, 'ALTER TABLE' = 40, 'ALTER DATABASE' = 41, 'ALTER VIEW REFRESH' = 42, 'ALTER VIEW MODIFY QUERY' = 43, 'ALTER VIEW' = 44, 'ALTER' = 45, 'CREATE DATABASE' = 46, 'CREATE TABLE' = 47, 'CREATE VIEW' = 48, 'CREATE DICTIONARY' = 49, 'CREATE TEMPORARY TABLE' = 50, 'CREATE FUNCTION' = 51, 'CREATE' = 52, 'DROP DATABASE' = 53, 'DROP TABLE' = 54, 'DROP VIEW' = 55, 'DROP DICTIONARY' = 56, 'DROP FUNCTION' = 57, 'DROP' = 58, 'TRUNCATE' = 59, 'OPTIMIZE' = 60, 'KILL QUERY' = 61, 'KILL TRANSACTION' = 62, 'MOVE PARTITION BETWEEN SHARDS' = 63, 'CREATE USER' = 64, 'ALTER USER' = 65, 'DROP USER' = 66, 'CREATE ROLE' = 67, 'ALTER ROLE' = 68, 'DROP ROLE' = 69, 'ROLE ADMIN' = 70, 'CREATE ROW POLICY' = 71, 'ALTER ROW POLICY' = 72, 'DROP ROW POLICY' = 73, 'CREATE QUOTA' = 74, 'ALTER QUOTA' = 75, 'DROP QUOTA' = 76, 'CREATE SETTINGS PROFILE' = 77, 'ALTER SETTINGS PROFILE' = 78, 'DROP SETTINGS PROFILE' = 79, 'SHOW USERS' = 80, 'SHOW ROLES' = 81, 'SHOW ROW POLICIES' = 82, 'SHOW QUOTAS' = 83, 'SHOW SETTINGS PROFILES' = 84, 'SHOW ACCESS' = 85, 'ACCESS MANAGEMENT' = 86, 'SYSTEM SHUTDOWN' = 87, 'SYSTEM DROP DNS CACHE' = 88, 'SYSTEM DROP MARK CACHE' = 89, 'SYSTEM DROP UNCOMPRESSED CACHE' = 90, 'SYSTEM DROP MMAP CACHE' = 91, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 92, 'SYSTEM DROP CACHE' = 93, 'SYSTEM RELOAD CONFIG' = 94, 'SYSTEM RELOAD SYMBOLS' = 95, 'SYSTEM RELOAD DICTIONARY' = 96, 'SYSTEM RELOAD MODEL' = 97, 'SYSTEM RELOAD FUNCTION' = 98, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 99, 'SYSTEM RELOAD' = 100, 'SYSTEM RESTART DISK' = 101, 'SYSTEM MERGES' = 102, 'SYSTEM TTL MERGES' = 103, 'SYSTEM FETCHES' = 104, 'SYSTEM MOVES' = 105, 'SYSTEM DISTRIBUTED SENDS' = 106, 'SYSTEM REPLICATED SENDS' = 107, 'SYSTEM SENDS' = 108, 'SYSTEM REPLICATION QUEUES' = 109, 'SYSTEM DROP REPLICA' = 110, 'SYSTEM SYNC REPLICA' = 111, 'SYSTEM RESTART REPLICA' = 112, 'SYSTEM RESTORE REPLICA' = 113, 'SYSTEM SYNC DATABASE REPLICA' = 114, 'SYSTEM SYNC TRANSACTION LOG' = 115, 'SYSTEM FLUSH DISTRIBUTED' = 116, 'SYSTEM FLUSH LOGS' = 117, 'SYSTEM FLUSH' = 118, 'SYSTEM THREAD FUZZER' = 119, 'SYSTEM' = 120, 'dictGet' = 121, 'addressToLine' = 122, 'addressToLineWithInlines' = 123, 'addressToSymbol' = 124, 'demangle' = 125, 'INTROSPECTION' = 126, 'FILE' = 127, 'URL' = 128, 'REMOTE' = 129, 'MONGO' = 130, 'MEILISEARCH' = 131, 'MYSQL' = 132, 'POSTGRES' = 133, 'SQLITE' = 134, 'ODBC' = 135, 'JDBC' = 136, 'HDFS' = 137, 'S3' = 138, 'HIVE' = 139, 'SOURCES' = 140, 'CLUSTER' = 141, 'ALL' = 142, 'NONE' = 143),
`access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SELECT' = 5, 'INSERT' = 6, 'ALTER UPDATE' = 7, 'ALTER DELETE' = 8, 'ALTER ADD COLUMN' = 9, 'ALTER MODIFY COLUMN' = 10, 'ALTER DROP COLUMN' = 11, 'ALTER COMMENT COLUMN' = 12, 'ALTER CLEAR COLUMN' = 13, 'ALTER RENAME COLUMN' = 14, 'ALTER MATERIALIZE COLUMN' = 15, 'ALTER COLUMN' = 16, 'ALTER MODIFY COMMENT' = 17, 'ALTER ORDER BY' = 18, 'ALTER SAMPLE BY' = 19, 'ALTER ADD INDEX' = 20, 'ALTER DROP INDEX' = 21, 'ALTER MATERIALIZE INDEX' = 22, 'ALTER CLEAR INDEX' = 23, 'ALTER INDEX' = 24, 'ALTER ADD PROJECTION' = 25, 'ALTER DROP PROJECTION' = 26, 'ALTER MATERIALIZE PROJECTION' = 27, 'ALTER CLEAR PROJECTION' = 28, 'ALTER PROJECTION' = 29, 'ALTER ADD CONSTRAINT' = 30, 'ALTER DROP CONSTRAINT' = 31, 'ALTER CONSTRAINT' = 32, 'ALTER TTL' = 33, 'ALTER MATERIALIZE TTL' = 34, 'ALTER SETTINGS' = 35, 'ALTER MOVE PARTITION' = 36, 'ALTER FETCH PARTITION' = 37, 'ALTER FREEZE PARTITION' = 38, 'ALTER DATABASE SETTINGS' = 39, 'ALTER TABLE' = 40, 'ALTER DATABASE' = 41, 'ALTER VIEW REFRESH' = 42, 'ALTER VIEW MODIFY QUERY' = 43, 'ALTER VIEW' = 44, 'ALTER' = 45, 'CREATE DATABASE' = 46, 'CREATE TABLE' = 47, 'CREATE VIEW' = 48, 'CREATE DICTIONARY' = 49, 'CREATE TEMPORARY TABLE' = 50, 'CREATE FUNCTION' = 51, 'CREATE' = 52, 'DROP DATABASE' = 53, 'DROP TABLE' = 54, 'DROP VIEW' = 55, 'DROP DICTIONARY' = 56, 'DROP FUNCTION' = 57, 'DROP' = 58, 'TRUNCATE' = 59, 'OPTIMIZE' = 60, 'KILL QUERY' = 61, 'KILL TRANSACTION' = 62, 'MOVE PARTITION BETWEEN SHARDS' = 63, 'CREATE USER' = 64, 'ALTER USER' = 65, 'DROP USER' = 66, 'CREATE ROLE' = 67, 'ALTER ROLE' = 68, 'DROP ROLE' = 69, 'ROLE ADMIN' = 70, 'CREATE ROW POLICY' = 71, 'ALTER ROW POLICY' = 72, 'DROP ROW POLICY' = 73, 'CREATE QUOTA' = 74, 'ALTER QUOTA' = 75, 'DROP QUOTA' = 76, 'CREATE SETTINGS PROFILE' = 77, 'ALTER SETTINGS PROFILE' = 78, 'DROP SETTINGS PROFILE' = 79, 'SHOW USERS' = 80, 'SHOW ROLES' = 81, 'SHOW ROW POLICIES' = 82, 'SHOW QUOTAS' = 83, 'SHOW SETTINGS PROFILES' = 84, 'SHOW ACCESS' = 85, 'ACCESS MANAGEMENT' = 86, 'SYSTEM SHUTDOWN' = 87, 'SYSTEM DROP DNS CACHE' = 88, 'SYSTEM DROP MARK CACHE' = 89, 'SYSTEM DROP UNCOMPRESSED CACHE' = 90, 'SYSTEM DROP MMAP CACHE' = 91, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 92, 'SYSTEM DROP CACHE' = 93, 'SYSTEM RELOAD CONFIG' = 94, 'SYSTEM RELOAD SYMBOLS' = 95, 'SYSTEM RELOAD DICTIONARY' = 96, 'SYSTEM RELOAD MODEL' = 97, 'SYSTEM RELOAD FUNCTION' = 98, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 99, 'SYSTEM RELOAD' = 100, 'SYSTEM RESTART DISK' = 101, 'SYSTEM MERGES' = 102, 'SYSTEM TTL MERGES' = 103, 'SYSTEM FETCHES' = 104, 'SYSTEM MOVES' = 105, 'SYSTEM DISTRIBUTED SENDS' = 106, 'SYSTEM REPLICATED SENDS' = 107, 'SYSTEM SENDS' = 108, 'SYSTEM REPLICATION QUEUES' = 109, 'SYSTEM DROP REPLICA' = 110, 'SYSTEM SYNC REPLICA' = 111, 'SYSTEM RESTART REPLICA' = 112, 'SYSTEM RESTORE REPLICA' = 113, 'SYSTEM SYNC DATABASE REPLICA' = 114, 'SYSTEM SYNC TRANSACTION LOG' = 115, 'SYSTEM FLUSH DISTRIBUTED' = 116, 'SYSTEM FLUSH LOGS' = 117, 'SYSTEM FLUSH' = 118, 'SYSTEM THREAD FUZZER' = 119, 'SYSTEM UNFREEZE' = 120, 'SYSTEM' = 121, 'dictGet' = 122, 'addressToLine' = 123, 'addressToLineWithInlines' = 124, 'addressToSymbol' = 125, 'demangle' = 126, 'INTROSPECTION' = 127, 'FILE' = 128, 'URL' = 129, 'REMOTE' = 130, 'MONGO' = 131, 'MEILISEARCH' = 132, 'MYSQL' = 133, 'POSTGRES' = 134, 'SQLITE' = 135, 'ODBC' = 136, 'JDBC' = 137, 'HDFS' = 138, 'S3' = 139, 'HIVE' = 140, 'SOURCES' = 141, 'CLUSTER' = 142, 'ALL' = 143, 'NONE' = 144),
`database` Nullable(String),
`table` Nullable(String),
`column` Nullable(String),
@ -551,10 +551,10 @@ ENGINE = SystemPartsColumns()
COMMENT 'SYSTEM TABLE is built on the fly.'
CREATE TABLE system.privileges
(
`privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SELECT' = 5, 'INSERT' = 6, 'ALTER UPDATE' = 7, 'ALTER DELETE' = 8, 'ALTER ADD COLUMN' = 9, 'ALTER MODIFY COLUMN' = 10, 'ALTER DROP COLUMN' = 11, 'ALTER COMMENT COLUMN' = 12, 'ALTER CLEAR COLUMN' = 13, 'ALTER RENAME COLUMN' = 14, 'ALTER MATERIALIZE COLUMN' = 15, 'ALTER COLUMN' = 16, 'ALTER MODIFY COMMENT' = 17, 'ALTER ORDER BY' = 18, 'ALTER SAMPLE BY' = 19, 'ALTER ADD INDEX' = 20, 'ALTER DROP INDEX' = 21, 'ALTER MATERIALIZE INDEX' = 22, 'ALTER CLEAR INDEX' = 23, 'ALTER INDEX' = 24, 'ALTER ADD PROJECTION' = 25, 'ALTER DROP PROJECTION' = 26, 'ALTER MATERIALIZE PROJECTION' = 27, 'ALTER CLEAR PROJECTION' = 28, 'ALTER PROJECTION' = 29, 'ALTER ADD CONSTRAINT' = 30, 'ALTER DROP CONSTRAINT' = 31, 'ALTER CONSTRAINT' = 32, 'ALTER TTL' = 33, 'ALTER MATERIALIZE TTL' = 34, 'ALTER SETTINGS' = 35, 'ALTER MOVE PARTITION' = 36, 'ALTER FETCH PARTITION' = 37, 'ALTER FREEZE PARTITION' = 38, 'ALTER DATABASE SETTINGS' = 39, 'ALTER TABLE' = 40, 'ALTER DATABASE' = 41, 'ALTER VIEW REFRESH' = 42, 'ALTER VIEW MODIFY QUERY' = 43, 'ALTER VIEW' = 44, 'ALTER' = 45, 'CREATE DATABASE' = 46, 'CREATE TABLE' = 47, 'CREATE VIEW' = 48, 'CREATE DICTIONARY' = 49, 'CREATE TEMPORARY TABLE' = 50, 'CREATE FUNCTION' = 51, 'CREATE' = 52, 'DROP DATABASE' = 53, 'DROP TABLE' = 54, 'DROP VIEW' = 55, 'DROP DICTIONARY' = 56, 'DROP FUNCTION' = 57, 'DROP' = 58, 'TRUNCATE' = 59, 'OPTIMIZE' = 60, 'KILL QUERY' = 61, 'KILL TRANSACTION' = 62, 'MOVE PARTITION BETWEEN SHARDS' = 63, 'CREATE USER' = 64, 'ALTER USER' = 65, 'DROP USER' = 66, 'CREATE ROLE' = 67, 'ALTER ROLE' = 68, 'DROP ROLE' = 69, 'ROLE ADMIN' = 70, 'CREATE ROW POLICY' = 71, 'ALTER ROW POLICY' = 72, 'DROP ROW POLICY' = 73, 'CREATE QUOTA' = 74, 'ALTER QUOTA' = 75, 'DROP QUOTA' = 76, 'CREATE SETTINGS PROFILE' = 77, 'ALTER SETTINGS PROFILE' = 78, 'DROP SETTINGS PROFILE' = 79, 'SHOW USERS' = 80, 'SHOW ROLES' = 81, 'SHOW ROW POLICIES' = 82, 'SHOW QUOTAS' = 83, 'SHOW SETTINGS PROFILES' = 84, 'SHOW ACCESS' = 85, 'ACCESS MANAGEMENT' = 86, 'SYSTEM SHUTDOWN' = 87, 'SYSTEM DROP DNS CACHE' = 88, 'SYSTEM DROP MARK CACHE' = 89, 'SYSTEM DROP UNCOMPRESSED CACHE' = 90, 'SYSTEM DROP MMAP CACHE' = 91, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 92, 'SYSTEM DROP CACHE' = 93, 'SYSTEM RELOAD CONFIG' = 94, 'SYSTEM RELOAD SYMBOLS' = 95, 'SYSTEM RELOAD DICTIONARY' = 96, 'SYSTEM RELOAD MODEL' = 97, 'SYSTEM RELOAD FUNCTION' = 98, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 99, 'SYSTEM RELOAD' = 100, 'SYSTEM RESTART DISK' = 101, 'SYSTEM MERGES' = 102, 'SYSTEM TTL MERGES' = 103, 'SYSTEM FETCHES' = 104, 'SYSTEM MOVES' = 105, 'SYSTEM DISTRIBUTED SENDS' = 106, 'SYSTEM REPLICATED SENDS' = 107, 'SYSTEM SENDS' = 108, 'SYSTEM REPLICATION QUEUES' = 109, 'SYSTEM DROP REPLICA' = 110, 'SYSTEM SYNC REPLICA' = 111, 'SYSTEM RESTART REPLICA' = 112, 'SYSTEM RESTORE REPLICA' = 113, 'SYSTEM SYNC DATABASE REPLICA' = 114, 'SYSTEM SYNC TRANSACTION LOG' = 115, 'SYSTEM FLUSH DISTRIBUTED' = 116, 'SYSTEM FLUSH LOGS' = 117, 'SYSTEM FLUSH' = 118, 'SYSTEM THREAD FUZZER' = 119, 'SYSTEM' = 120, 'dictGet' = 121, 'addressToLine' = 122, 'addressToLineWithInlines' = 123, 'addressToSymbol' = 124, 'demangle' = 125, 'INTROSPECTION' = 126, 'FILE' = 127, 'URL' = 128, 'REMOTE' = 129, 'MONGO' = 130, 'MEILISEARCH' = 131, 'MYSQL' = 132, 'POSTGRES' = 133, 'SQLITE' = 134, 'ODBC' = 135, 'JDBC' = 136, 'HDFS' = 137, 'S3' = 138, 'HIVE' = 139, 'SOURCES' = 140, 'CLUSTER' = 141, 'ALL' = 142, 'NONE' = 143),
`privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SELECT' = 5, 'INSERT' = 6, 'ALTER UPDATE' = 7, 'ALTER DELETE' = 8, 'ALTER ADD COLUMN' = 9, 'ALTER MODIFY COLUMN' = 10, 'ALTER DROP COLUMN' = 11, 'ALTER COMMENT COLUMN' = 12, 'ALTER CLEAR COLUMN' = 13, 'ALTER RENAME COLUMN' = 14, 'ALTER MATERIALIZE COLUMN' = 15, 'ALTER COLUMN' = 16, 'ALTER MODIFY COMMENT' = 17, 'ALTER ORDER BY' = 18, 'ALTER SAMPLE BY' = 19, 'ALTER ADD INDEX' = 20, 'ALTER DROP INDEX' = 21, 'ALTER MATERIALIZE INDEX' = 22, 'ALTER CLEAR INDEX' = 23, 'ALTER INDEX' = 24, 'ALTER ADD PROJECTION' = 25, 'ALTER DROP PROJECTION' = 26, 'ALTER MATERIALIZE PROJECTION' = 27, 'ALTER CLEAR PROJECTION' = 28, 'ALTER PROJECTION' = 29, 'ALTER ADD CONSTRAINT' = 30, 'ALTER DROP CONSTRAINT' = 31, 'ALTER CONSTRAINT' = 32, 'ALTER TTL' = 33, 'ALTER MATERIALIZE TTL' = 34, 'ALTER SETTINGS' = 35, 'ALTER MOVE PARTITION' = 36, 'ALTER FETCH PARTITION' = 37, 'ALTER FREEZE PARTITION' = 38, 'ALTER DATABASE SETTINGS' = 39, 'ALTER TABLE' = 40, 'ALTER DATABASE' = 41, 'ALTER VIEW REFRESH' = 42, 'ALTER VIEW MODIFY QUERY' = 43, 'ALTER VIEW' = 44, 'ALTER' = 45, 'CREATE DATABASE' = 46, 'CREATE TABLE' = 47, 'CREATE VIEW' = 48, 'CREATE DICTIONARY' = 49, 'CREATE TEMPORARY TABLE' = 50, 'CREATE FUNCTION' = 51, 'CREATE' = 52, 'DROP DATABASE' = 53, 'DROP TABLE' = 54, 'DROP VIEW' = 55, 'DROP DICTIONARY' = 56, 'DROP FUNCTION' = 57, 'DROP' = 58, 'TRUNCATE' = 59, 'OPTIMIZE' = 60, 'KILL QUERY' = 61, 'KILL TRANSACTION' = 62, 'MOVE PARTITION BETWEEN SHARDS' = 63, 'CREATE USER' = 64, 'ALTER USER' = 65, 'DROP USER' = 66, 'CREATE ROLE' = 67, 'ALTER ROLE' = 68, 'DROP ROLE' = 69, 'ROLE ADMIN' = 70, 'CREATE ROW POLICY' = 71, 'ALTER ROW POLICY' = 72, 'DROP ROW POLICY' = 73, 'CREATE QUOTA' = 74, 'ALTER QUOTA' = 75, 'DROP QUOTA' = 76, 'CREATE SETTINGS PROFILE' = 77, 'ALTER SETTINGS PROFILE' = 78, 'DROP SETTINGS PROFILE' = 79, 'SHOW USERS' = 80, 'SHOW ROLES' = 81, 'SHOW ROW POLICIES' = 82, 'SHOW QUOTAS' = 83, 'SHOW SETTINGS PROFILES' = 84, 'SHOW ACCESS' = 85, 'ACCESS MANAGEMENT' = 86, 'SYSTEM SHUTDOWN' = 87, 'SYSTEM DROP DNS CACHE' = 88, 'SYSTEM DROP MARK CACHE' = 89, 'SYSTEM DROP UNCOMPRESSED CACHE' = 90, 'SYSTEM DROP MMAP CACHE' = 91, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 92, 'SYSTEM DROP CACHE' = 93, 'SYSTEM RELOAD CONFIG' = 94, 'SYSTEM RELOAD SYMBOLS' = 95, 'SYSTEM RELOAD DICTIONARY' = 96, 'SYSTEM RELOAD MODEL' = 97, 'SYSTEM RELOAD FUNCTION' = 98, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 99, 'SYSTEM RELOAD' = 100, 'SYSTEM RESTART DISK' = 101, 'SYSTEM MERGES' = 102, 'SYSTEM TTL MERGES' = 103, 'SYSTEM FETCHES' = 104, 'SYSTEM MOVES' = 105, 'SYSTEM DISTRIBUTED SENDS' = 106, 'SYSTEM REPLICATED SENDS' = 107, 'SYSTEM SENDS' = 108, 'SYSTEM REPLICATION QUEUES' = 109, 'SYSTEM DROP REPLICA' = 110, 'SYSTEM SYNC REPLICA' = 111, 'SYSTEM RESTART REPLICA' = 112, 'SYSTEM RESTORE REPLICA' = 113, 'SYSTEM SYNC DATABASE REPLICA' = 114, 'SYSTEM SYNC TRANSACTION LOG' = 115, 'SYSTEM FLUSH DISTRIBUTED' = 116, 'SYSTEM FLUSH LOGS' = 117, 'SYSTEM FLUSH' = 118, 'SYSTEM THREAD FUZZER' = 119, 'SYSTEM UNFREEZE' = 120, 'SYSTEM' = 121, 'dictGet' = 122, 'addressToLine' = 123, 'addressToLineWithInlines' = 124, 'addressToSymbol' = 125, 'demangle' = 126, 'INTROSPECTION' = 127, 'FILE' = 128, 'URL' = 129, 'REMOTE' = 130, 'MONGO' = 131, 'MEILISEARCH' = 132, 'MYSQL' = 133, 'POSTGRES' = 134, 'SQLITE' = 135, 'ODBC' = 136, 'JDBC' = 137, 'HDFS' = 138, 'S3' = 139, 'HIVE' = 140, 'SOURCES' = 141, 'CLUSTER' = 142, 'ALL' = 143, 'NONE' = 144),
`aliases` Array(String),
`level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5)),
`parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SELECT' = 5, 'INSERT' = 6, 'ALTER UPDATE' = 7, 'ALTER DELETE' = 8, 'ALTER ADD COLUMN' = 9, 'ALTER MODIFY COLUMN' = 10, 'ALTER DROP COLUMN' = 11, 'ALTER COMMENT COLUMN' = 12, 'ALTER CLEAR COLUMN' = 13, 'ALTER RENAME COLUMN' = 14, 'ALTER MATERIALIZE COLUMN' = 15, 'ALTER COLUMN' = 16, 'ALTER MODIFY COMMENT' = 17, 'ALTER ORDER BY' = 18, 'ALTER SAMPLE BY' = 19, 'ALTER ADD INDEX' = 20, 'ALTER DROP INDEX' = 21, 'ALTER MATERIALIZE INDEX' = 22, 'ALTER CLEAR INDEX' = 23, 'ALTER INDEX' = 24, 'ALTER ADD PROJECTION' = 25, 'ALTER DROP PROJECTION' = 26, 'ALTER MATERIALIZE PROJECTION' = 27, 'ALTER CLEAR PROJECTION' = 28, 'ALTER PROJECTION' = 29, 'ALTER ADD CONSTRAINT' = 30, 'ALTER DROP CONSTRAINT' = 31, 'ALTER CONSTRAINT' = 32, 'ALTER TTL' = 33, 'ALTER MATERIALIZE TTL' = 34, 'ALTER SETTINGS' = 35, 'ALTER MOVE PARTITION' = 36, 'ALTER FETCH PARTITION' = 37, 'ALTER FREEZE PARTITION' = 38, 'ALTER DATABASE SETTINGS' = 39, 'ALTER TABLE' = 40, 'ALTER DATABASE' = 41, 'ALTER VIEW REFRESH' = 42, 'ALTER VIEW MODIFY QUERY' = 43, 'ALTER VIEW' = 44, 'ALTER' = 45, 'CREATE DATABASE' = 46, 'CREATE TABLE' = 47, 'CREATE VIEW' = 48, 'CREATE DICTIONARY' = 49, 'CREATE TEMPORARY TABLE' = 50, 'CREATE FUNCTION' = 51, 'CREATE' = 52, 'DROP DATABASE' = 53, 'DROP TABLE' = 54, 'DROP VIEW' = 55, 'DROP DICTIONARY' = 56, 'DROP FUNCTION' = 57, 'DROP' = 58, 'TRUNCATE' = 59, 'OPTIMIZE' = 60, 'KILL QUERY' = 61, 'KILL TRANSACTION' = 62, 'MOVE PARTITION BETWEEN SHARDS' = 63, 'CREATE USER' = 64, 'ALTER USER' = 65, 'DROP USER' = 66, 'CREATE ROLE' = 67, 'ALTER ROLE' = 68, 'DROP ROLE' = 69, 'ROLE ADMIN' = 70, 'CREATE ROW POLICY' = 71, 'ALTER ROW POLICY' = 72, 'DROP ROW POLICY' = 73, 'CREATE QUOTA' = 74, 'ALTER QUOTA' = 75, 'DROP QUOTA' = 76, 'CREATE SETTINGS PROFILE' = 77, 'ALTER SETTINGS PROFILE' = 78, 'DROP SETTINGS PROFILE' = 79, 'SHOW USERS' = 80, 'SHOW ROLES' = 81, 'SHOW ROW POLICIES' = 82, 'SHOW QUOTAS' = 83, 'SHOW SETTINGS PROFILES' = 84, 'SHOW ACCESS' = 85, 'ACCESS MANAGEMENT' = 86, 'SYSTEM SHUTDOWN' = 87, 'SYSTEM DROP DNS CACHE' = 88, 'SYSTEM DROP MARK CACHE' = 89, 'SYSTEM DROP UNCOMPRESSED CACHE' = 90, 'SYSTEM DROP MMAP CACHE' = 91, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 92, 'SYSTEM DROP CACHE' = 93, 'SYSTEM RELOAD CONFIG' = 94, 'SYSTEM RELOAD SYMBOLS' = 95, 'SYSTEM RELOAD DICTIONARY' = 96, 'SYSTEM RELOAD MODEL' = 97, 'SYSTEM RELOAD FUNCTION' = 98, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 99, 'SYSTEM RELOAD' = 100, 'SYSTEM RESTART DISK' = 101, 'SYSTEM MERGES' = 102, 'SYSTEM TTL MERGES' = 103, 'SYSTEM FETCHES' = 104, 'SYSTEM MOVES' = 105, 'SYSTEM DISTRIBUTED SENDS' = 106, 'SYSTEM REPLICATED SENDS' = 107, 'SYSTEM SENDS' = 108, 'SYSTEM REPLICATION QUEUES' = 109, 'SYSTEM DROP REPLICA' = 110, 'SYSTEM SYNC REPLICA' = 111, 'SYSTEM RESTART REPLICA' = 112, 'SYSTEM RESTORE REPLICA' = 113, 'SYSTEM SYNC DATABASE REPLICA' = 114, 'SYSTEM SYNC TRANSACTION LOG' = 115, 'SYSTEM FLUSH DISTRIBUTED' = 116, 'SYSTEM FLUSH LOGS' = 117, 'SYSTEM FLUSH' = 118, 'SYSTEM THREAD FUZZER' = 119, 'SYSTEM' = 120, 'dictGet' = 121, 'addressToLine' = 122, 'addressToLineWithInlines' = 123, 'addressToSymbol' = 124, 'demangle' = 125, 'INTROSPECTION' = 126, 'FILE' = 127, 'URL' = 128, 'REMOTE' = 129, 'MONGO' = 130, 'MEILISEARCH' = 131, 'MYSQL' = 132, 'POSTGRES' = 133, 'SQLITE' = 134, 'ODBC' = 135, 'JDBC' = 136, 'HDFS' = 137, 'S3' = 138, 'HIVE' = 139, 'SOURCES' = 140, 'CLUSTER' = 141, 'ALL' = 142, 'NONE' = 143))
`parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SELECT' = 5, 'INSERT' = 6, 'ALTER UPDATE' = 7, 'ALTER DELETE' = 8, 'ALTER ADD COLUMN' = 9, 'ALTER MODIFY COLUMN' = 10, 'ALTER DROP COLUMN' = 11, 'ALTER COMMENT COLUMN' = 12, 'ALTER CLEAR COLUMN' = 13, 'ALTER RENAME COLUMN' = 14, 'ALTER MATERIALIZE COLUMN' = 15, 'ALTER COLUMN' = 16, 'ALTER MODIFY COMMENT' = 17, 'ALTER ORDER BY' = 18, 'ALTER SAMPLE BY' = 19, 'ALTER ADD INDEX' = 20, 'ALTER DROP INDEX' = 21, 'ALTER MATERIALIZE INDEX' = 22, 'ALTER CLEAR INDEX' = 23, 'ALTER INDEX' = 24, 'ALTER ADD PROJECTION' = 25, 'ALTER DROP PROJECTION' = 26, 'ALTER MATERIALIZE PROJECTION' = 27, 'ALTER CLEAR PROJECTION' = 28, 'ALTER PROJECTION' = 29, 'ALTER ADD CONSTRAINT' = 30, 'ALTER DROP CONSTRAINT' = 31, 'ALTER CONSTRAINT' = 32, 'ALTER TTL' = 33, 'ALTER MATERIALIZE TTL' = 34, 'ALTER SETTINGS' = 35, 'ALTER MOVE PARTITION' = 36, 'ALTER FETCH PARTITION' = 37, 'ALTER FREEZE PARTITION' = 38, 'ALTER DATABASE SETTINGS' = 39, 'ALTER TABLE' = 40, 'ALTER DATABASE' = 41, 'ALTER VIEW REFRESH' = 42, 'ALTER VIEW MODIFY QUERY' = 43, 'ALTER VIEW' = 44, 'ALTER' = 45, 'CREATE DATABASE' = 46, 'CREATE TABLE' = 47, 'CREATE VIEW' = 48, 'CREATE DICTIONARY' = 49, 'CREATE TEMPORARY TABLE' = 50, 'CREATE FUNCTION' = 51, 'CREATE' = 52, 'DROP DATABASE' = 53, 'DROP TABLE' = 54, 'DROP VIEW' = 55, 'DROP DICTIONARY' = 56, 'DROP FUNCTION' = 57, 'DROP' = 58, 'TRUNCATE' = 59, 'OPTIMIZE' = 60, 'KILL QUERY' = 61, 'KILL TRANSACTION' = 62, 'MOVE PARTITION BETWEEN SHARDS' = 63, 'CREATE USER' = 64, 'ALTER USER' = 65, 'DROP USER' = 66, 'CREATE ROLE' = 67, 'ALTER ROLE' = 68, 'DROP ROLE' = 69, 'ROLE ADMIN' = 70, 'CREATE ROW POLICY' = 71, 'ALTER ROW POLICY' = 72, 'DROP ROW POLICY' = 73, 'CREATE QUOTA' = 74, 'ALTER QUOTA' = 75, 'DROP QUOTA' = 76, 'CREATE SETTINGS PROFILE' = 77, 'ALTER SETTINGS PROFILE' = 78, 'DROP SETTINGS PROFILE' = 79, 'SHOW USERS' = 80, 'SHOW ROLES' = 81, 'SHOW ROW POLICIES' = 82, 'SHOW QUOTAS' = 83, 'SHOW SETTINGS PROFILES' = 84, 'SHOW ACCESS' = 85, 'ACCESS MANAGEMENT' = 86, 'SYSTEM SHUTDOWN' = 87, 'SYSTEM DROP DNS CACHE' = 88, 'SYSTEM DROP MARK CACHE' = 89, 'SYSTEM DROP UNCOMPRESSED CACHE' = 90, 'SYSTEM DROP MMAP CACHE' = 91, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 92, 'SYSTEM DROP CACHE' = 93, 'SYSTEM RELOAD CONFIG' = 94, 'SYSTEM RELOAD SYMBOLS' = 95, 'SYSTEM RELOAD DICTIONARY' = 96, 'SYSTEM RELOAD MODEL' = 97, 'SYSTEM RELOAD FUNCTION' = 98, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 99, 'SYSTEM RELOAD' = 100, 'SYSTEM RESTART DISK' = 101, 'SYSTEM MERGES' = 102, 'SYSTEM TTL MERGES' = 103, 'SYSTEM FETCHES' = 104, 'SYSTEM MOVES' = 105, 'SYSTEM DISTRIBUTED SENDS' = 106, 'SYSTEM REPLICATED SENDS' = 107, 'SYSTEM SENDS' = 108, 'SYSTEM REPLICATION QUEUES' = 109, 'SYSTEM DROP REPLICA' = 110, 'SYSTEM SYNC REPLICA' = 111, 'SYSTEM RESTART REPLICA' = 112, 'SYSTEM RESTORE REPLICA' = 113, 'SYSTEM SYNC DATABASE REPLICA' = 114, 'SYSTEM SYNC TRANSACTION LOG' = 115, 'SYSTEM FLUSH DISTRIBUTED' = 116, 'SYSTEM FLUSH LOGS' = 117, 'SYSTEM FLUSH' = 118, 'SYSTEM THREAD FUZZER' = 119, 'SYSTEM UNFREEZE' = 120, 'SYSTEM' = 121, 'dictGet' = 122, 'addressToLine' = 123, 'addressToLineWithInlines' = 124, 'addressToSymbol' = 125, 'demangle' = 126, 'INTROSPECTION' = 127, 'FILE' = 128, 'URL' = 129, 'REMOTE' = 130, 'MONGO' = 131, 'MEILISEARCH' = 132, 'MYSQL' = 133, 'POSTGRES' = 134, 'SQLITE' = 135, 'ODBC' = 136, 'JDBC' = 137, 'HDFS' = 138, 'S3' = 139, 'HIVE' = 140, 'SOURCES' = 141, 'CLUSTER' = 142, 'ALL' = 143, 'NONE' = 144))
)
ENGINE = SystemPrivileges()
COMMENT 'SYSTEM TABLE is built on the fly.'

View File

@ -32,7 +32,7 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS)
add_subdirectory (check-mysql-binlog)
add_subdirectory (keeper-bench)
add_subdirectory (graphite-rollup)
add_subdirectory (self-extr-exec)
add_subdirectory (self-extracting-executable)
if (TARGET ch_contrib::nuraft)
add_subdirectory (keeper-data-dumper)

View File

@ -11,4 +11,5 @@ add_custom_command (TARGET compressor
POST_BUILD
COMMAND cat pre_compressor decompressor > compressor
COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/post_build.sh >> compressor
COMMAND chmod +x compressor
)

View File

@ -11,6 +11,41 @@
#include "types.h"
/// blocking write
ssize_t write_data(int fd, const void *buf, size_t count)
{
for (size_t n = 0; n < count;)
{
ssize_t sz = write(fd, reinterpret_cast<const char*>(buf) + n, count - n);
if (sz < 0)
{
if (errno == EINTR)
continue;
return sz;
}
n += sz;
}
return count;
}
/// blocking read
ssize_t read_data(int fd, void *buf, size_t count)
{
for (size_t n = 0; n < count;)
{
ssize_t sz = read(fd, reinterpret_cast<char*>(buf) + n, count - n);
if (sz < 0)
{
if (errno == EINTR)
continue;
return sz;
}
if (sz == 0)
return count - n;
n += sz;
}
return count;
}
/// Main compression part
int doCompress(char * input, char * output, off_t & in_offset, off_t & out_offset,
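
`write_data`/`read_data` above wrap the raw syscalls so short writes and `EINTR` cannot silently truncate data. The same loops in Python, for illustration (since PEP 475, Python retries `EINTR` internally, so only partial reads and writes need handling):

```python
import os

def write_data(fd: int, buf: bytes) -> int:
    """Blocking write: loop until every byte of buf is written."""
    n = 0
    while n < len(buf):
        # os.write may write fewer bytes than requested (a short write);
        # EINTR is retried internally by Python (PEP 475).
        n += os.write(fd, buf[n:])
    return n

def read_data(fd: int, count: int) -> bytes:
    """Blocking read: loop until count bytes are read or EOF is reached."""
    chunks = []
    n = 0
    while n < count:
        chunk = os.read(fd, count - n)
        if not chunk:  # EOF
            break
        chunks.append(chunk)
        n += len(chunk)
    return b"".join(chunks)
```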
@ -110,7 +145,7 @@ int compress(int in_fd, int out_fd, int level, off_t & pointer, const struct sta
}
/// Save data into file and refresh pointer
if (current_block_size != write(out_fd, output, current_block_size))
if (current_block_size != write_data(out_fd, output, current_block_size))
{
perror(nullptr);
return 1;
@ -219,7 +254,7 @@ int compressFiles(char* filenames[], int count, int output_fd, int level, const
continue;
}
printf("Size: %ld\n", info_in.st_size);
printf("Size: %td\n", info_in.st_size);
/// Save umask
files_data[i].umask = info_in.st_mode;
@ -271,30 +306,28 @@ int copy_decompressor(const char *self, int output_fd)
if (-1 == lseek(input_fd, -15, SEEK_END))
{
close(input_fd);
perror(nullptr);
close(input_fd);
return 1;
}
char size_str[16] = {0};
for (size_t s_sz = sizeof(size_str) - 1; s_sz;)
if (ssize_t sz = read_data(input_fd, size_str, 15); sz < 15)
{
ssize_t sz = read(input_fd, size_str + sizeof(size_str) - (s_sz + 1), s_sz);
if (sz <= 0)
{
close(input_fd);
if (sz < 0)
perror(nullptr);
return 1;
}
s_sz -= sz;
else
fprintf(stderr, "Error: unable to extract decompressor.\n");
close(input_fd);
return 1;
}
int decompressor_size = atoi(size_str);
if (-1 == lseek(input_fd, -(decompressor_size + 15), SEEK_END))
{
close(input_fd);
perror(nullptr);
close(input_fd);
return 1;
}
@ -310,21 +343,18 @@ int copy_decompressor(const char *self, int output_fd)
if (n < 0)
{
close(input_fd);
if (errno == EINTR)
continue;
perror(nullptr);
close(input_fd);
return 1;
}
while (n > 0)
if (n != write_data(output_fd, buf, n))
{
ssize_t sz = write(output_fd, buf, n);
if (sz < 0)
{
close(input_fd);
perror(nullptr);
return 1;
}
n -= sz;
perror(nullptr);
close(input_fd);
return 1;
}
} while (true);

View File

@ -1,9 +1,10 @@
//#include <cstddef>
//#include <cstdio>
//#include <cstring>
#include <zstd.h>
#include <sys/mman.h>
#if defined __APPLE__
#include <sys/mount.h>
#else
#include <sys/statfs.h>
#endif
#include <fcntl.h>
#include <sys/wait.h>
#include <unistd.h>
@ -51,7 +52,7 @@ int decompress(char * input, char * output, off_t start, off_t end, size_t max_n
size = ZSTD_findFrameCompressedSize(input + in_pointer, max_block_size);
if (ZSTD_isError(size))
{
fprintf(stderr, "Error (ZSTD): %zu %s\n", size, ZSTD_getErrorName(size));
fprintf(stderr, "Error (ZSTD): %td %s\n", size, ZSTD_getErrorName(size));
error_happened = true;
break;
}
@ -59,7 +60,7 @@ int decompress(char * input, char * output, off_t start, off_t end, size_t max_n
decompressed_size = ZSTD_getFrameContentSize(input + in_pointer, max_block_size);
if (ZSTD_isError(decompressed_size))
{
fprintf(stderr, "Error (ZSTD): %zu %s\n", decompressed_size, ZSTD_getErrorName(decompressed_size));
fprintf(stderr, "Error (ZSTD): %td %s\n", decompressed_size, ZSTD_getErrorName(decompressed_size));
error_happened = true;
break;
}
@ -170,7 +171,7 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress
}
if (fs_info.f_blocks * info_in.st_blksize < decompressed_full_size)
{
fprintf(stderr, "Not enough space for decompression. Have %lu, need %zu.",
fprintf(stderr, "Not enough space for decompression. Have %tu, need %zu.",
fs_info.f_blocks * info_in.st_blksize, decompressed_full_size);
return 1;
}
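
The free-space guard above multiplies the filesystem block count by the block size and compares the product with the total decompressed size. A comparable check in Python via `os.statvfs` (note: this sketch uses `f_bavail`, the blocks available to an unprivileged process, which is a stricter test than the total `f_blocks` count used in the C code):

```python
import os

def enough_space(path: str, needed_bytes: int) -> bool:
    """Check whether the filesystem containing `path` can hold needed_bytes."""
    st = os.statvfs(path)
    # f_bavail * f_frsize = bytes available to this process on that filesystem.
    return st.f_bavail * st.f_frsize >= needed_bytes

if not enough_space(".", 1 << 30):
    print("Not enough space for decompression.")
```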